Extending Many Variables To Cover The Same Time Domain

In this tutorial we create two variables defined over two different but overlapping time domains. We then grow both variables so that they span the same (union) time domain, filling the added time steps with missing data.

The CDAT software was developed by LLNL. This tutorial was written by Charles Doutriaux. This work was performed under the auspices of the U.S. Department of Energy by Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344.

Download the Jupyter Notebook

Creating the Variables

Back to Top

Let's create two distinct variables (monthly time series)

Variable 1 goes from 1989 through 2010; variable 2 goes from 2000 through 2017.

In [1]:
import cdms2
import numpy
import MV2
import cdtime
import cdutil

def createTimeAxis(start, end):
    """Build a monthly cdms2 time axis running from ``start`` to ``end``.

    Parameters
    ----------
    start, end
        Either an ``int`` or an object that already provides
        ``torelative`` (presumably a cdtime component time; only ints are
        converted here). An int ``start`` is turned into
        ``cdtime.comptime(start)`` and an int ``end`` into
        ``cdtime.comptime(end, 12, 31)``.

    Returns
    -------
    A cdms2 axis with id ``"time"``, units ``"months since <start>"``,
    values ``numpy.arange(n)``, designated as a time axis and with monthly
    bounds set by ``cdutil.setTimeBoundsMonthly``.
    """
    # Only plain ints are promoted to component times; anything else is used as-is
    first = cdtime.comptime(start) if isinstance(start, int) else start
    last = cdtime.comptime(end, 12, 31) if isinstance(end, int) else end

    # Offset of the last date from the first, in months, plus one for the
    # number of points on the axis
    units = "months since {}".format(first)
    month_count = last.torelative(units).value + 1

    axis = cdms2.createAxis(numpy.arange(month_count))
    axis.id = "time"
    axis.units = units
    axis.designateTime()
    cdutil.setTimeBoundsMonthly(axis)
    return axis

def createData(start, end, shape=()):
    """Return random monthly data covering ``start`` through ``end``.

    Parameters
    ----------
    start, end
        Passed unchanged to ``createTimeAxis`` to build the time axis.
    shape : tuple, optional
        Sizes of any extra dimensions to append after the time dimension.

    Returns
    -------
    An MV2 array with id ``"data"`` filled from ``numpy.random.randn``,
    whose first axis is the generated time axis.
    """
    axis = createTimeAxis(start, end)
    # Time is always the leading dimension; extra dimensions follow it
    full_shape = (len(axis),) + shape
    values = numpy.random.randn(*full_shape)
    variable = MV2.array(values)
    variable.setAxis(0, axis)
    variable.id = "data"
    return variable

# Two overlapping monthly series: 1989 through 2010 and 2000 through 2017
data1, data2 = createData(1989, 2010), createData(2000, 2017)

Functions to Create "Grown" Data

Back to Top

In [2]:
def missingMonths(time, start, end):
    """Count the months missing before and after a series of time points.

    Parameters
    ----------
    time
        Indexable sequence of time points offering ``cmp`` and
        ``torelative`` (``grow`` passes the result of
        ``asComponentTime()``).
    start, end
        Dates the series should be extended to.

    Returns
    -------
    tuple of int
        ``(before, after)``: months to add ahead of ``time[0]`` to reach
        ``start`` and months to add past ``time[-1]`` to reach ``end``.
        Each is 0 when the series already reaches that date.
    """
    first, last = time[0], time[-1]

    # Leading gap: only when the series begins after the requested start
    if first.cmp(start) > 0:
        leading = first.torelative("months since {}".format(start)).value
    else:
        leading = 0

    # Trailing gap: only when the series stops before the requested end
    if last.cmp(end) < 0:
        trailing = end.torelative("months since {}".format(last)).value
    else:
        trailing = 0

    return int(leading), int(trailing)

def grow(data, start, end):
    """Pad a variable with masked values so its time axis spans start..end.

    Parameters
    ----------
    data
        cdms2 variable with a time axis (assumed to be monthly; the new
        axis is built as consecutive month offsets).
    start, end
        Dates the returned variable must start and end at, in a form
        accepted by ``missingMonths``.

    Returns
    -------
    A new MV2 variable with the same id and original axis order as
    ``data``. Its time axis has id ``"time"``, units
    ``"months since <start>"`` and monthly bounds; the time steps added
    before and after the original data are masked.
    """
    # Remember the caller's axis order so it can be restored on return
    order = data.getOrder(ids=True)
    # Work with time as the leading dimension so padding is a simple slice
    data = data(order=('t...'))
    tc = data.getTime().asComponentTime()
    b, e = missingMonths(tc, start, end)

    # Prepare the new data
    n_old = data.shape[0]
    sh = list(data.shape)
    sh[0] = n_old + b + e
    new = MV2.ones(sh)
    new = MV2.masked_greater(new, 0.)  # mask everywhere
    # An explicit stop index avoids special-casing e == 0 (where -e would be 0)
    new[b:b + n_old] = data[:]

    new_time = cdms2.createAxis(numpy.arange(sh[0]))
    new_time.units = "months since {}".format(start)
    new_time.id = "time"
    new_time.designateTime()
    cdutil.setAxisTimeBoundsMonthly(new_time)
    new.setAxis(0, new_time)

    # Carry over the non-time axes. The original passed the undefined name
    # `ax` here, which raised NameError for any variable with more than
    # one dimension.
    for i, axis in enumerate(data.getAxisList()[1:]):
        new.setAxis(i + 1, axis)

    new.id = data.id
    return new(order=order)
    
        
def growDatasets(*arrays):
    """Grow N cdms2 transient variables onto one common time range.

    The common range runs from the earliest first time step to the latest
    last time step found among ``arrays``. This is a generator: it yields
    one grown variable per input, in input order, each produced by
    ``grow`` (which fills the added time steps with missing values).
    """
    earliest = latest = None

    # First pass: find the overall first and last dates (as component times)
    for variable in arrays:
        stamps = variable.getTime().asComponentTime()
        first, last = stamps[0], stamps[-1]
        if earliest is None or first.cmp(earliest) <= 0:
            earliest = first
        if latest is None or last.cmp(latest) >= 0:
            latest = last

    # Second pass: stretch every variable to that common range
    yield from (grow(variable, earliest, latest) for variable in arrays)
        
    

Use the function

Back to Top

In [3]:
# growDatasets is a generator; materialise it before unpacking
data1, data2 = tuple(growDatasets(data1, data2))

# Show the common shape, then the first and last value of each grown series
print(data1.shape, *(series[idx] for series in (data1, data2) for idx in (0, -1)))
(348,) 0.13940522192757862 -- -- -0.7736267051786273
/software/anaconda53/envs/cdms2/lib/python3.7/site-packages/numpy/ma/core.py:3174: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  dout = self.data[indx]
/software/anaconda53/envs/cdms2/lib/python3.7/site-packages/numpy/ma/core.py:3206: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  mout = _mask[indx]