The CDAT software was developed by LLNL. This tutorial was written by Charles Doutriaux. This work was performed under the auspices of the U.S. Department of Energy by Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344.
!conda create -n cdms -y -c cdat/label/nightly -c conda-forge cdms2 libnetcdf==4.6.2
!conda activate cdms
from __future__ import print_function
import cdat_info
import os, sys
# Directory holding the sample data used throughout this tutorial
data_path = cdat_info.get_sampledata_path()
# Build the "pythonX.Y" directory name from version_info: slicing the
# sys.version string ("3.10.4"[0:3] -> "3.1") gives the wrong name on
# Python 3.10 and later
version = "python%d.%d" % sys.version_info[:2]
# Download the sample files named in cdms2's test_data_files.txt into data_path
cdat_info.download_sample_data_files(
    os.path.join(sys.prefix, "lib", version, "site-packages", "share",
                 "cdms2", "test_data_files.txt"),
    data_path)
# Open one of the sample files
import cdms2
filename = os.path.join(data_path, "clt.nc")
f = cdms2.open(filename)

# Ask the file what it contains: variables, dimensions and global attributes
var = f.listvariable()
dims = f.listdimension()
attr = f.listglobal()

# Report the three listings
print("variables in the file:", var)
print("Dimensions in the file:", dims)
print("File attributes:", attr)
You can further query the variables in the file without having to read them into memory.
To create a file variable,
simply use square brackets: [ and ]
# Square brackets return a file variable: the data is not read into memory
clt = f["clt"]

# Print a summary of the variable to the screen
clt.info()

# Collect the shape, the id/name, the axes (dimensions) and the attributes
sh = clt.shape
name = clt.id
axes = clt.getAxisList()
attributes = clt.attributes

# Show each piece of metadata in turn
print("The variable shape is:", sh)
print("Variable id/name:", name)
print("variable dimensions:", axes)
print("Variable attributes:", attributes.keys())
# Determine, for every axis of the variable, whether it is a time axis
for a in axes:
    if a.isTime():
        print("Axes %s is a time axis" % a.id)
    else:
        print("Axes %s is not a time axis" % a.id)

# Similar test functions exist for latitude, longitude and level
for a in axes:
    print(a.isLatitude(), a.isLongitude(), a.isLevel())
# Time, latitude and longitude axes can be fetched directly by kind
aTime = clt.getTime()
lat = clt.getLatitude()
lon = clt.getLongitude()

# When the variable has no axis of the requested kind, None is returned
lev = clt.getLevel()
print("Level dim:", lev)

# Any axis can also be retrieved by its position
dim0 = clt.getAxis(0)
print("The first dim name is:", dim0.id)

# Print a summary of that axis
dim0.info()

# Slicing an axis gives access to its values
print("Latitude values:", lat[:])
cdms is really good at dealing with times (see the dedicated cdtime Jupyter notebook for more on time)
# Raw (in-file) time values or indices are hard to read; the time axis can
# instead be expressed as 'component time' or as 'relative time'
tim = clt.getTime()
tc = tim.asComponentTime()
tr = tim.asRelativeTime("days since 2017")

# Show the first two time steps in each representation
print("First 2 times are:", tc[:2])
print("first 2 times in days since 2017:", tr[:2])
# Parentheses (instead of square brackets) read the data into memory.
# The whole variable:
clt = f("clt")
print("Shape:", clt.shape)

# A region, selected by coordinate values found in the file
clt = f("clt", latitude=(0, 90), longitude=(-180, 180))
print("Shape:", clt.shape)

# A selection by indices
clt = f("clt", time=slice(0, 12))
print("Shape:", clt.shape)

# Time can be retrieved from actual dates (provided the units in the file are good)
clt = f("clt", time=("1980", "1983-12-31"))
print("Shape:", clt.shape)

# Data can also be read directly from a file variable
CLT = f["clt"]
clt = CLT(
    time=("1980", "1984-12-31"),
    latitude=(0, 90),
    longitude=slice(0, None),
)
print("Shape:", clt.shape)

# Or from an existing variable
clt2 = clt(time=slice(0, 4))
print("Shape:", clt2.shape)
# The dimension order can be chosen at read time, using axis letters
clt = f("clt", order="xty")
print("Shape:", clt.shape)

# ... using dimension indices
clt = f("clt", order="210")
print("Shape:", clt.shape)

# ... or using dimension names
clt = f("clt", order="(longitude)(time)(latitude)")
print("Shape:", clt.shape)
cdms variables are a subclass of numpy arrays, so for the most part anything you can do with numpy can be done with cdms variables
# Read the full variable, then keep the same month of every year
# (the data is monthly, so take every 12th time step)
clt = f("clt")
subset = clt[::12]
print("Shape:", subset.shape)

# A cdms variable can be converted to a raw numpy array
nparray = clt.filled()
print(type(clt), type(nparray))

# ... or to a masked array
maarray = clt.asma()
print(type(clt), type(maarray))
import MV2

# Build a cdms variable from a numpy (or numpy.ma) array and give it an id
myvar = MV2.array(nparray)
myvar.id = "newclt"
myvar.info()

# Axes can be taken, all at once, from another variable
myvar.setAxisList(clt.getAxisList())
myvar.info()
# Axes can also be attached one at a time, by position
for i in range(myvar.ndim):
    ax = clt.getAxis(i)
    print("Setting axis %i to %s" % (i,ax.id))
    myvar.setAxis(i,ax)
# Show the variable once every axis has been set
myvar.info()
# Axes can also be built by hand
newtime = cdms2.createAxis(range(120))
# Name of the dimension
newtime.id = "time"
# Tell cdms to add the attributes that make this a time axis
newtime.designateTime()
newtime.units = "months since 2017"

# Replace the first axis of the variable with the new one
myvar.setAxis(0, newtime)
# Notice that the time axis changed
myvar.info()
# By default cdms2 saves files as compressed NetCDF4; print the default
# shuffle, deflate and deflate-level settings
print("Default Shuffle:", cdms2.getNetcdfShuffleFlag())
print("Default Deflate:", cdms2.getNetcdfDeflateFlag())
print("Default Deflate Level:", cdms2.getNetcdfDeflateLevelFlag())

# Turn all of it off so that NetCDF3 classic files are produced
value = 0
cdms2.setNetcdfShuffleFlag(value)       # value is either 0 or 1
cdms2.setNetcdfDeflateFlag(value)       # value is either 0 or 1
cdms2.setNetcdfDeflateLevelFlag(value)  # value is an integer from 0 to 9 inclusive

# Confirm the new settings
print("Shuffle:", cdms2.getNetcdfShuffleFlag())
print("Deflate:", cdms2.getNetcdfDeflateFlag())
print("Deflate Level:", cdms2.getNetcdfDeflateLevelFlag())
# Open a file for writing ("w" erases the file if it is already there)
f2 = cdms2.open("mydata.nc", "w")
try:
    f2.write(myvar)
finally:
    # Close even when the write fails so the file handle is not leaked
    f2.close()