"""A collection of diagnostic and interpolation routines for use with
output from the Weather Research and Forecasting (WRF-ARW) Model.

"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import os
from sys import version_info
from copy import copy
from collections import Iterable, Mapping, OrderedDict
from itertools import product
from types import GeneratorType
import datetime as dt
from math import floor, copysign
from inspect import getmodule
try:
from inspect import signature
except ImportError:
from inspect import getargspec
try:
from inspect import getargvalues
except ImportError:
from inspect import getgeneratorlocals
import numpy as np
import numpy.ma as ma
from .config import xarray_enabled
from .projection import getproj, NullProjection
from .constants import Constants, ALL_TIMES
from .py3compat import (viewitems, viewkeys, viewvalues, isstr, py2round,
py3range, ucode)
from .cache import cache_item, get_cached_item
if xarray_enabled():
from xarray import DataArray
from pandas import NaT
# Maps each lat/lon coordinate variable name to its full (lat, lon) pair.
# Covers the mass grid (XLAT/XLONG), met_em files (_M), the staggered
# u/v grids (_U/_V), and the computational grid (CLAT/CLONG).
_COORD_PAIR_MAP = {"XLAT" : ("XLAT", "XLONG"),
                   "XLONG" : ("XLAT", "XLONG"),
                   "XLAT_M" : ("XLAT_M", "XLONG_M"),
                   "XLONG_M" : ("XLAT_M", "XLONG_M"),
                   "XLAT_U" : ("XLAT_U", "XLONG_U"),
                   "XLONG_U" : ("XLAT_U", "XLONG_U"),
                   "XLAT_V" : ("XLAT_V", "XLONG_V"),
                   "XLONG_V" : ("XLAT_V", "XLONG_V"),
                   "CLAT" : ("CLAT", "CLONG"),
                   "CLONG" : ("CLAT", "CLONG")}

# All recognized lat/lon coordinate variable names (lat names at even
# indexes, lon names at odd indexes — _find_coord_names relies on this).
_COORD_VARS = ("XLAT", "XLONG", "XLAT_M", "XLONG_M", "XLAT_U", "XLONG_U",
               "XLAT_V", "XLONG_V", "CLAT", "CLONG")

# Latitude-only subset of _COORD_VARS
_LAT_COORDS = ("XLAT", "XLAT_M", "XLAT_U", "XLAT_V", "CLAT")

# Longitude-only subset of _COORD_VARS
_LON_COORDS = ("XLONG", "XLONG_M", "XLONG_U","XLONG_V", "CLONG")

# Recognized time coordinate variable names
_TIME_COORD_VARS = ("XTIME",)
def is_time_coord_var(varname):
    """Return True if *varname* names a time coordinate variable."""
    return varname in _TIME_COORD_VARS
def get_coord_pairs(varname):
    """Return the (lat, lon) coordinate variable pair for *varname*."""
    return _COORD_PAIR_MAP[varname]
def is_multi_time_req(timeidx):
    """Return True if all times were requested (a *timeidx* of None)."""
    return timeidx is None
def is_multi_file(wrfnc):
    """Return True if *wrfnc* is a sequence of files (any non-string
    iterable) rather than a single file object."""
    return isinstance(wrfnc, Iterable) and not isstr(wrfnc)
def has_time_coord(wrfnc):
    """Return True if the file contains the XTIME coordinate variable."""
    return "XTIME" in wrfnc.variables
def is_mapping(wrfnc):
    """Return True if *wrfnc* is a dict-like mapping of sequences."""
    return isinstance(wrfnc, Mapping)
def _generator_copy(gen):
    """Return a new generator equivalent to *gen*, restarted from the
    beginning.

    The generator's defining function is looked up by name in its module
    (or in __main__ for interactively defined generators) and called again
    with the local values extracted from the generator's frame.

    NOTE(review): passing ALL frame locals as keyword arguments assumes the
    generator has not yet created additional locals — TODO confirm callers
    only copy un-started generators.
    """
    funcname = gen.__name__

    try:
        argvals = getargvalues(gen.gi_frame)
    except NameError:
        # getargvalues was not importable (see the module's try/except
        # imports), so fall back to getgeneratorlocals
        argvals = getgeneratorlocals(gen)

    module = getmodule(gen.gi_frame)

    if module is not None:
        # Bug fix: module objects have no .get() method; look the
        # generator function up by name with getattr instead.
        res = getattr(module, funcname)(**argvals.locals)
    else:
        # Created in jupyter or the python interpreter
        import __main__
        res = getattr(__main__, funcname)(**argvals.locals)

    return res
def test():
    """Simple generator used for testing; yields 1, 2, 3."""
    for value in (1, 2, 3):
        yield value
class TestGen(object):
    """Simple custom iterator used for testing; yields 0 .. count-1.

    Implements both the Python 2 (next) and Python 3 (__next__) iterator
    protocols.
    """

    def __init__(self, count=3):
        self._total = count
        self._i = 0

    def __iter__(self):
        return self

    def next(self):
        # Python 2 protocol
        if self._i >= self._total:
            raise StopIteration

        current = self._i
        self._i += 1
        return current

    # Python 3 protocol delegates to the Python 2 method
    def __next__(self):
        return self.next()
def latlon_coordvars(d):
    """Return the (lat, lon) coordinate variable names found in the
    mapping *d*, substituting None for whichever is absent."""
    lat_coord = next((name for name in _LAT_COORDS
                      if name in viewkeys(d)), None)
    lon_coord = next((name for name in _LON_COORDS
                      if name in viewkeys(d)), None)

    return lat_coord, lon_coord
def is_coordvar(varname):
    """Return True if *varname* is a lat/lon coordinate variable."""
    return varname in _COORD_VARS
class IterWrapper(Iterable):
    """A wrapper class for generators and custom iterable classes which
    returns a new iterator from the start of the sequence when __iter__
    is called."""

    def __init__(self, wrapped):
        self._wrapped = wrapped

    def __iter__(self):
        wrapped = self._wrapped
        if isinstance(wrapped, GeneratorType):
            # Generators can't be restarted; rebuild an equivalent one
            return _generator_copy(wrapped)
        # Custom iterable class: iterate over a shallow copy
        return iter(copy(wrapped))
def get_iterable(wrfseq):
    """Return a resetable iterable object for *wrfseq*.

    Single files, lists, tuples, dicts and IterWrapper instances pass
    through unchanged; generators and other custom iterables are wrapped
    (or converted to a dict for mappings) so they can be iterated more
    than once.
    """
    if not is_multi_file(wrfseq):
        return wrfseq

    if is_mapping(wrfseq):
        # generator/custom mapping types become a plain dict
        return wrfseq if isinstance(wrfseq, dict) else dict(wrfseq)

    if isinstance(wrfseq, (list, tuple, IterWrapper)):
        return wrfseq

    # generator/custom iterable class
    return IterWrapper(wrfseq)
def npvalues(array_type):
    """Return the raw numpy array behind a DataArray, re-masking NaN
    values when a _FillValue attribute is present; non-DataArray inputs
    pass through unchanged."""
    if not isinstance(array_type, DataArray):
        return array_type

    try:
        fill_value = array_type.attrs["_FillValue"]
    except KeyError:
        # No fill value recorded, so nothing to re-mask
        return array_type.values

    # DataArrays convert masked values to NaN; rebuild the masked array
    # and restore the original fill value
    masked = ma.masked_invalid(array_type.values, copy=False)
    masked.set_fill_value(fill_value)
    return masked
# Helper utilities for metadata
class either(object):
    """Callable that returns the first of several variable names that is
    present in a WRF file (or sequence/dict of files)."""

    def __init__(self, *varnames):
        self.varnames = varnames

    def __call__(self, wrfnc):
        # Reduce a sequence or mapping of files to a single file first
        if is_multi_file(wrfnc):
            if is_mapping(wrfnc):
                entry = wrfnc[next(iter(viewkeys(wrfnc)))]
                return self(entry)
            wrfnc = next(iter(wrfnc))

        for varname in self.varnames:
            if varname in wrfnc.variables:
                return varname

        raise ValueError("{} are not valid variable names".format(
            self.varnames))
class combine_with(object):
    """Callable that derives new dimension names and coordinates from an
    existing variable, removing and/or inserting dimensions.

    Dimensions in *remove_dims* are removed first; *new_dimnames* are
    then inserted before *insert_before* (or replace the dimension list
    entirely when *insert_before* is None).
    """

    def __init__(self, varname, remove_dims=None, insert_before=None,
                 new_dimnames=None, new_coords=None):
        self.varname = varname
        self.remove_dims = remove_dims
        self.insert_before = insert_before
        self.new_dimnames = [] if new_dimnames is None else new_dimnames
        self.new_coords = (OrderedDict() if new_coords is None
                           else new_coords)

    def __call__(self, var):
        new_dims = list(var.dims)
        new_coords = OrderedDict(var.coords)

        if self.remove_dims is not None:
            for dimname in self.remove_dims:
                new_dims.remove(dimname)
                del new_coords[dimname]

        if self.insert_before is not None:
            before_idx = new_dims.index(self.insert_before)
            new_dims = (new_dims[0:before_idx] + self.new_dimnames +
                        new_dims[before_idx:])
        elif self.new_dimnames is not None:
            new_dims = self.new_dimnames

        if self.new_coords is not None:
            new_coords.update(self.new_coords)

        return new_dims, new_coords
class combine_dims(object):
    """Callable that builds a shape tuple by picking dimension sizes from
    argument variables.

    *pairs* is a sequence of (argument_index, dim_index_or_indexes), e.g.:

        [(0, (-3, -2)),   # variable 1
         (1, -1)]         # variable 2
    """

    def __init__(self, pairs):
        self.pairs = pairs

    def __call__(self, *args):
        result = []
        for argidx, dimidxs in self.pairs:
            shape = args[argidx].shape
            if isinstance(dimidxs, Iterable):
                result.extend(shape[i] for i in dimidxs)
            else:
                result.append(shape[dimidxs])

        return tuple(result)
class from_var(object):
    """Callable that extracts an attribute from a named variable found in
    a wrapped function's arguments (used for metadata handling)."""

    def __init__(self, varname, attribute):
        self.varname = varname
        self.attribute = attribute

    def __call__(self, wrapped, *args, **kwargs):
        vard = from_args(wrapped, (self.varname,), *args, **kwargs)

        var = vard[self.varname] if vard is not None else None

        # Only DataArrays carry the attrs mapping we need
        if isinstance(var, DataArray):
            return var.attrs.get(self.attribute, None)

        return None
def _corners_moved(wrfnc, first_ll_corner, first_ur_corner, latvar, lonvar):
    """Return True if any time in *wrfnc* has lower-left/upper-right
    corner lat/lons differing from the supplied first-file corners."""
    lats = wrfnc.variables[latvar]
    lons = wrfnc.variables[lonvar]

    ndims = len(lats.shape)  # PyNIO does not support ndim

    # Need to check all times
    for t in py3range(lats.shape[-3]):
        ll_idxs = [0] * ndims
        ll_idxs[-3] = t
        ll_idxs = tuple(ll_idxs)

        ur_idxs = [-1] * ndims
        ur_idxs[-3] = t
        ur_idxs = tuple(ur_idxs)

        moved = (first_ll_corner[0] != lats[ll_idxs] or
                 first_ll_corner[1] != lons[ll_idxs] or
                 first_ur_corner[0] != lats[ur_idxs] or
                 first_ur_corner[1] != lons[ur_idxs])
        if moved:
            return True

    return False
def is_moving_domain(wrfseq, varname=None, latvar=either("XLAT", "XLAT_M"),
                     lonvar=either("XLONG", "XLONG_M"), _key=None):
    """Return True if the domain corner points move across the sequence
    of files in *wrfseq*.

    Parameters:
        wrfseq: a single file object, a sequence of file objects, or a
            mapping of sequences (only the first mapping entry is checked).
        varname: optional variable name whose 'coordinates' attribute
            selects which lat/lon variables to compare.
        latvar, lonvar: lat/lon variable names, or `either` objects that
            pick the first name present in the file(s).
        _key: cache key for storing/retrieving the result.
    """
    if isinstance(latvar, either):
        latvar = latvar(wrfseq)

    if isinstance(lonvar, either):
        lonvar = lonvar(wrfseq)

    # In case it's just a single file
    if not is_multi_file(wrfseq):
        wrfseq = [wrfseq]

    # Slow, but safe.  Compare the corner points to the first item and see
    # if any move.  Use iterator protocol in case wrfseq is not a
    # list/tuple.
    if not is_mapping(wrfseq):
        wrf_iter = iter(wrfseq)
        first_wrfnc = next(wrf_iter)
    else:
        # Currently only checking the first dict entry.
        dict_key = next(iter(viewkeys(wrfseq)))
        entry = wrfseq[dict_key]
        key = _key[dict_key] if _key is not None else None
        return is_moving_domain(entry, varname, latvar, lonvar, key)

    # The long way of checking all lat/lon corner points.  Doesn't appear
    # to be a shortcut in the netcdf files.
    if varname is not None:
        try:
            coord_names = getattr(first_wrfnc.variables[varname],
                                  "coordinates").split()
        except AttributeError:
            # Variable doesn't have a coordinates attribute, use the
            # arguments
            lon_coord = lonvar
            lat_coord = latvar
        else:
            lon_coord = coord_names[0]
            lat_coord = coord_names[1]
    else:
        lon_coord = lonvar
        lat_coord = latvar

    # See if there is a cached value
    # NOTE: this local 'product' shadows itertools.product imported at
    # module level.
    product = "is_moving_{}_{}".format(lat_coord, lon_coord)
    moving = get_cached_item(_key, product)
    if moving is not None:
        return moving

    # Need to search all the files
    lats = first_wrfnc.variables[lat_coord]
    lons = first_wrfnc.variables[lon_coord]

    # Index tuples for the lower-left (all zeros) and upper-right
    # (-1 in the last two dims) corners of the first time step
    zero_idxs = [0]*len(lats.shape)  # PyNIO doesn't have ndim
    last_idxs = list(zero_idxs)
    last_idxs[-2:] = [-1]*2

    zero_idxs = tuple(zero_idxs)
    last_idxs = tuple(last_idxs)

    lat0 = lats[zero_idxs]
    lat1 = lats[last_idxs]
    lon0 = lons[zero_idxs]
    lon1 = lons[last_idxs]

    ll_corner = (lat0, lon0)
    ur_corner = (lat1, lon1)

    # Compare every remaining file's corners against the first file's
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            if _corners_moved(wrfnc, ll_corner, ur_corner,
                              lat_coord, lon_coord):
                cache_item(_key, product, True)
                return True

    cache_item(_key, product, False)
    return False
def _get_global_attr(wrfnc, attr):
val = getattr(wrfnc, attr, None)
# PyNIO puts single values in to an array
if isinstance(val, np.ndarray):
if len(val) == 1:
return val[0]
return val
def extract_global_attrs(wrfnc, attrs):
    """Return a dict mapping each name in *attrs* (a string or sequence
    of strings) to its global attribute value from the first file."""
    attrlist = [attrs] if isstr(attrs) else attrs

    if is_multi_file(wrfnc):
        if is_mapping(wrfnc):
            # Use the first entry of the mapping
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return extract_global_attrs(entry, attrs)
        wrfnc = next(iter(wrfnc))

    return {attr: _get_global_attr(wrfnc, attr) for attr in attrlist}
def extract_dim(wrfnc, dim):
    """Return the size of dimension *dim* from the (first) file."""
    if is_multi_file(wrfnc):
        if is_mapping(wrfnc):
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return extract_dim(entry, dim)
        wrfnc = next(iter(wrfnc))

    d = wrfnc.dimensions[dim]
    # PyNIO stores a plain int; netCDF4 stores a Dimension object
    return d if isinstance(d, int) else len(d)
def _combine_dict(wrfdict, varname, timeidx, method, meta, _key):
    """Dictionary combination creates a new left index for each key, then
    does a cat or join for the list of files for that key.

    Each dict value is extracted with _extract_var and stacked along a
    new leftmost axis; with metadata enabled, the keys become the values
    of a new 'key_0' coordinate (existing 'key_n' coordinates from nested
    dictionaries are shifted one position to the right).
    """
    keynames = []
    numkeys = len(wrfdict)

    key_iter = iter(viewkeys(wrfdict))
    first_key = next(key_iter)
    keynames.append(first_key)

    is_moving = is_moving_domain(wrfdict, varname, _key=_key)

    # Not quite sure how to handle coord caching with dictionaries, so
    # disabling it for now by setting _key to None.
    first_array = _extract_var(wrfdict[first_key], varname,
                               timeidx, is_moving=is_moving, method=method,
                               squeeze=False, cache=None, meta=meta,
                               _key=_key[first_key])

    # Create the output data numpy array based on the first array
    outdims = [numkeys]
    outdims += first_array.shape
    outdata = np.empty(outdims, first_array.dtype)
    outdata[0,:] = first_array[:]

    # Fill the remaining keys' slots, enforcing a uniform shape
    idx = 1
    while True:
        try:
            key = next(key_iter)
        except StopIteration:
            break
        else:
            keynames.append(key)
            vardata = _extract_var(wrfdict[key], varname, timeidx,
                                   is_moving=is_moving, method=method,
                                   squeeze=False, cache=None, meta=meta,
                                   _key=_key[key])

            if outdata.shape[1:] != vardata.shape:
                raise ValueError("data sequences must have the "
                                 "same size for all dictionary keys")
            outdata[idx,:] = npvalues(vardata)[:]
            idx += 1

    if xarray_enabled() and meta:
        outname = str(first_array.name)
        # Note: assumes that all entries in dict have same coords
        outcoords = OrderedDict(first_array.coords)

        # First find and store all the existing key coord names/values
        # This is applicable only if there are nested dictionaries.
        key_coordnames = []
        coord_vals = []
        existing_cnt = 0
        while True:
            key_coord_name = "key_{}".format(existing_cnt)

            if key_coord_name not in first_array.dims:
                break

            key_coordnames.append(key_coord_name)
            coord_vals.append(npvalues(first_array.coords[key_coord_name]))

            existing_cnt += 1

        # Now add the key coord name and values for THIS dictionary.
        # Put the new key_n name at the bottom, but the new values will
        # be at the top to be associated with key_0 (left most).  This
        # effectively shifts the existing 'key_n' coordinate values to the
        # right one dimension so *this* dictionary's key coordinate values
        # are at 'key_0'.
        key_coordnames.append(key_coord_name)
        coord_vals.insert(0, keynames)

        # make it so that key_0 is leftmost
        outdims = key_coordnames + list(first_array.dims[existing_cnt:])

        # Create the new 'key_n', value pairs
        for coordname, coordval in zip(key_coordnames, coord_vals):
            outcoords[coordname] = coordval

        outattrs = OrderedDict(first_array.attrs)

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdims, attrs=outattrs)
    else:
        outarr = outdata

    return outarr
def _find_coord_names(coords):
    """Return the (lat, lon, xtime) coordinate names present in *coords*,
    substituting None for any that are missing."""
    def _first_present(candidates):
        # First candidate found in coords, else None
        for name in candidates:
            if name in coords:
                return name
        return None

    # Even indexes of _COORD_VARS are lat names, odd indexes lon names
    lat_coord = _first_present(_COORD_VARS[0::2])
    lon_coord = _first_present(_COORD_VARS[1::2])
    xtime_coord = _first_present(_TIME_COORD_VARS)

    return lat_coord, lon_coord, xtime_coord
def _find_max_time_size(wrfseq):
    """Return the largest 'Time' dimension size found in the sequence."""
    max_times = 0
    for wrfnc in wrfseq:
        max_times = max(max_times, extract_dim(wrfnc, "Time"))

    return max_times
def _build_data_array(wrfnc, varname, timeidx, is_moving_domain, is_multifile,
                      _key):
    """Build an xarray.DataArray for *varname* from a single file,
    attaching lat/lon/time coordinates and projection metadata.

    Note: wrfnc is always a single netcdf file object.
    is_moving_domain and is_multifile are arguments indicating if the
    single file came from a sequence, and if that sequence has a moving
    domain.  Both arguments are used mainly for coordinate extraction and
    caching.
    """
    multitime = is_multi_time_req(timeidx)
    time_idx_or_slice = timeidx if not multitime else slice(None)
    var = wrfnc.variables[varname]
    data = var[time_idx_or_slice, :]
    time_coord = None

    # Want to preserve the time dimension
    if not multitime:
        data = data[np.newaxis, :]

    attrs = OrderedDict(var.__dict__)
    dimnames = var.dimensions[-data.ndim:]

    # WRF variables will have a coordinates attribute.  MET_EM files have
    # a stagger attribute which indicates the coordinate variable.
    try:
        # WRF files
        coord_attr = getattr(var, "coordinates")
    except AttributeError:
        if is_coordvar(varname):
            # Coordinate variable (most likely XLAT or XLONG)
            lat_coord, lon_coord = get_coord_pairs(varname)
            time_coord = None

            if has_time_coord(wrfnc):
                time_coord = "XTIME"

        elif is_time_coord_var(varname):
            lon_coord = None
            lat_coord = None
            time_coord = None
        else:
            try:
                # met_em files
                stag_attr = getattr(var, "stagger")
            except AttributeError:
                # No coordinate information available at all
                lon_coord = None
                lat_coord = None
            else:
                # For met_em files, use the stagger name to get the
                # lat/lon var
                lat_coord = "XLAT_{}".format(stag_attr)
                lon_coord = "XLONG_{}".format(stag_attr)
    else:
        # The coordinates attribute is "<lon> <lat> [<time>]"
        coord_names = coord_attr.split()
        lon_coord = coord_names[0]
        lat_coord = coord_names[1]

        try:
            time_coord = coord_names[2]
        except IndexError:
            time_coord = None

    coords = OrderedDict()

    # Handle lat/lon coordinates and projection information if available
    if lon_coord is not None and lat_coord is not None:
        # Using a cache for coordinate variables so the extraction only
        # happens once.
        lon_coord_dimkey = lon_coord + "_dim"
        lon_coord_valkey = lon_coord + "_val"
        lat_coord_dimkey = lat_coord + "_dim"
        lat_coord_valkey = lat_coord + "_val"

        lon_coord_dims = get_cached_item(_key, lon_coord_dimkey)
        lon_coord_vals = get_cached_item(_key, lon_coord_valkey)
        if lon_coord_dims is None or lon_coord_vals is None:
            lon_var = wrfnc.variables[lon_coord]
            lon_coord_dims = lon_var.dimensions
            lon_coord_vals = lon_var[:]

            # Only cache here if the domain is not moving, otherwise
            # caching is handled by cat/join
            if not is_moving_domain:
                cache_item(_key, lon_coord_dimkey, lon_coord_dims)
                cache_item(_key, lon_coord_valkey, lon_coord_vals)

        lat_coord_dims = get_cached_item(_key, lat_coord_dimkey)
        lat_coord_vals = get_cached_item(_key, lat_coord_valkey)
        if lat_coord_dims is None or lat_coord_vals is None:
            lat_var = wrfnc.variables[lat_coord]
            lat_coord_dims = lat_var.dimensions
            lat_coord_vals = lat_var[:]

            # Only cache here if the domain is not moving, otherwise
            # caching is done in cat/join
            if not is_moving_domain:
                cache_item(_key, lat_coord_dimkey, lat_coord_dims)
                cache_item(_key, lat_coord_valkey, lat_coord_vals)

        time_coord_vals = None
        if time_coord is not None:
            # If not from a multifile sequence, then cache the time
            # coordinate.  Otherwise, handled in cat/join.
            if not is_multifile:
                time_coord_vals = get_cached_item(_key, time_coord)

                if time_coord_vals is None:
                    time_coord_vals = wrfnc.variables[time_coord][:]

                    # NOTE: this inner check is redundant inside the
                    # outer 'if not is_multifile' but is harmless
                    if not is_multifile:
                        cache_item(_key, time_coord, time_coord_vals)
            else:
                time_coord_vals = wrfnc.variables[time_coord][:]

        if multitime:
            if is_moving_domain:
                # Special case with a moving domain in a multi-time file,
                # otherwise the projection parameters don't change
                coords[lon_coord] = lon_coord_dims, lon_coord_vals
                coords[lat_coord] = lat_coord_dims, lat_coord_vals

                # Returned lats/lons arrays will have a time dimension,
                # so proj will need to be a list due to moving corner
                # points
                lats, lons, proj_params = get_proj_params(wrfnc,
                                                          timeidx,
                                                          varname)
                proj = [getproj(lats=lats[i,:],
                                lons=lons[i,:],
                                **proj_params)
                        for i in py3range(lats.shape[0])]
            else:
                # Domain not moving: drop the time dimension from the
                # coordinate arrays
                coords[lon_coord] = (lon_coord_dims[1:],
                                     lon_coord_vals[0,:])
                coords[lat_coord] = (lat_coord_dims[1:],
                                     lat_coord_vals[0,:])

                # Domain not moving, so just get the first time
                lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
                proj = getproj(lats=lats, lons=lons, **proj_params)

            if time_coord is not None:
                coords[time_coord] = (lon_coord_dims[0], time_coord_vals)
        else:
            # Single time requested: take that time's coordinate slice
            coords[lon_coord] = (lon_coord_dims[1:],
                                 lon_coord_vals[timeidx,:])
            coords[lat_coord] = (lat_coord_dims[1:],
                                 lat_coord_vals[timeidx,:])

            if time_coord is not None:
                coords[time_coord] = (lon_coord_dims[0],
                                      [time_coord_vals[timeidx]])

            lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
            proj = getproj(lats=lats, lons=lons, **proj_params)

        attrs["projection"] = proj

    # Attach datetimes along the Time dimension
    if dimnames[0] == "Time":
        t = extract_times(wrfnc, timeidx, meta=False, do_xtime=False)
        if not multitime:
            t = [t]
        coords[dimnames[0]] = t

    data_array = DataArray(data, name=varname, dims=dimnames, coords=coords,
                           attrs=attrs)

    return data_array
def _find_forward(wrfseq, varname, timeidx, is_moving, meta, _key):
    """Scan the file sequence front-to-back for the file containing
    *timeidx* (a non-negative aggregate time index) and return that time
    slice of *varname*."""
    comboidx = 0
    for wrfnc in wrfseq:
        numtimes = extract_dim(wrfnc, "Time")

        if timeidx < comboidx + numtimes:
            # Index of the requested time within this file
            filetimeidx = timeidx - comboidx

            if meta:
                return _build_data_array(wrfnc, varname, filetimeidx,
                                         is_moving, True, _key)

            result = wrfnc.variables[varname][filetimeidx, :]
            return result[np.newaxis, :]  # So that nosqueeze works

        comboidx += numtimes

    raise IndexError("timeidx {} is out of bounds".format(timeidx))
def _find_reverse(wrfseq, varname, timeidx, is_moving, meta, _key):
    """Scan the file sequence back-to-front for the file containing the
    negative aggregate *timeidx* and return that time slice of
    *varname*."""
    try:
        revwrfseq = reversed(wrfseq)
    except TypeError:
        # Not a sequence type; materialize it so it can be reversed
        revwrfseq = reversed(list(wrfseq))

    # Map the negative index to a 0-based index counted from the back
    revtimeidx = -timeidx - 1

    comboidx = 0
    for wrfnc in revwrfseq:
        numtimes = extract_dim(wrfnc, "Time")

        if revtimeidx < comboidx + numtimes:
            # Finds the "forward" sequence index, then counts that
            # number back from the back of the ncfile times,
            # since the ncfile needs to be iterated backwards as well.
            filetimeidx = numtimes - (revtimeidx - comboidx) - 1

            if meta:
                return _build_data_array(wrfnc, varname, filetimeidx,
                                         is_moving, True, _key)

            result = wrfnc.variables[varname][filetimeidx, :]
            return result[np.newaxis, :]  # So that nosqueeze works

        comboidx += numtimes

    raise IndexError("timeidx {} is out of bounds".format(timeidx))
def _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta, _key):
    """Dispatch to a forward or reverse sequence search depending on the
    sign of *timeidx*."""
    finder = _find_forward if timeidx >= 0 else _find_reverse
    return finder(wrfseq, varname, timeidx, is_moving, meta, _key)
# TODO: implement in C
def _cat_files(wrfseq, varname, timeidx, is_moving, squeeze, meta, _key):
    """Concatenate *varname* across a sequence of files along the Time
    dimension.

    For a single time index, only the file containing that time is read.
    For ALL_TIMES, one output array is allocated and each file's data is
    copied in; with metadata enabled, XTIME / lat / lon coordinates and
    projection objects are aggregated as well (and cached under *_key*
    when possible).
    """
    if is_moving is None:
        is_moving = is_moving_domain(wrfseq, varname, _key=_key)

    file_times = extract_times(wrfseq, ALL_TIMES, meta=False, do_xtime=False)

    multitime = is_multi_time_req(timeidx)

    # For single times, just need to find the ncfile and appropriate
    # time index, and return that array
    if not multitime:
        return _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta,
                                  _key)

    # time_idx_or_slice = timeidx if not multitime else slice(None)

    # If all times are requested, need to build a new array and cat
    # together all of the arrays in the sequence
    wrf_iter = iter(wrfseq)

    if xarray_enabled() and meta:
        first_var = _build_data_array(next(wrf_iter), varname,
                                      ALL_TIMES, is_moving, True, _key)
    else:
        first_var = (next(wrf_iter)).variables[varname][:]

    outdims = [len(file_times)]

    # Making a new time dim, so ignore this one
    outdims += first_var.shape[1:]

    outdata = np.empty(outdims, first_var.dtype)

    numtimes = first_var.shape[0]
    startidx = 0
    endidx = numtimes

    outdata[startidx:endidx, :] = first_var[:]

    if xarray_enabled() and meta:
        latname, lonname, timename = _find_coord_names(first_var.coords)

        # Flags marking coordinate arrays pulled from the cache (which
        # are already fully aggregated and must not be overwritten)
        timecached = False
        latcached = False
        loncached = False
        projcached = False

        outxtimes = None
        outlats = None
        outlons = None
        outprojs = None

        timekey = timename+"_cat" if timename is not None else None
        latkey = latname + "_cat" if latname is not None else None
        lonkey = lonname + "_cat" if lonname is not None else None
        projkey = "projection_cat" if is_moving else None

        if timename is not None:
            outxtimes = get_cached_item(_key, timekey)
            if outxtimes is None:
                outxtimes = np.empty(outdims[0])
                outxtimes[startidx:endidx] = npvalues(
                    first_var.coords[timename][:])
            else:
                timecached = True

        if is_moving:
            outcoorddims = outdims[0:1] + outdims[-2:]

            if latname is not None:
                # Try to pull from the coord cache
                outlats = get_cached_item(_key, latkey)
                if outlats is None:
                    outlats = np.empty(outcoorddims, first_var.dtype)
                    outlats[startidx:endidx, :] = npvalues(
                        first_var.coords[latname][:])
                else:
                    latcached = True

            if lonname is not None:
                outlons = get_cached_item(_key, lonkey)
                if outlons is None:
                    outlons = np.empty(outcoorddims, first_var.dtype)
                    outlons[startidx:endidx, :] = npvalues(
                        first_var.coords[lonname][:])
                else:
                    loncached = True

            # Projections also need to be aggregated
            outprojs = get_cached_item(_key, projkey)
            if outprojs is None:
                outprojs = np.empty(outdims[0], np.object)
                outprojs[startidx:endidx] = np.asarray(
                    first_var.attrs["projection"], np.object)[:]
            else:
                projcached = True

    startidx = endidx

    # Copy in each remaining file's data (and coords, when not cached)
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            vardata = wrfnc.variables[varname][:]

            numtimes = vardata.shape[0]
            endidx = startidx + numtimes

            outdata[startidx:endidx, :] = vardata[:]

            if xarray_enabled() and meta:
                # XTIME new in 3.7
                if timename is not None and not timecached:
                    xtimedata = wrfnc.variables[timename][:]
                    outxtimes[startidx:endidx] = xtimedata[:]

                if is_moving:
                    if latname is not None and not latcached:
                        latdata = wrfnc.variables[latname][:]
                        outlats[startidx:endidx, :] = latdata[:]

                    if lonname is not None and not loncached:
                        londata = wrfnc.variables[lonname][:]
                        outlons[startidx:endidx, :] = londata[:]

                    if not projcached:
                        lats, lons, proj_params = get_proj_params(wrfnc,
                                                                  ALL_TIMES,
                                                                  varname)
                        projs = [getproj(lats=lats[i,:],
                                         lons=lons[i,:],
                                         **proj_params)
                                 for i in py3range(lats.shape[0])]

                        outprojs[startidx:endidx] = np.asarray(
                            projs, np.object)[:]

            startidx = endidx

    if xarray_enabled() and meta:

        # Cache the coords if applicable
        if not latcached and outlats is not None:
            cache_item(_key, latkey, outlats)
        if not loncached and outlons is not None:
            cache_item(_key, lonkey, outlons)
        if not projcached and outprojs is not None:
            cache_item(_key, projkey, outprojs)
        if not timecached and outxtimes is not None:
            cache_item(_key, timekey, outxtimes)

        outname = ucode(first_var.name)
        outattrs = OrderedDict(first_var.attrs)
        outcoords = OrderedDict(first_var.coords)
        outdimnames = list(first_var.dims)

        if "Time" not in outdimnames:
            outdimnames.insert(0, "Time")

        if not multitime:
            file_times = [file_times[timeidx]]

        outcoords[outdimnames[0]] = file_times

        outcoords["datetime"] = outdimnames[0], file_times

        if timename is not None:
            outxtimes = outxtimes[:]
            outcoords[timename] = outdimnames[0], outxtimes

        # If the domain is moving, need to create the lat/lon coords
        # since they can't be copied
        if is_moving:
            outlatdims = [outdimnames[0]] + outdimnames[-2:]

            if latname is not None:
                outlats = outlats[:]
                outcoords[latname] = outlatdims, outlats
            if lonname is not None:
                outlons = outlons[:]
                outcoords[lonname] = outlatdims, outlons

            outattrs["projection"] = outprojs[:]

        outdata = outdata[:]

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdimnames, attrs=outattrs)

    else:
        outdata = outdata[:]
        outarr = outdata

    return outarr
def _get_numfiles(wrfseq):
try:
return len(wrfseq)
except TypeError:
wrf_iter = iter(wrfseq)
return sum(1 for _ in wrf_iter)
# TODO: implement in C
def _join_files(wrfseq, varname, timeidx, is_moving, meta, _key):
    """Join *varname* across a sequence of files along a new leftmost
    'file' dimension.

    Every file's data is placed at [file_idx, 0:numtimes]; files with
    fewer than the maximum number of times leave fill values behind, and
    the result is converted to a masked array in that case.  With
    metadata enabled, per-(file, time) datetimes, XTIME / lat / lon
    coordinates and projections are aggregated and cached when possible.
    """
    if is_moving is None:
        is_moving = is_moving_domain(wrfseq, varname, _key=_key)
    multitime = is_multi_time_req(timeidx)
    numfiles = _get_numfiles(wrfseq)
    maxtimes = _find_max_time_size(wrfseq)

    time_idx_or_slice = timeidx if not multitime else slice(None)
    file_times_less_than_max = False
    file_idx = 0

    # wrfseq might be a generator
    wrf_iter = iter(wrfseq)
    wrfnc = next(wrf_iter)
    numtimes = extract_dim(wrfnc, "Time")

    if xarray_enabled() and meta:
        first_var = _build_data_array(wrfnc, varname, ALL_TIMES, is_moving,
                                      True, _key)
        # Initialize all datetimes to NaT; files with fewer times leave
        # NaT entries behind
        time_coord = np.full((numfiles, maxtimes), int(NaT), "datetime64[ns]")
        time_coord[file_idx, 0:numtimes] = first_var.coords["Time"][:]
    else:
        first_var = wrfnc.variables[varname][:]

    if numtimes < maxtimes:
        file_times_less_than_max = True

    # Out dimensions will be the number of files, maxtimes, then the
    # non-time shapes from the first variable
    outdims = [numfiles]
    outdims += [maxtimes]
    outdims += first_var.shape[1:]

    # For join, always need to start with full masked values
    outdata = np.full(outdims, Constants.DEFAULT_FILL, first_var.dtype)
    outdata[file_idx, 0:numtimes, :] = first_var[:]

    # Create the secondary coordinate arrays
    if xarray_enabled() and meta:
        latname, lonname, timename = _find_coord_names(first_var.coords)
        outcoorddims = outdims[0:2] + outdims[-2:]

        # Flags marking coordinate arrays pulled from the cache (already
        # fully aggregated, so not refilled in the loop below)
        timecached = False
        latcached = False
        loncached = False
        projcached = False

        outxtimes = None
        outlats = None
        outlons = None
        outprojs = None

        timekey = timename+"_join" if timename is not None else None
        latkey = latname + "_join" if latname is not None else None
        lonkey = lonname + "_join" if lonname is not None else None
        projkey = "projection_join" if is_moving else None

        if timename is not None:
            outxtimes = get_cached_item(_key, timekey)
            if outxtimes is None:
                outxtimes = np.full(outdims[0:2], Constants.DEFAULT_FILL,
                                    first_var.dtype)
                outxtimes[file_idx, 0:numtimes] = first_var.coords[timename][:]
            else:
                timecached = True

        if is_moving:
            if latname is not None:
                outlats = get_cached_item(_key, latkey)
                if outlats is None:
                    outlats = np.full(outcoorddims, Constants.DEFAULT_FILL,
                                      first_var.dtype)
                    outlats[file_idx, 0:numtimes, :] = (
                        first_var.coords[latname][:])
                else:
                    latcached = True

            if lonname is not None:
                outlons = get_cached_item(_key, lonkey)
                if outlons is None:
                    outlons = np.full(outcoorddims, Constants.DEFAULT_FILL,
                                      first_var.dtype)
                    outlons[file_idx, 0:numtimes, :] = (
                        first_var.coords[lonname][:])
                else:
                    loncached = True

            # Projections also need two dimensions
            outprojs = get_cached_item(_key, projkey)
            if outprojs is None:
                outprojs = np.full(outdims[0:2], NullProjection(), np.object)

                outprojs[file_idx, 0:numtimes] = np.asarray(
                    first_var.attrs["projection"],
                    np.object)[:]
            else:
                projcached = True

    file_idx=1
    # Copy in each remaining file's data (and coords, when not cached)
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            numtimes = extract_dim(wrfnc, "Time")
            if numtimes < maxtimes:
                file_times_less_than_max = True
            outvar = wrfnc.variables[varname][:]

            if not multitime:
                outvar = outvar[np.newaxis, :]

            outdata[file_idx, 0:numtimes, :] = outvar[:]

            if xarray_enabled() and meta:
                # For join, the times are a function of fileidx
                file_times = extract_times(wrfnc, ALL_TIMES, meta=False,
                                           do_xtime=False)
                time_coord[file_idx, 0:numtimes] = np.asarray(
                    file_times, "datetime64[ns]")[:]

                if timename is not None and not timecached:
                    xtimedata = wrfnc.variables[timename][:]
                    outxtimes[file_idx, 0:numtimes] = xtimedata[:]

                if is_moving:
                    if latname is not None and not latcached:
                        latdata = wrfnc.variables[latname][:]
                        outlats[file_idx, 0:numtimes, :] = latdata[:]

                    if lonname is not None and not loncached:
                        londata = wrfnc.variables[lonname][:]
                        outlons[file_idx, 0:numtimes, :] = londata[:]

                    if not projcached:
                        lats, lons, proj_params = get_proj_params(wrfnc,
                                                                  ALL_TIMES,
                                                                  varname)

                        projs = [getproj(lats=lats[i,:],
                                         lons=lons[i,:],
                                         **proj_params)
                                 for i in py3range(lats.shape[0])]

                        outprojs[file_idx, 0:numtimes] = (
                            np.asarray(projs, np.object)[:])

            # Need to update coords here
            file_idx += 1

    # If any of the output files contain less than the max number of
    # times, then a mask array is needed to flag all the missing arrays
    # with missing values
    if file_times_less_than_max:
        outdata = np.ma.masked_values(outdata, Constants.DEFAULT_FILL)

    if xarray_enabled() and meta:
        # Cache the coords if applicable
        if not latcached and outlats is not None:
            cache_item(_key, latkey, outlats)
        if not loncached and outlons is not None:
            cache_item(_key, lonkey, outlons)
        if not projcached and outprojs is not None:
            cache_item(_key, projkey, outprojs)
        if not timecached and outxtimes is not None:
            cache_item(_key, timekey, outxtimes)

        outname = ucode(first_var.name)
        outcoords = OrderedDict(first_var.coords)
        outattrs = OrderedDict(first_var.attrs)
        # New dimensions
        outdimnames = ["file"] + list(first_var.dims)
        outcoords["file"] = [i for i in py3range(numfiles)]

        # Time needs to be multi dimensional, so use the default dimension
        del outcoords["Time"]

        time_coord = time_coord[:, time_idx_or_slice]
        if not multitime:
            time_coord = time_coord[:, np.newaxis]
        outcoords["datetime"] = outdimnames[0:2], time_coord

        if isinstance(outdata, np.ma.MaskedArray):
            outattrs["_FillValue"] = Constants.DEFAULT_FILL
            outattrs["missing_value"] = Constants.DEFAULT_FILL

        if timename is not None:
            outxtimes = outxtimes[:, time_idx_or_slice]
            if not multitime:
                outxtimes = outxtimes[:, np.newaxis]
            outcoords[timename] = outdimnames[0:2], outxtimes[:]

        # If the domain is moving, need to create the lat/lon coords
        # since they can't be copied
        if is_moving:
            outlatdims = outdimnames[0:2] + outdimnames[-2:]

            if latname is not None:
                outlats = outlats[:, time_idx_or_slice, :]
                if not multitime:
                    outlats = outlats[:, np.newaxis, :]
                outcoords[latname] = outlatdims, outlats
            if lonname is not None:
                outlons = outlons[:, time_idx_or_slice, :]
                if not multitime:
                    outlons = outlons[:, np.newaxis, :]
                outcoords[lonname] = outlatdims, outlons

            if not multitime:
                outattrs["projection"] = outprojs[:, timeidx]
            else:
                outattrs["projection"] = outprojs

        if not multitime:
            outdata = outdata[:, timeidx, :]
            outdata = outdata[:, np.newaxis, :]

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdimnames, attrs=outattrs)

    else:
        if not multitime:
            outdata = outdata[:, timeidx, :]
            outdata = outdata[:, np.newaxis, :]

        outarr = outdata

    return outarr
def combine_files(wrfseq, varname, timeidx, is_moving=None,
                  method="cat", squeeze=True, meta=True,
                  _key=None):
    """Combine *varname* across a sequence of files using either the
    'cat' (concatenate along Time) or 'join' (new file dimension) method.
    Dictionaries are combined per-key with a new leftmost key dimension.
    """
    # Handles generators, single files, lists, tuples, custom classes
    wrfseq = get_iterable(wrfseq)

    # Dictionary is unique
    if is_mapping(wrfseq):
        outarr = _combine_dict(wrfseq, varname, timeidx, method, meta, _key)
    else:
        methodname = method.lower()
        if methodname == "cat":
            outarr = _cat_files(wrfseq, varname, timeidx, is_moving,
                                squeeze, meta, _key)
        elif methodname == "join":
            outarr = _join_files(wrfseq, varname, timeidx, is_moving,
                                 meta, _key)
        else:
            raise ValueError("method must be 'cat' or 'join'")

    return outarr.squeeze() if squeeze else outarr
# Cache is a dictionary of already extracted variables
def _extract_var(wrfnc, varname, timeidx, is_moving,
                 method, squeeze, cache, meta, _key):
    """Extract *varname* from a single file or sequence of files,
    consulting *cache* first.

    Mainly used internally so variables don't get extracted multiple
    times, particularly to copy metadata.  This can be slow.
    """
    if cache is not None:
        try:
            cache_var = cache[varname]
        except KeyError:
            pass
        else:
            # Cache hit: strip metadata if it wasn't requested
            if not meta:
                return npvalues(cache_var)

            return cache_var

    multitime = is_multi_time_req(timeidx)
    multifile = is_multi_file(wrfnc)

    # Time coordinate variables are handled by extract_times
    if is_time_coord_var(varname):
        return extract_times(wrfnc, timeidx, method, squeeze, cache,
                             meta, do_xtime=True)

    if not multifile:
        if xarray_enabled() and meta:
            if is_moving is None:
                is_moving = is_moving_domain(wrfnc, varname, _key=_key)
            result = _build_data_array(wrfnc, varname, timeidx, is_moving,
                                       multifile, _key)
        else:
            if not multitime:
                result = wrfnc.variables[varname][timeidx,:]
                result = result[np.newaxis, :]  # So that nosqueeze works
            else:
                result = wrfnc.variables[varname][:]
    else:
        # Squeeze handled in this routine, so just return it
        return combine_files(wrfnc, varname, timeidx, is_moving,
                             method, squeeze, meta, _key)

    return result.squeeze() if squeeze else result
def extract_vars(wrfnc, timeidx, varnames, method="cat", squeeze=True,
                 cache=None, meta=True, _key=None):
    """Return a dict mapping each requested variable name to its data.

    *varnames* may be a single name or a sequence of names.
    """
    if isstr(varnames):
        names = [varnames]
    else:
        names = varnames

    extracted = {}
    for name in names:
        extracted[name] = _extract_var(wrfnc, name, timeidx, None,
                                       method, squeeze, cache, meta,
                                       _key)
    return extracted
# Python 3 compatibility
def npbytes_to_str(var):
    """Yield each entry of the byte array *var* decoded as UTF-8."""
    for ch in var[:]:
        yield bytes(ch).decode("utf-8")
def _make_time(timearr):
    """Decode a character array into a datetime.

    The characters are expected to follow the WRF 'Times' layout
    'YYYY-MM-DD_HH:MM:SS'.
    """
    timestr = "".join(npbytes_to_str(timearr))
    return dt.datetime.strptime(timestr, "%Y-%m-%d_%H:%M:%S")
def _file_times(wrfnc, do_xtime):
    """Generate the times stored in a single WRF file.

    When *do_xtime* is False the character-array 'Times' variable is
    decoded into datetimes; otherwise the raw XTIME values are yielded.
    """
    if do_xtime:
        xtimes = wrfnc.variables["XTIME"][:]
        for xtime in xtimes:
            yield xtime
    else:
        char_rows = wrfnc.variables["Times"][:, :]
        for row in char_rows:
            yield _make_time(row)
def _extract_time_map(wrfnc, timeidx, do_xtime, meta=False):
    """Extract the times for each entry of a mapping of WRF sequences.

    Returns a dict with the same keys as *wrfnc* whose values are the
    extracted times for that key's file sequence.

    Bug fix: extract_times takes (wrfnc, timeidx, method, squeeze, cache,
    meta, do_xtime), so *do_xtime* and *meta* must be passed by keyword.
    Passing them positionally handed do_xtime to *method* (and meta to
    *squeeze*), which fails as soon as method.lower() is called on a bool.
    """
    return {key: extract_times(wrfseq, timeidx, meta=meta,
                               do_xtime=do_xtime)
            for key, wrfseq in viewitems(wrfnc)}
def extract_times(wrfnc, timeidx, method="cat", squeeze=True, cache=None,
                  meta=False, do_xtime=False):
    """Return the model times for a file, sequence of files, or mapping.

    With *do_xtime* set, the XTIME variable is read instead of the
    'Times' character array.  *method* 'cat' produces a flat sequence,
    'join' produces one sub-sequence per file.  Returns None when XTIME
    is requested but absent (pre-3.7 output).
    """
    if is_mapping(wrfnc):
        return _extract_time_map(wrfnc, timeidx, do_xtime)

    multitime = is_multi_time_req(timeidx)

    if is_multi_file(wrfnc):
        wrf_list = wrfnc
    else:
        wrf_list = [wrfnc]

    how = method.lower()
    try:
        if how == "cat":
            time_list = []
            for wrf_file in wrf_list:
                time_list.extend(_file_times(wrf_file, do_xtime))
        elif how == "join":
            time_list = []
            for wrf_file in wrf_list:
                time_list.append(list(_file_times(wrf_file, do_xtime)))
        else:
            raise ValueError("invalid method argument '{}'".format(method))
    except KeyError:
        return None  # Thrown for pre-3.7 XTIME not existing

    if xarray_enabled() and meta:
        outattrs = OrderedDict()

        if how == "cat":
            outdimnames = ["Time"]
        else:
            outdimnames = ["fileidx", "Time"]

        if do_xtime:
            ncfile = next(iter(wrf_list))
            xtime_var = ncfile.variables["XTIME"]
            outattrs.update(xtime_var.__dict__)
            outname = "XTIME"
        else:
            outname = "times"
            outattrs["description"] = "model times [np.datetime64]"

        outarr = DataArray(time_list, name=outname, coords=None,
                           dims=outdimnames, attrs=outattrs)
    else:
        outarr = np.asarray(time_list)

    if multitime:
        return outarr
    return outarr[timeidx]
def is_standard_wrf_var(wrfnc, var):
    """Return True when *var* is found directly in the file's variables.

    For sequences, only the first file is inspected; for mappings, the
    first entry's sequence is checked recursively.
    """
    if is_multi_file(wrfnc):
        if is_mapping(wrfnc):
            first_key = next(iter(viewkeys(wrfnc)))
            return is_standard_wrf_var(wrfnc[first_key], var)
        wrfnc = next(iter(wrfnc))
    return var in wrfnc.variables
def is_staggered(var, wrfnc):
    """Return True when *var*'s shape differs from the unstaggered grid.

    The rightmost three dimensions of *var* are compared against the
    file's west_east, south_north, and bottom_top dimension sizes.
    """
    west_east = extract_dim(wrfnc, "west_east")
    south_north = extract_dim(wrfnc, "south_north")
    bottom_top = extract_dim(wrfnc, "bottom_top")
    return (var.shape[-1] != west_east
            or var.shape[-2] != south_north
            or var.shape[-3] != bottom_top)
def get_left_indexes(ref_var, expected_dims):
    """Return the sizes of the extra left-side dimensions of *ref_var*.

    For example, if a 2D variable carries an additional leading time
    dimension, this returns a tuple holding that dimension's size.  An
    empty list is returned when there are no extra dimensions (note:
    list vs tuple asymmetry preserved for compatibility with callers).
    """
    num_extra = ref_var.ndim - expected_dims
    if num_extra == 0:
        return []
    return tuple(ref_var.shape[:num_extra])
def iter_left_indexes(dims):
    """Yield the index tuples covering a sequence of dimension sizes.

    For example, dims of (2, 2) yields (0,0), (0,1), (1,0), (1,1).

    Arguments:
        - dims - a sequence of dimension sizes (e.g. ndarray.shape)
    """
    ranges = [py3range(size) for size in dims]
    for combo in product(*ranges):
        yield combo
def get_right_slices(var, right_ndims, fixed_val=0):
    """Build an index that keeps the rightmost *right_ndims* dimensions
    of *var* whole and pins every extra left dimension to *fixed_val*.

    Returns a list of full slices when there are no extra dimensions,
    otherwise a tuple of fixed indexes followed by full slices.
    """
    num_extra = var.ndim - right_ndims
    full = slice(None)
    if num_extra == 0:
        return [full] * right_ndims
    return tuple([fixed_val] * num_extra + [full] * right_ndims)
def get_proj_params(wrfnc, timeidx=0, varname=None):
    """Return (lats, lons, projection attributes) for a WRF file.

    The latitude/longitude coordinate variables are chosen from
    *varname*'s 'coordinates' attribute when given (or its known
    coordinate pair when it is itself a coordinate variable); otherwise
    they are discovered from the file's variables.  A single time index
    selects one time; a multi-time request returns all times.
    """
    proj_params = extract_global_attrs(wrfnc,
                                       attrs=("MAP_PROJ",
                                              "CEN_LAT", "CEN_LON",
                                              "TRUELAT1", "TRUELAT2",
                                              "MOAD_CEN_LAT", "STAND_LON",
                                              "POLE_LAT", "POLE_LON"))

    if is_multi_time_req(timeidx):
        time_idx_or_slice = slice(None)
    else:
        time_idx_or_slice = timeidx

    if varname is None:
        lat_coord, lon_coord = latlon_coordvars(wrfnc.variables)
    elif is_coordvar(varname):
        lat_coord, lon_coord = get_coord_pairs(varname)
    else:
        coord_names = getattr(wrfnc.variables[varname],
                              "coordinates").split()
        lon_coord = coord_names[0]
        lat_coord = coord_names[1]

    lats = wrfnc.variables[lat_coord][time_idx_or_slice, :]
    lons = wrfnc.variables[lon_coord][time_idx_or_slice, :]
    return lats, lons, proj_params
class CoordPair(object):
    """A simple container pairing x/y, i/j, and lat/lon coordinates.

    Any member may be left as None; the string helpers return None when
    either member of the requested pair is unset.
    """

    def __init__(self, x=None, y=None, i=None, j=None, lat=None, lon=None):
        self.x = x
        self.y = y
        self.i = i
        self.j = j
        self.lat = lat
        self.lon = lon

    def __repr__(self):
        # Each pair is reported only when its first member is set.
        parts = []
        if self.x is not None:
            parts.append("x={}".format(self.x))
            parts.append("y={}".format(self.y))
        if self.i is not None:
            parts.append("i={}".format(self.i))
            parts.append("j={}".format(self.j))
        if self.lat is not None:
            parts.append("lat={}".format(self.lat))
            parts.append("lon={}".format(self.lon))
        return "{}({})".format(type(self).__name__, ", ".join(parts))

    def __str__(self):
        return repr(self)

    def xy_str(self, fmt="{:.4f}, {:.4f}"):
        """Return 'x, y' formatted with *fmt*, or None if either is unset."""
        if self.x is not None and self.y is not None:
            return fmt.format(self.x, self.y)
        return None

    def latlon_str(self, fmt="{:.4f}, {:.4f}"):
        """Return 'lat, lon' formatted with *fmt*, or None if either is unset."""
        if self.lat is not None and self.lon is not None:
            return fmt.format(self.lat, self.lon)
        return None

    def ij_str(self, fmt="{:.4f}, {:.4f}"):
        """Return 'i, j' formatted with *fmt*, or None if either is unset."""
        if self.i is not None and self.j is not None:
            return fmt.format(self.i, self.j)
        return None
def from_args(func, argnames, *args, **kwargs):
    """Parse the call's args/kwargs for the requested argument values.

    When an argument was not supplied, its value falls back to the
    default found via the function's signature; names that cannot be
    located at all map to None.
    """
    if isstr(argnames):
        names = [argnames]
    else:
        names = argnames

    found = {}
    for name in names:
        location = arg_location(func, name, args, kwargs)
        if location is None:
            found[name] = None
        else:
            arg_list, idx = location
            found[name] = arg_list[idx]
    return found
def _args_to_list2(func, args, kwargs):
    """Python 2 implementation: flatten args/kwargs into one positional
    list with the signature's defaults filled in."""
    argspec = getargspec(func)

    # Start with the defaults, right-aligned against the parameter list.
    outargs = [None] * len(argspec.args)
    defaults = argspec.defaults
    if defaults:
        outargs[-len(defaults):] = list(defaults)

    # Overlay the supplied positional arguments...
    for idx, value in enumerate(args):
        outargs[idx] = value

    # ...then the supplied keyword arguments.
    for name, value in viewitems(kwargs):
        outargs[argspec.args.index(name)] = value

    return outargs
def _args_to_list3(func, args, kwargs):
sig = signature(func)
bound = sig.bind(*args, **kwargs)
bound.apply_defaults()
return [x for x in bound.arguments.values()]
def args_to_list(func, args, kwargs):
    """Converts the mixed args/kwargs to a single list of args"""
    if version_info > (3,):
        return _args_to_list3(func, args, kwargs)
    return _args_to_list2(func, args, kwargs)
def _arg_location2(func, argname, args, kwargs):
    """Python 2 implementation: locate *argname* within the flattened
    argument list.  Returns (arg_list, index), or None when the name is
    neither a declared parameter nor a supplied keyword."""
    argspec = getargspec(func)
    # Flatten first so any bad keyword raises just as it always did.
    flattened = _args_to_list2(func, args, kwargs)

    if argname not in argspec.args and argname not in kwargs:
        return None
    return flattened, argspec.args.index(argname)
def _arg_location3(func, argname, args, kwargs):
    """Python 3 implementation: locate *argname* within the flattened
    argument list.  Returns (arg_list, index), or None when the name is
    not a declared parameter."""
    param_names = list(signature(func).parameters.keys())
    flattened = _args_to_list3(func, args, kwargs)

    if argname not in param_names:
        return None
    return flattened, param_names.index(argname)
def arg_location(func, argname, args, kwargs):
    """Parses the function args, kargs and signature looking for the
    desired argument location (either in args, kargs, or the signature
    defaults), returning the full flattened argument list along with
    the argument's index within it.
    """
    locate = _arg_location3 if version_info > (3,) else _arg_location2
    return locate(func, argname, args, kwargs)
def psafilepath():
    """Return the full path to the bundled psadilookup.dat data file."""
    package_dir = os.path.dirname(__file__)
    return os.path.join(package_dir, "data", "psadilookup.dat")
def get_id(seq):
    """Return id(seq), or for mappings a dict of recursively computed
    ids keyed like the mapping."""
    if is_mapping(seq):
        # Recurse into each entry until a non-mapping is reached.
        return {key: get_id(value) for key, value in viewitems(seq)}
    return id(seq)