A collection of diagnostic and interpolation routines for use with output from the Weather Research and Forecasting (WRF-ARW) Model.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1387 lines
44 KiB

from __future__ import (absolute_import, division, print_function,
unicode_literals)
from sys import version_info
from copy import copy
from collections import Iterable, Mapping, OrderedDict
from itertools import product
from types import GeneratorType
import datetime as dt
from math import floor, copysign
from inspect import getmodule
try:
from inspect import signature
except ImportError:
from inspect import getargspec
try:
from inspect import getargvalues
except ImportError:
from inspect import getgeneratorlocals
import numpy as np
import numpy.ma as ma
from .config import xarray_enabled
from .projection import getproj, NullProjection
from .constants import Constants, ALL_TIMES
if xarray_enabled():
from xarray import DataArray
from pandas import NaT
__all__ = ["extract_vars", "extract_global_attrs", "extract_dim",
"combine_files", "is_standard_wrf_var", "extract_times",
"iter_left_indexes", "get_left_indexes", "get_right_slices",
"is_staggered", "get_proj_params", "viewitems", "viewkeys",
"viewvalues", "py2round", "py3range", "combine_with", "either",
"from_args", "arg_location", "args_to_list", "npvalues",
"CoordPair"]
_COORD_PAIR_MAP = {"XLAT" : ("XLAT", "XLONG"),
"XLONG" : ("XLAT", "XLONG"),
"XLAT_M" : ("XLAT_M", "XLONG_M"),
"XLONG_M" : ("XLAT_M", "XLONG_M"),
"XLAT_U" : ("XLAT_U", "XLONG_U"),
"XLONG_U" : ("XLAT_U", "XLONG_U"),
"XLAT_V" : ("XLAT_V", "XLONG_V"),
"XLONG_V" : ("XLAT_V", "XLONG_V"),
"CLAT" : ("CLAT", "CLONG"),
"CLONG" : ("CLAT", "CLONG")}
_COORD_VARS = ("XLAT", "XLONG", "XLAT_M", "XLONG_M", "XLAT_U", "XLONG_U",
"XLAT_V", "XLONG_V", "CLAT", "CLONG")
_TIME_COORD_VARS = ("XTIME",)
def _is_coord_var(varname):
return varname in _COORD_VARS
def _is_time_coord_var(varname):
return varname in _TIME_COORD_VARS
def _get_coord_pairs(varname):
return _COORD_PAIR_MAP[varname]
def _is_multi_time_req(timeidx):
return timeidx is None
def _is_multi_file(wrfnc):
return (isinstance(wrfnc, Iterable) and not isstr(wrfnc))
def _has_time_coord(wrfnc):
return "XTIME" in wrfnc.variables
def _is_mapping(wrfnc):
return isinstance(wrfnc, Mapping)
def _generator_copy(gen):
funcname = gen.__name__
try:
argvals = getargvalues(gen.gi_frame)
except NameError:
argvals = getgeneratorlocals(gen)
module = getmodule(gen.gi_frame)
if module is not None:
res = module.get(funcname)(**argvals.locals)
else:
# Created in jupyter or the python interpreter
import __main__
res = getattr(__main__, funcname)(**argvals.locals)
return res
def test():
q = [1,2,3]
for i in q:
yield i
class TestGen(object):
def __init__(self, count=3):
self._total = count
self._i = 0
def __iter__(self):
return self
def next(self):
if self._i >= self._total:
raise StopIteration
else:
val = self._i
self._i += 1
return val
# Python 3
def __next__(self):
return self.next()
class IterWrapper(Iterable):
"""A wrapper class for generators and custom iterable classes which returns
a new iterator from the start of the sequence when __iter__ is called"""
def __init__(self, wrapped):
self._wrapped = wrapped
def __iter__(self):
if isinstance(self._wrapped, GeneratorType):
return _generator_copy(self._wrapped)
else:
obj_copy = copy(self._wrapped)
return obj_copy.__iter__()
def _get_iterable(wrfseq):
"""Returns a resetable iterable object."""
if not _is_multi_file(wrfseq):
return wrfseq
else:
if not _is_mapping(wrfseq):
if isinstance(wrfseq, (list, tuple, IterWrapper)):
return wrfseq
else:
return IterWrapper(wrfseq) # generator/custom iterable class
else:
if isinstance(wrfseq, dict):
return wrfseq
else:
return dict(wrfseq) # generator/custom iterable class
# Dictionary python 2-3 compatibility stuff
def viewitems(d):
func = getattr(d, "viewitems", None)
if func is None:
func = d.items
return func()
def viewkeys(d):
func = getattr(d, "viewkeys", None)
if func is None:
func = d.keys
return func()
def viewvalues(d):
func = getattr(d, "viewvalues", None)
if func is None:
func = d.values
return func()
def isstr(s):
try:
return isinstance(s, basestring)
except NameError:
return isinstance(s, str)
# Python 2 rounding behavior
def _round2(x, d=0):
p = 10 ** d
return float(floor((x * p) + copysign(0.5, x)))/p
def py2round(x, d=0):
if version_info >= (3,):
return _round2(x, d)
return round(x, d)
def py3range(*args):
if version_info >= (3,):
return range(*args)
return xrange(*args)
def ucode(*args, **kwargs):
if version_info >= (3, ):
return str(*args, **kwargs)
return unicode(*args, **kwargs)
# Helper to extract masked arrays from DataArrays that convert to NaN
def npvalues(da):
if not isinstance(da, DataArray):
result = da
else:
try:
fill_value = da.attrs["_FillValue"]
except KeyError:
result = da.values
else:
result = ma.masked_invalid(da.values, copy=False)
result.set_fill_value(fill_value)
return result
# Helper utilities for metadata
class either(object):
def __init__(self, *varnames):
self.varnames = varnames
def __call__(self, wrfnc):
if _is_multi_file(wrfnc):
if not _is_mapping(wrfnc):
wrfnc = next(iter(wrfnc))
else:
entry = wrfnc[next(iter(viewkeys(wrfnc)))]
return self(entry)
for varname in self.varnames:
if varname in wrfnc.variables:
return varname
raise ValueError("{} are not valid variable names".format(
self.varnames))
class combine_with(object):
# Remove remove_idx first, then insert_idx is applied to removed set
def __init__(self, varname, remove_dims=None, insert_before=None,
new_dimnames=None, new_coords=None):
self.varname = varname
self.remove_dims = remove_dims
self.insert_before = insert_before
self.new_dimnames = new_dimnames if new_dimnames is not None else []
self.new_coords = (new_coords if new_coords is not None
else OrderedDict())
def __call__(self, var):
new_dims = list(var.dims)
new_coords = OrderedDict(var.coords)
if self.remove_dims is not None:
for dim in self.remove_dims:
new_dims.remove(dim)
del new_coords[dim]
if self.insert_before is not None:
insert_idx = new_dims.index(self.insert_before)
new_dims = (new_dims[0:insert_idx] + self.new_dimnames +
new_dims[insert_idx:])
elif self.new_dimnames is not None:
new_dims = self.new_dimnames
if self.new_coords is not None:
new_coords.update(self.new_coords)
return new_dims, new_coords
def _corners_moved(wrfnc, first_ll_corner, first_ur_corner, latvar, lonvar):
lats = wrfnc.variables[latvar]
lons = wrfnc.variables[lonvar]
# Need to check all times
for i in py3range(lats.shape[-3]):
start_idxs = [0]*len(lats.shape) # PyNIO does not support ndim
start_idxs[-3] = i
start_idxs = tuple(start_idxs)
end_idxs = [-1]*len(lats.shape)
end_idxs[-3] = i
end_idxs = tuple(end_idxs)
if (first_ll_corner[0] != lats[start_idxs] or
first_ll_corner[1] != lons[start_idxs] or
first_ur_corner[0] != lats[end_idxs] or
first_ur_corner[1] != lons[end_idxs]):
return True
return False
def _is_moving_domain(wrfseq, varname=None, latvar=either("XLAT", "XLAT_M"),
lonvar=either("XLONG", "XLONG_M")):
if isinstance(latvar, either):
latvar = latvar(wrfseq)
if isinstance(lonvar, either):
lonvar = lonvar(wrfseq)
# In case it's just a single file
if not _is_multi_file(wrfseq):
wrfseq = [wrfseq]
# Slow, but safe. Compare the corner points to the first item and see
# any move. User iterator protocol in case wrfseq is not a list/tuple.
if not _is_mapping(wrfseq):
wrf_iter = iter(wrfseq)
first_wrfnc = next(wrf_iter)
else:
entry = wrfseq[next(iter(viewkeys(wrfseq)))]
return _is_moving_domain(entry, varname, latvar, lonvar)
# Quick check on pressure coordinates, bypassing the need to search the
# domain corner points
try:
coord_names = getattr(first_wrfnc.variables["P"],
"coordinates").split()
except KeyError:
pass
else:
if "XTIME" in coord_names:
return True
else:
return False
# The long way of checking all lat/lon corner points
if varname is not None:
try:
coord_names = getattr(first_wrfnc.variables[varname],
"coordinates").split()
except AttributeError:
# Variable doesn't have a coordinates attribute, use the
# arguments
lon_coord = lonvar
lat_coord = latvar
else:
# If the XTIME variable is found to be a coordinate variable,
# then it's a moving domain file
try:
xtime_coord = coord_names[2]
except IndexError:
# XTIME is not a coordinate variable, if the variable is in the
# file, then this is not a moving domain file
if "XTIME" in first_wrfnc.variables:
return False
else:
# XTIME is a coordinate, so this is a moving domain file
if xtime_coord == "XTIME":
return True
lon_coord = coord_names[0]
lat_coord = coord_names[1]
else:
lon_coord = lonvar
lat_coord = latvar
# Probably a met_em file, need to search all the files
lats = first_wrfnc.variables[lat_coord]
lons = first_wrfnc.variables[lon_coord]
zero_idxs = [0]*len(lats.shape) # PyNIO doesn't have ndim
last_idxs = list(zero_idxs)
last_idxs[-2:] = [-1]*2
zero_idxs = tuple(zero_idxs)
last_idxs = tuple(last_idxs)
lat0 = lats[zero_idxs]
lat1 = lats[last_idxs]
lon0 = lons[zero_idxs]
lon1 = lons[last_idxs]
ll_corner = (lat0, lon0)
ur_corner = (lat1, lon1)
while True:
try:
wrfnc = next(wrf_iter)
except StopIteration:
break
else:
if _corners_moved(wrfnc, ll_corner, ur_corner,
lat_coord, lon_coord):
return True
return False
def _get_global_attr(wrfnc, attr):
val = getattr(wrfnc, attr, None)
# PyNIO puts single values in to an array
if isinstance(val, np.ndarray):
if len(val) == 1:
return val[0]
return val
def extract_global_attrs(wrfnc, attrs):
if isstr(attrs):
attrlist = [attrs]
else:
attrlist = attrs
multifile = _is_multi_file(wrfnc)
if multifile:
if not _is_mapping(wrfnc):
wrfnc = next(iter(wrfnc))
else:
entry = wrfnc[next(iter(viewkeys(wrfnc)))]
return extract_global_attrs(entry, attrs)
return {attr:_get_global_attr(wrfnc, attr) for attr in attrlist}
def extract_dim(wrfnc, dim):
if _is_multi_file(wrfnc):
if not _is_mapping(wrfnc):
wrfnc = next(iter(wrfnc))
else:
entry = wrfnc[next(iter(viewkeys(wrfnc)))]
return extract_dim(entry, dim)
d = wrfnc.dimensions[dim]
if not isinstance(d, int):
return len(d) #netCDF4
return d # PyNIO
def _combine_dict(wrfdict, varname, timeidx, method, meta):
"""Dictionary combination creates a new left index for each key, then
does a cat or join for the list of files for that key"""
keynames = []
numkeys = len(wrfdict)
key_iter = iter(viewkeys(wrfdict))
first_key = next(key_iter)
keynames.append(first_key)
is_moving = _is_moving_domain(wrfdict, varname)
first_array = _extract_var(wrfdict[first_key], varname,
timeidx, is_moving=is_moving, method=method,
squeeze=False, cache=None, meta=meta)
# Create the output data numpy array based on the first array
outdims = [numkeys]
outdims += first_array.shape
outdata = np.empty(outdims, first_array.dtype)
outdata[0,:] = first_array[:]
idx = 1
while True:
try:
key = next(key_iter)
except StopIteration:
break
else:
keynames.append(key)
vardata = _extract_var(wrfdict[key], varname, timeidx,
is_moving=is_moving, method=method,
squeeze=False, cache=None, meta=meta)
if outdata.shape[1:] != vardata.shape:
raise ValueError("data sequences must have the "
"same size for all dictionary keys")
outdata[idx,:] = npvalues(vardata)[:]
idx += 1
if xarray_enabled() and meta:
outname = str(first_array.name)
# Note: assumes that all entries in dict have same coords
outcoords = OrderedDict(first_array.coords)
outdims = ["key"] + list(first_array.dims)
outcoords["key"] = keynames
outattrs = OrderedDict(first_array.attrs)
outarr = DataArray(outdata, name=outname, coords=outcoords,
dims=outdims, attrs=outattrs)
else:
outarr = outdata
return outarr
def _find_coord_names(coords):
try:
lat_coord = [name for name in _COORD_VARS[0::2] if name in coords][0]
except IndexError:
lat_coord = None
try:
lon_coord = [name for name in _COORD_VARS[1::2] if name in coords][0]
except IndexError:
lon_coord = None
try:
xtime_coord = [name for name in _TIME_COORD_VARS if name in coords][0]
except IndexError:
xtime_coord = None
return lat_coord, lon_coord, xtime_coord
def _find_max_time_size(wrfseq):
wrf_iter = iter(wrfseq)
max_times = 0
while True:
try:
wrfnc = next(wrf_iter)
except StopIteration:
break
else:
t = extract_dim(wrfnc, "Time")
max_times = t if t >= max_times else max_times
return max_times
def _build_data_array(wrfnc, varname, timeidx, is_moving_domain):
multitime = _is_multi_time_req(timeidx)
time_idx_or_slice = timeidx if not multitime else slice(None)
var = wrfnc.variables[varname]
data = var[time_idx_or_slice, :]
# Want to preserve the time dimension
if not multitime:
data = data[np.newaxis, :]
attrs = OrderedDict(var.__dict__)
dimnames = var.dimensions[-data.ndim:]
# WRF variables will have a coordinates attribute. MET_EM files have
# a stagger attribute which indicates the coordinate variable.
try:
# WRF files
coord_attr = getattr(var, "coordinates")
except AttributeError:
if _is_coord_var(varname):
# Coordinate variable (most likely XLAT or XLONG)
lat_coord, lon_coord = _get_coord_pairs(varname)
time_coord = None
if is_moving_domain and _has_time_coord(wrfnc):
time_coord = "XTIME"
else:
try:
# met_em files
stag_attr = getattr(var, "stagger")
except AttributeError:
lon_coord = None
lat_coord = None
else:
# For met_em files, use the stagger name to get the lat/lon var
lat_coord = "XLAT_{}".format(stag_attr)
lon_coord = "XLONG_{}".format(stag_attr)
else:
coord_names = coord_attr.split()
lon_coord = coord_names[0]
lat_coord = coord_names[1]
try:
time_coord = coord_names[2]
except IndexError:
time_coord = None
coords = OrderedDict()
# Handle lat/lon coordinates and projection information if available
if lon_coord is not None and lat_coord is not None:
lon_coord_var = wrfnc.variables[lon_coord]
lat_coord_var = wrfnc.variables[lat_coord]
time_coord_var = (wrfnc.variables[time_coord]
if time_coord is not None
else None)
if multitime:
if is_moving_domain:
# Special case with a moving domain in a multi-time file,
# otherwise the projection parameters don't change
coords[lon_coord] = lon_coord_var.dimensions, lon_coord_var[:]
coords[lat_coord] = lat_coord_var.dimensions, lat_coord_var[:]
# Returned lats/lons arrays will have a time dimension, so proj
# will need to be a list due to moving corner points
lats, lons, proj_params = get_proj_params(wrfnc,
timeidx,
varname)
proj = [getproj(lats=lats[i,:],
lons=lons[i,:],
**proj_params) for i in py3range(lats.shape[0])]
if time_coord is not None:
coords[time_coord] = (lon_coord_var.dimensions[0],
time_coord_var[:])
else:
coords[lon_coord] = (lon_coord_var.dimensions[1:],
lon_coord_var[0,:])
coords[lat_coord] = (lat_coord_var.dimensions[1:],
lat_coord_var[0,:])
# Domain not moving, so just get the first time
lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
proj = getproj(lats=lats, lons=lons, **proj_params)
else:
coords[lon_coord] = (lon_coord_var.dimensions[1:],
lon_coord_var[timeidx,:])
coords[lat_coord] = (lat_coord_var.dimensions[1:],
lat_coord_var[timeidx,:])
lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
proj = getproj(lats=lats, lons=lons, **proj_params)
attrs["projection"] = proj
if dimnames[0] == "Time":
coords[dimnames[0]] = extract_times(wrfnc, timeidx)
data_array = DataArray(data, name=varname, dims=dimnames, coords=coords,
attrs=attrs)
return data_array
def _find_forward(wrfseq, varname, timeidx, is_moving, meta):
wrf_iter = iter(wrfseq)
comboidx = 0
while True:
try:
wrfnc = next(wrf_iter)
except StopIteration:
break
else:
numtimes = extract_dim(wrfnc, "Time")
if timeidx < comboidx + numtimes:
filetimeidx = timeidx - comboidx
if meta:
return _build_data_array(wrfnc, varname, filetimeidx,
is_moving)
else:
return wrfnc.variables[varname][filetimeidx, :]
else:
comboidx += numtimes
raise IndexError("invalid time index")
def _find_reverse(wrfseq, varname, timeidx, is_moving, meta):
try:
revwrfseq = reversed(wrfseq)
except TypeError:
revwrfseq = reversed(list(wrfseq))
wrf_iter = iter(revwrfseq)
revtimeidx = -timeidx - 1
comboidx = 0
while True:
try:
wrfnc = next(wrf_iter)
except StopIteration:
break
else:
numtimes = extract_dim(wrfnc, "Time")
if revtimeidx < comboidx + numtimes:
# Finds the "forward" sequence index, then counts that
# number back from the back of the ncfile times,
# since the ncfile needs to be iterated backwards as well.
filetimeidx = numtimes - (revtimeidx - comboidx) - 1
if meta:
return _build_data_array(wrfnc, varname, filetimeidx,
is_moving)
else:
return wrfnc.variables[varname][filetimeidx, :]
else:
comboidx += numtimes
raise IndexError("invalid time index")
def _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta):
if timeidx >= 0:
return _find_forward(wrfseq, varname, timeidx, is_moving, meta)
else:
return _find_reverse(wrfseq, varname, timeidx, is_moving, meta)
# TODO: implement in C
def _cat_files(wrfseq, varname, timeidx, is_moving, squeeze, meta):
if is_moving is None:
is_moving = _is_moving_domain(wrfseq, varname)
file_times = extract_times(wrfseq, ALL_TIMES)
multitime = _is_multi_time_req(timeidx)
# For single times, just need to find the ncfile and appropriate
# time index, and return that array
if not multitime:
return _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta)
#time_idx_or_slice = timeidx if not multitime else slice(None)
# If all times are requested, need to build a new array and cat together
# all of the arrays in the sequence
wrf_iter = iter(wrfseq)
if xarray_enabled() and meta:
first_var = _build_data_array(next(wrf_iter), varname,
ALL_TIMES, is_moving)
else:
first_var = (next(wrf_iter)).variables[varname][:]
outdims = [len(file_times)]
# Making a new time dim, so ignore this one
outdims += first_var.shape[1:]
outdata = np.empty(outdims, first_var.dtype)
numtimes = first_var.shape[0]
startidx = 0
endidx = numtimes
outdata[startidx:endidx, :] = first_var[:]
if xarray_enabled() and meta and is_moving:
latname, lonname, timename = _find_coord_names(first_var.coords)
outcoorddims = outdims[0:1] + outdims[-2:]
if latname is not None:
outlats = np.empty(outcoorddims, first_var.dtype)
outlats[startidx:endidx, :] = first_var.coords[latname][:]
if lonname is not None:
outlons = np.empty(outcoorddims, first_var.dtype)
outlons[startidx:endidx, :] = first_var.coords[lonname][:]
if timename is not None:
outxtimes = np.empty(outdims[0])
outxtimes[startidx:endidx] = first_var.coords[timename][:]
# Projections also need to be aggregated
outprojs = np.empty(outdims[0], np.object)
outprojs[startidx:endidx] = np.asarray(first_var.attrs["projection"],
np.object)[:]
startidx = endidx
while True:
try:
wrfnc = next(wrf_iter)
except StopIteration:
break
else:
vardata = wrfnc.variables[varname][:]
numtimes = vardata.shape[0]
endidx = startidx + numtimes
outdata[startidx:endidx, :] = vardata[:]
if xarray_enabled() and meta and is_moving:
if latname is not None:
latdata = wrfnc.variables[latname][:]
outlats[startidx:endidx, :] = latdata[:]
if lonname is not None:
londata = wrfnc.variables[lonname][:]
outlons[startidx:endidx, :] = londata[:]
if timename is not None:
xtimedata = wrfnc.variables[timename][:]
outxtimes[startidx:endidx] = xtimedata[:]
lats, lons, proj_params = get_proj_params(wrfnc,
ALL_TIMES,
varname)
projs = [getproj(lats=lats[i,:],
lons=lons[i,:],
**proj_params) for i in py3range(lats.shape[0])]
outprojs[startidx:endidx] = np.asarray(projs, np.object)[:]
startidx = endidx
if xarray_enabled() and meta:
outname = ucode(first_var.name)
outattrs = OrderedDict(first_var.attrs)
outcoords = OrderedDict(first_var.coords)
outdimnames = list(first_var.dims)
if "Time" not in outdimnames:
outdimnames.insert(0, "Time")
if not multitime:
file_times = [file_times[timeidx]]
outcoords[outdimnames[0]] = file_times
outcoords["datetime"] = outdimnames[0], file_times
# If the domain is moving, need to create the lat/lon/xtime coords
# since they can't be copied
if is_moving:
outlatdims = [outdimnames[0]] + outdimnames[-2:]
if latname is not None:
outlats = outlats[:]
outcoords[latname] = outlatdims, outlats
if lonname is not None:
outlons = outlons[:]
outcoords[lonname] = outlatdims, outlons
if timename is not None:
outxtimes = outxtimes[:]
outcoords[timename] = outdimnames[0], outxtimes
outattrs["projection"] = outprojs[:]
outdata = outdata[:]
outarr = DataArray(outdata, name=outname, coords=outcoords,
dims=outdimnames, attrs=outattrs)
else:
outdata = outdata[:]
outarr = outdata
return outarr
def _get_numfiles(wrfseq):
try:
return len(wrfseq)
except TypeError:
wrf_iter = iter(wrfseq)
return sum(1 for _ in wrf_iter)
# TODO: implement in C
def _join_files(wrfseq, varname, timeidx, is_moving, meta):
if is_moving is None:
is_moving = _is_moving_domain(wrfseq, varname)
multitime = _is_multi_time_req(timeidx)
numfiles = _get_numfiles(wrfseq)
maxtimes = _find_max_time_size(wrfseq)
time_idx_or_slice = timeidx if not multitime else slice(None)
file_times_less_than_max = False
file_idx = 0
# wrfseq might be a generator
wrf_iter = iter(wrfseq)
wrfnc = next(wrf_iter)
numtimes = extract_dim(wrfnc, "Time")
if xarray_enabled() and meta:
first_var = _build_data_array(wrfnc, varname, ALL_TIMES, is_moving)
time_coord = np.full((numfiles, maxtimes), int(NaT), "datetime64[ns]")
time_coord[file_idx, 0:numtimes] = first_var.coords["Time"][:]
else:
first_var = wrfnc.variables[varname][:]
if numtimes < maxtimes:
file_times_less_than_max = True
# Out dimensions will be the number of files, maxtimes, then the
# non-time shapes from the first variable
outdims = [numfiles]
outdims += [maxtimes]
outdims += first_var.shape[1:]
# For join, always need to start with full masked values
outdata = np.full(outdims, Constants.DEFAULT_FILL, first_var.dtype)
outdata[file_idx, 0:numtimes, :] = first_var[:]
# Create the secondary coordinate arrays
if xarray_enabled() and meta and is_moving:
latname, lonname, timename = _find_coord_names(first_var.coords)
outcoorddims = outdims[0:2] + outdims[-2:]
if latname is not None:
outlats = np.full(outcoorddims, Constants.DEFAULT_FILL,
first_var.dtype)
outlats[file_idx, 0:numtimes, :] = first_var.coords[latname][:]
if lonname is not None:
outlons = np.full(outcoorddims, Constants.DEFAULT_FILL,
first_var.dtype)
outlons[file_idx, 0:numtimes, :] = first_var.coords[lonname][:]
if timename is not None:
outxtimes = np.full(outdims[0:2], Constants.DEFAULT_FILL,
first_var.dtype)
outxtimes[file_idx, 0:numtimes] = first_var.coords[timename][:]
# Projections also need two dimensions
outprojs = np.full(outdims[0:2], NullProjection(), np.object)
outprojs[file_idx, 0:numtimes] = np.asarray(
first_var.attrs["projection"],
np.object)[:]
file_idx=1
while True:
try:
wrfnc = next(wrf_iter)
except StopIteration:
break
else:
numtimes = extract_dim(wrfnc, "Time")
if numtimes < maxtimes:
file_times_less_than_max = True
outvar = wrfnc.variables[varname][:]
if not multitime:
outvar = outvar[np.newaxis, :]
outdata[file_idx, 0:numtimes, :] = outvar[:]
if xarray_enabled() and meta:
file_times = extract_times(wrfnc, ALL_TIMES)
time_coord[file_idx, 0:numtimes] = np.asarray(file_times,
"datetime64[ns]")[:]
if xarray_enabled() and meta and is_moving:
if latname is not None:
latdata = wrfnc.variables[latname][:]
outlats[file_idx, 0:numtimes, :] = latdata[:]
if lonname is not None:
londata = wrfnc.variables[lonname][:]
outlons[file_idx, 0:numtimes, :] = londata[:]
if timename is not None:
xtimedata = wrfnc.variables[timename][:]
outxtimes[file_idx, 0:numtimes] = xtimedata[:]
lats, lons, proj_params = get_proj_params(wrfnc,
ALL_TIMES,
varname)
projs = [getproj(lats=lats[i,:],
lons=lons[i,:],
**proj_params) for i in py3range(lats.shape[0])]
outprojs[file_idx, 0:numtimes] = (
np.asarray(projs, np.object)[:])
# Need to update coords here
file_idx += 1
# If any of the output files contain less than the max number of times,
# then a mask array is needed to flag all the missing arrays with
# missing values
if file_times_less_than_max:
outdata = np.ma.masked_values(outdata, Constants.DEFAULT_FILL)
if xarray_enabled() and meta:
outname = ucode(first_var.name)
outcoords = OrderedDict(first_var.coords)
outattrs = OrderedDict(first_var.attrs)
# New dimensions
outdimnames = ["file"] + list(first_var.dims)
outcoords["file"] = [i for i in py3range(numfiles)]
# Time needs to be multi dimensional, so use the default dimension
del outcoords["Time"]
time_coord = time_coord[:, time_idx_or_slice]
if not multitime:
time_coord = time_coord[:, np.newaxis]
outcoords["datetime"] = outdimnames[0:2], time_coord
if isinstance(outdata, np.ma.MaskedArray):
outattrs["_FillValue"] = Constants.DEFAULT_FILL
outattrs["missing_value"] = Constants.DEFAULT_FILL
# If the domain is moving, need to create the lat/lon/xtime coords
# since they can't be copied
if is_moving:
outlatdims = outdimnames[0:2] + outdimnames[-2:]
if latname is not None:
outlats = outlats[:, time_idx_or_slice, :]
if not multitime:
outlats = outlats[:, np.newaxis, :]
outcoords[latname] = outlatdims, outlats
if lonname is not None:
outlons = outlons[:, time_idx_or_slice, :]
if not multitime:
outlons = outlons[:, np.newaxis, :]
outcoords[lonname] = outlatdims, outlons
if timename is not None:
outxtimes = outxtimes[:, time_idx_or_slice]
if not multitime:
outxtimes = outxtimes[:, np.newaxis]
outcoords[timename] = outdimnames[0:2], outxtimes[:]
if not multitime:
outattrs["projection"] = outprojs[:, timeidx]
else:
outattrs["projection"] = outprojs
if not multitime:
outdata = outdata[:, timeidx, :]
outdata = outdata[:, np.newaxis, :]
outarr = DataArray(outdata, name=outname, coords=outcoords,
dims=outdimnames, attrs=outattrs)
else:
if not multitime:
outdata = outdata[:, timeidx, :]
outdata = outdata[:, np.newaxis, :]
outarr = outdata
return outarr
def combine_files(wrfseq, varname, timeidx, is_moving=None,
method="cat", squeeze=True, meta=True):
# Handles generators, single files, lists, tuples, custom classes
wrfseq = _get_iterable(wrfseq)
# Dictionary is unique
if _is_mapping(wrfseq):
outarr = _combine_dict(wrfseq, varname, timeidx, method, meta)
elif method.lower() == "cat":
outarr = _cat_files(wrfseq, varname, timeidx, is_moving,
squeeze, meta)
elif method.lower() == "join":
outarr = _join_files(wrfseq, varname, timeidx, is_moving, meta)
else:
raise ValueError("method must be 'cat' or 'join'")
return outarr.squeeze() if squeeze else outarr
# Cache is a dictionary of already extracted variables
def _extract_var(wrfnc, varname, timeidx, is_moving,
method, squeeze, cache, meta):
# Mainly used internally so variables don't get extracted multiple times,
# particularly to copy metadata. This can be slow.
if cache is not None:
try:
cache_var = cache[varname]
except KeyError:
pass
else:
if not meta:
if isinstance(cache_var, DataArray):
return cache_var.values
return cache_var
multitime = _is_multi_time_req(timeidx)
multifile = _is_multi_file(wrfnc)
if not multifile:
if xarray_enabled() and meta:
if is_moving is None:
is_moving = _is_moving_domain(wrfnc, varname)
result = _build_data_array(wrfnc, varname, timeidx, is_moving)
else:
if not multitime:
result = wrfnc.variables[varname][timeidx,:]
result = result[np.newaxis, :] # So that no squeeze works
else:
result = wrfnc.variables[varname][:]
else:
# Squeeze handled in this routine, so just return it
return combine_files(wrfnc, varname, timeidx, is_moving,
method, squeeze, meta)
return result.squeeze() if squeeze else result
def extract_vars(wrfnc, timeidx, varnames, method="cat", squeeze=True,
cache=None, meta=True):
if isstr(varnames):
varlist = [varnames]
else:
varlist = varnames
return {var:_extract_var(wrfnc, var, timeidx, None,
method, squeeze, cache, meta)
for var in varlist}
# Python 3 compatability
def _npbytes_to_str(var):
return (bytes(c).decode("utf-8") for c in var[:])
def _make_time(timearr):
return dt.datetime.strptime("".join(_npbytes_to_str(timearr)),
"%Y-%m-%d_%H:%M:%S")
def _file_times(wrfnc, timeidx):
multitime = _is_multi_time_req(timeidx)
if multitime:
times = wrfnc.variables["Times"][:,:]
for i in py3range(times.shape[0]):
yield _make_time(times[i,:])
else:
times = wrfnc.variables["Times"][timeidx,:]
yield _make_time(times)
def extract_times(wrfnc, timeidx):
multi_file = _is_multi_file(wrfnc)
if not multi_file:
wrf_list = [wrfnc]
else:
wrf_list = wrfnc
return [file_time
for wrf_file in wrf_list
for file_time in _file_times(wrf_file, timeidx)]
def is_standard_wrf_var(wrfnc, var):
multifile = _is_multi_file(wrfnc)
if multifile:
if not _is_mapping(wrfnc):
wrfnc = next(iter(wrfnc))
else:
entry = wrfnc[next(iter(viewkeys(wrfnc)))]
return is_standard_wrf_var(entry, var)
return var in wrfnc.variables
def is_staggered(var, wrfnc):
we = extract_dim(wrfnc, "west_east")
sn = extract_dim(wrfnc, "south_north")
bt = extract_dim(wrfnc, "bottom_top")
if (var.shape[-1] != we or var.shape[-2] != sn or var.shape[-3] != bt):
return True
return False
def get_left_indexes(ref_var, expected_dims):
"""Returns the extra left side dimensions for a variable with an expected
shape.
For example, if a 2D variable contains an additional left side dimension
for time, this will return the time dimension size.
"""
extra_dim_num = ref_var.ndim - expected_dims
if (extra_dim_num == 0):
return []
return tuple([ref_var.shape[x] for x in py3range(extra_dim_num)])
def iter_left_indexes(dims):
"""A generator which yields the iteration tuples for a sequence of
dimensions sizes.
For example, if an array shape is (3,3), then this will yield:
(0,0), (0,1), (1,0), (1,1)
Arguments:
- dims - a sequence of dimensions sizes (e.g. ndarry.shape)
"""
arg = [py3range(dim) for dim in dims]
for idxs in product(*arg):
yield idxs
def get_right_slices(var, right_ndims, fixed_val=0):
extra_dim_num = var.ndim - right_ndims
if extra_dim_num == 0:
return [slice(None)] * right_ndims
return tuple([fixed_val]*extra_dim_num +
[slice(None)]*right_ndims)
def get_proj_params(wrfnc, timeidx=0, varname=None):
proj_params = extract_global_attrs(wrfnc, attrs=("MAP_PROJ",
"CEN_LAT", "CEN_LON",
"TRUELAT1", "TRUELAT2",
"MOAD_CEN_LAT", "STAND_LON",
"POLE_LAT", "POLE_LON"))
multitime = _is_multi_time_req(timeidx)
if not multitime:
time_idx_or_slice = timeidx
else:
time_idx_or_slice = slice(None)
if varname is not None:
if not _is_coord_var(varname):
coord_names = getattr(wrfnc.variables[varname],
"coordinates").split()
lon_coord = coord_names[0]
lat_coord = coord_names[1]
else:
lat_coord, lon_coord = _get_coord_pairs(varname)
else:
lat_coord = "XLAT"
lon_coord = "XLONG"
return (wrfnc.variables[lat_coord][time_idx_or_slice,:],
wrfnc.variables[lon_coord][time_idx_or_slice,:],
proj_params)
class CoordPair(object):
def __init__(self, x=None, y=None, i=None, j=None, lat=None, lon=None):
self.x = x
self.y = y
self.i = i
self.j = j
self.lat = lat
self.lon = lon
def __repr__(self):
args = []
if self.x is not None:
args.append("x={}".format(self.x))
args.append("y={}".format(self.y))
if self.i is not None:
args.append("i={}".format(self.i))
args.append("j={}".format(self.j))
if self.lat is not None:
args.append("lat={}".format(self.lat))
args.append("lon={}".format(self.lon))
argstr = ", ".join(args)
return "{}({})".format(self.__class__.__name__, argstr)
def __str__(self):
return self.__repr__()
def from_args(func, argnames, *args, **kwargs):
"""Parses the function args and kargs looking for the desired argument
value. Otherwise, the value is taken from the default keyword argument
using the arg spec.
"""
if isstr(argnames):
arglist = [argnames]
else:
arglist = argnames
result = {}
for argname in arglist:
arg_loc = arg_location(func, argname, args, kwargs)
if arg_loc is not None:
result[argname] = arg_loc[0][arg_loc[1]]
else:
result[argname] = None
return result
def _args_to_list2(func, args, kwargs):
argspec = getargspec(func)
# Build the full tuple with defaults filled in
outargs = [None]*len(argspec.args)
for i,default in enumerate(argspec.defaults[::-1], 1):
outargs[-i] = default
# Add the supplied args
for i,arg in enumerate(args):
outargs[i] = arg
# Fill in the supplied kargs
for argname,val in viewitems(kwargs):
argidx = argspec.args.index(argname)
outargs[argidx] = val
return outargs
def _args_to_list3(func, args, kwargs):
sig = signature(func)
bound = sig.bind(*args, **kwargs)
bound.apply_defaults()
return [x for x in bound.arguments.values()]
def args_to_list(func, args, kwargs):
"""Converts the mixed args/kwargs to a single list of args"""
if version_info > (3,):
_args_to_list = _args_to_list3
else:
_args_to_list = _args_to_list2
return _args_to_list(func, args, kwargs)
def _arg_location2(func, argname, args, kwargs):
argspec = getargspec(func)
list_args = _args_to_list2(func, args, kwargs)
# Return the new sequence and location
if argname not in argspec.args and argname not in kwargs:
return None
result_idx = argspec.args.index(argname)
return list_args, result_idx
def _arg_location3(func, argname, args, kwargs):
sig = signature(func)
params = list(sig.parameters.keys())
list_args = _args_to_list3(func, args, kwargs)
try:
result_idx = params.index(argname)
except ValueError:
return None
return list_args, result_idx
def arg_location(func, argname, args, kwargs):
"""Parses the function args, kargs and signature looking for the desired
argument location (either in args, kargs, or argspec.defaults),
and returns a list containing representing all arguments in the
correct order with defaults filled in.
"""
if version_info > (3,):
_arg_location = _arg_location3
else:
_arg_location = _arg_location2
return _arg_location(func, argname, args, kwargs)