from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from sys import version_info
from copy import copy
try:
    from collections.abc import Iterable, Mapping
except ImportError:
    # Older Pythons (including Python 2) keep the ABCs in collections
    from collections import Iterable, Mapping
from collections import OrderedDict
from itertools import product
from types import GeneratorType
import datetime as dt
from math import floor, copysign

from inspect import getmodule
try:
    from inspect import signature
except ImportError:
    from inspect import getargspec

try:
    from inspect import getargvalues
except ImportError:
    from inspect import getgeneratorlocals

import numpy as np
import numpy.ma as ma

from .config import xarray_enabled
from .projection import getproj, NullProjection
from .constants import Constants, ALL_TIMES


if xarray_enabled():
    from xarray import DataArray
    from pandas import NaT


__all__ = ["extract_vars", "extract_global_attrs", "extract_dim",
           "combine_files", "is_standard_wrf_var", "extract_times",
           "iter_left_indexes", "get_left_indexes", "get_right_slices",
           "is_staggered", "get_proj_params", "viewitems", "viewkeys",
           "viewvalues", "py2round", "py3range", "combine_with", "either",
           "from_args", "arg_location", "args_to_list", "npvalues",
           "CoordPair"]


_COORD_PAIR_MAP = {"XLAT": ("XLAT", "XLONG"),
                   "XLONG": ("XLAT", "XLONG"),
                   "XLAT_M": ("XLAT_M", "XLONG_M"),
                   "XLONG_M": ("XLAT_M", "XLONG_M"),
                   "XLAT_U": ("XLAT_U", "XLONG_U"),
                   "XLONG_U": ("XLAT_U", "XLONG_U"),
                   "XLAT_V": ("XLAT_V", "XLONG_V"),
                   "XLONG_V": ("XLAT_V", "XLONG_V"),
                   "CLAT": ("CLAT", "CLONG"),
                   "CLONG": ("CLAT", "CLONG")}


_COORD_VARS = ("XLAT", "XLONG", "XLAT_M", "XLONG_M", "XLAT_U", "XLONG_U",
               "XLAT_V", "XLONG_V", "CLAT", "CLONG")

_TIME_COORD_VARS = ("XTIME",)


def _is_coord_var(varname):
    return varname in _COORD_VARS


def _is_time_coord_var(varname):
    return varname in _TIME_COORD_VARS


def _get_coord_pairs(varname):
    return _COORD_PAIR_MAP[varname]


def _is_multi_time_req(timeidx):
    return timeidx is None


def _is_multi_file(wrfnc):
    return (isinstance(wrfnc, Iterable) and not isstr(wrfnc))


def _has_time_coord(wrfnc):
    return "XTIME" in wrfnc.variables


def _is_mapping(wrfnc):
    return isinstance(wrfnc, Mapping)


def _generator_copy(gen):
    funcname = gen.__name__
    try:
        argvals = getargvalues(gen.gi_frame).locals
    except NameError:
        argvals = getgeneratorlocals(gen)
    module = getmodule(gen.gi_frame)

    if module is not None:
        # Look the generator function up in its defining module and call it
        # again with the same local arguments to get a fresh generator
        res = getattr(module, funcname)(**argvals)
    else:
        # Created in jupyter or the python interpreter
        import __main__
        res = getattr(__main__, funcname)(**argvals)

    return res


def test():
    q = [1, 2, 3]
    for i in q:
        yield i


class TestGen(object):
    def __init__(self, count=3):
        self._total = count
        self._i = 0

    def __iter__(self):
        return self

    def next(self):
        if self._i >= self._total:
            raise StopIteration
        else:
            val = self._i
            self._i += 1
            return val

    # Python 3
    def __next__(self):
        return self.next()


class IterWrapper(Iterable):
    """A wrapper class for generators and custom iterable classes that
    returns a new iterator from the start of the sequence when __iter__
    is called."""
    def __init__(self, wrapped):
        self._wrapped = wrapped

    def __iter__(self):
        if isinstance(self._wrapped, GeneratorType):
            return _generator_copy(self._wrapped)
        else:
            obj_copy = copy(self._wrapped)
            return obj_copy.__iter__()


def _get_iterable(wrfseq):
    """Returns a resettable iterable object."""
    if not _is_multi_file(wrfseq):
        return wrfseq
    else:
        if not _is_mapping(wrfseq):
            if isinstance(wrfseq, (list, tuple, IterWrapper)):
                return wrfseq
            else:
                return IterWrapper(wrfseq)  # generator/custom iterable class
        else:
            if isinstance(wrfseq, dict):
                return wrfseq
            else:
                return dict(wrfseq)  # generator/custom iterable class


# Dictionary python 2-3 compatibility stuff
def viewitems(d):
    func = getattr(d, "viewitems", None)
    if func is None:
        func = d.items
    return func()


def viewkeys(d):
    func = getattr(d, "viewkeys", None)
    if func is None:
        func = d.keys
    return func()


def viewvalues(d):
    func = getattr(d, "viewvalues", None)
    if func is None:
        func = d.values
    return func()


def isstr(s):
    try:
        return isinstance(s, basestring)
    except NameError:
        return isinstance(s, str)


# Python 2 rounding behavior
def _round2(x, d=0):
    p = 10 ** d
    return float(floor((x * p) + copysign(0.5, x))) / p


def py2round(x, d=0):
    if version_info >= (3,):
        return _round2(x, d)

    return round(x, d)
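
# Illustrative example (not part of the original module): py2round provides
# Python 2's round-half-away-from-zero behavior on Python 3, where the
# builtin round() uses banker's rounding, e.g.:
#
#     >>> py2round(0.5), py2round(1.5), py2round(-0.5)
#     (1.0, 2.0, -1.0)
#     >>> round(0.5), round(1.5)    # Python 3 builtin, for comparison
#     (0, 2)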


def py3range(*args):
    if version_info >= (3,):
        return range(*args)

    return xrange(*args)


def ucode(*args, **kwargs):
    if version_info >= (3, ):
        return str(*args, **kwargs)

    return unicode(*args, **kwargs)


# Helper to extract a masked array from a DataArray whose missing values
# have been converted to NaN
def npvalues(da):
    if not isinstance(da, DataArray):
        result = da
    else:
        try:
            fill_value = da.attrs["_FillValue"]
        except KeyError:
            result = da.values
        else:
            result = ma.masked_invalid(da.values, copy=False)
            result.set_fill_value(fill_value)

    return result
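
# Illustrative example (not part of the original module): for a DataArray
# produced by extract_vars that carries a _FillValue attribute, npvalues
# returns a numpy masked array; otherwise it just returns the raw values.
# 'ncfile' is assumed to be an open wrfout file:
#
#     >>> t2 = extract_vars(ncfile, 0, "T2")["T2"]
#     >>> arr = npvalues(t2)   # ndarray, or masked array if _FillValue is set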


# Helper utilities for metadata
class either(object):
    def __init__(self, *varnames):
        self.varnames = varnames

    def __call__(self, wrfnc):
        if _is_multi_file(wrfnc):
            if not _is_mapping(wrfnc):
                wrfnc = next(iter(wrfnc))
            else:
                entry = wrfnc[next(iter(viewkeys(wrfnc)))]
                return self(entry)

        for varname in self.varnames:
            if varname in wrfnc.variables:
                return varname

        raise ValueError("{} are not valid variable names".format(
            self.varnames))
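
# Illustrative example (not part of the original module): 'either' picks the
# first of several candidate variable names actually present in the file(s),
# e.g. wrfout files carry XLAT while met_em files carry XLAT_M.  'ncfile' is
# assumed to be an open wrfout file:
#
#     >>> either("XLAT", "XLAT_M")(ncfile)
#     'XLAT'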


class combine_with(object):
    # remove_dims is applied first, then insert_before is applied to the
    # reduced set of dimensions
    def __init__(self, varname, remove_dims=None, insert_before=None,
                 new_dimnames=None, new_coords=None):
        self.varname = varname
        self.remove_dims = remove_dims
        self.insert_before = insert_before
        self.new_dimnames = new_dimnames if new_dimnames is not None else []
        self.new_coords = (new_coords if new_coords is not None
                           else OrderedDict())

    def __call__(self, var):
        new_dims = list(var.dims)
        new_coords = OrderedDict(var.coords)

        if self.remove_dims is not None:
            for dim in self.remove_dims:
                new_dims.remove(dim)
                del new_coords[dim]

        if self.insert_before is not None:
            insert_idx = new_dims.index(self.insert_before)
            new_dims = (new_dims[0:insert_idx] + self.new_dimnames +
                        new_dims[insert_idx:])
        elif self.new_dimnames is not None:
            new_dims = self.new_dimnames

        if self.new_coords is not None:
            new_coords.update(self.new_coords)

        return new_dims, new_coords
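
# Illustrative example (not part of the original module): combine_with builds
# the (dims, coords) pair for a derived variable from a reference variable's
# metadata, e.g. dropping the Time dimension and its coordinate.  't_var' is
# assumed to be an xarray.DataArray built from a WRF variable:
#
#     >>> dims, coords = combine_with("T", remove_dims=("Time",))(t_var)
#     >>> dims    # e.g. ['bottom_top', 'south_north', 'west_east']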


def _corners_moved(wrfnc, first_ll_corner, first_ur_corner, latvar, lonvar):
    lats = wrfnc.variables[latvar]
    lons = wrfnc.variables[lonvar]

    # Need to check all times
    for i in py3range(lats.shape[-3]):
        start_idxs = [0]*len(lats.shape)  # PyNIO does not support ndim
        start_idxs[-3] = i
        start_idxs = tuple(start_idxs)

        end_idxs = [-1]*len(lats.shape)
        end_idxs[-3] = i
        end_idxs = tuple(end_idxs)

        if (first_ll_corner[0] != lats[start_idxs] or
                first_ll_corner[1] != lons[start_idxs] or
                first_ur_corner[0] != lats[end_idxs] or
                first_ur_corner[1] != lons[end_idxs]):
            return True

    return False


def _is_moving_domain(wrfseq, varname=None, latvar=either("XLAT", "XLAT_M"),
                      lonvar=either("XLONG", "XLONG_M")):

    if isinstance(latvar, either):
        latvar = latvar(wrfseq)

    if isinstance(lonvar, either):
        lonvar = lonvar(wrfseq)

    # In case it's just a single file
    if not _is_multi_file(wrfseq):
        wrfseq = [wrfseq]

    # Slow, but safe.  Compare the corner points to the first item and see
    # if any have moved.  Use the iterator protocol in case wrfseq is not a
    # list/tuple.
    if not _is_mapping(wrfseq):
        wrf_iter = iter(wrfseq)
        first_wrfnc = next(wrf_iter)
    else:
        entry = wrfseq[next(iter(viewkeys(wrfseq)))]
        return _is_moving_domain(entry, varname, latvar, lonvar)

    # Quick check on pressure coordinates, bypassing the need to search the
    # domain corner points
    try:
        coord_names = getattr(first_wrfnc.variables["P"],
                              "coordinates").split()
    except KeyError:
        pass
    else:
        if "XTIME" in coord_names:
            return True
        else:
            return False

    # The long way of checking all lat/lon corner points
    if varname is not None:
        try:
            coord_names = getattr(first_wrfnc.variables[varname],
                                  "coordinates").split()
        except AttributeError:
            # Variable doesn't have a coordinates attribute, use the
            # arguments
            lon_coord = lonvar
            lat_coord = latvar
        else:
            # If the XTIME variable is found to be a coordinate variable,
            # then it's a moving domain file
            try:
                xtime_coord = coord_names[2]
            except IndexError:
                # XTIME is not a coordinate variable, so if the variable is
                # in the file, this is not a moving domain file
                if "XTIME" in first_wrfnc.variables:
                    return False
            else:
                # XTIME is a coordinate, so this is a moving domain file
                if xtime_coord == "XTIME":
                    return True

            lon_coord = coord_names[0]
            lat_coord = coord_names[1]
    else:
        lon_coord = lonvar
        lat_coord = latvar

    # Probably a met_em file, need to search all the files
    lats = first_wrfnc.variables[lat_coord]
    lons = first_wrfnc.variables[lon_coord]

    zero_idxs = [0]*len(lats.shape)  # PyNIO doesn't have ndim
    last_idxs = list(zero_idxs)
    last_idxs[-2:] = [-1]*2

    zero_idxs = tuple(zero_idxs)
    last_idxs = tuple(last_idxs)

    lat0 = lats[zero_idxs]
    lat1 = lats[last_idxs]
    lon0 = lons[zero_idxs]
    lon1 = lons[last_idxs]

    ll_corner = (lat0, lon0)
    ur_corner = (lat1, lon1)

    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            if _corners_moved(wrfnc, ll_corner, ur_corner,
                              lat_coord, lon_coord):
                return True

    return False


def _get_global_attr(wrfnc, attr):
    val = getattr(wrfnc, attr, None)

    # PyNIO puts single values into an array
    if isinstance(val, np.ndarray):
        if len(val) == 1:
            return val[0]
    return val


def extract_global_attrs(wrfnc, attrs):
    if isstr(attrs):
        attrlist = [attrs]
    else:
        attrlist = attrs

    multifile = _is_multi_file(wrfnc)

    if multifile:
        if not _is_mapping(wrfnc):
            wrfnc = next(iter(wrfnc))
        else:
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return extract_global_attrs(entry, attrs)

    return {attr: _get_global_attr(wrfnc, attr) for attr in attrlist}


def extract_dim(wrfnc, dim):
    if _is_multi_file(wrfnc):
        if not _is_mapping(wrfnc):
            wrfnc = next(iter(wrfnc))
        else:
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return extract_dim(entry, dim)

    d = wrfnc.dimensions[dim]
    if not isinstance(d, int):
        return len(d)  # netCDF4
    return d  # PyNIO
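
# Illustrative example (not part of the original module): both helpers accept
# a single file, a sequence of files, or a mapping of sequences, and read the
# value from the first file found.  'ncfile' is assumed to be an open wrfout
# file and the values shown are hypothetical:
#
#     >>> extract_global_attrs(ncfile, "MAP_PROJ")
#     {'MAP_PROJ': 1}
#     >>> extract_dim(ncfile, "west_east")
#     74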


def _combine_dict(wrfdict, varname, timeidx, method, meta):
    """Dictionary combination creates a new left index for each key, then
    does a cat or join for the list of files for that key"""
    keynames = []
    numkeys = len(wrfdict)

    key_iter = iter(viewkeys(wrfdict))
    first_key = next(key_iter)
    keynames.append(first_key)

    is_moving = _is_moving_domain(wrfdict, varname)

    first_array = _extract_var(wrfdict[first_key], varname,
                               timeidx, is_moving=is_moving, method=method,
                               squeeze=False, cache=None, meta=meta)

    # Create the output data numpy array based on the first array
    outdims = [numkeys]
    outdims += first_array.shape
    outdata = np.empty(outdims, first_array.dtype)
    outdata[0, :] = first_array[:]

    idx = 1
    while True:
        try:
            key = next(key_iter)
        except StopIteration:
            break
        else:
            keynames.append(key)
            vardata = _extract_var(wrfdict[key], varname, timeidx,
                                   is_moving=is_moving, method=method,
                                   squeeze=False, cache=None, meta=meta)

            if outdata.shape[1:] != vardata.shape:
                raise ValueError("data sequences must have the "
                                 "same size for all dictionary keys")
            outdata[idx, :] = npvalues(vardata)[:]
            idx += 1

    if xarray_enabled() and meta:
        outname = str(first_array.name)
        # Note: assumes that all entries in dict have same coords
        outcoords = OrderedDict(first_array.coords)
        outdims = ["key"] + list(first_array.dims)
        outcoords["key"] = keynames
        outattrs = OrderedDict(first_array.attrs)

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdims, attrs=outattrs)
    else:
        outarr = outdata

    return outarr


def _find_coord_names(coords):
    try:
        lat_coord = [name for name in _COORD_VARS[0::2] if name in coords][0]
    except IndexError:
        lat_coord = None

    try:
        lon_coord = [name for name in _COORD_VARS[1::2] if name in coords][0]
    except IndexError:
        lon_coord = None

    try:
        xtime_coord = [name for name in _TIME_COORD_VARS if name in coords][0]
    except IndexError:
        xtime_coord = None

    return lat_coord, lon_coord, xtime_coord


def _find_max_time_size(wrfseq):
    wrf_iter = iter(wrfseq)

    max_times = 0
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            t = extract_dim(wrfnc, "Time")
            max_times = t if t >= max_times else max_times

    return max_times


def _build_data_array(wrfnc, varname, timeidx, is_moving_domain):
    multitime = _is_multi_time_req(timeidx)
    time_idx_or_slice = timeidx if not multitime else slice(None)
    var = wrfnc.variables[varname]
    data = var[time_idx_or_slice, :]

    # Want to preserve the time dimension
    if not multitime:
        data = data[np.newaxis, :]

    attrs = OrderedDict(var.__dict__)
    dimnames = var.dimensions[-data.ndim:]

    # WRF variables will have a coordinates attribute.  MET_EM files have
    # a stagger attribute which indicates the coordinate variable.
    try:
        # WRF files
        coord_attr = getattr(var, "coordinates")
    except AttributeError:
        if _is_coord_var(varname):
            # Coordinate variable (most likely XLAT or XLONG)
            lat_coord, lon_coord = _get_coord_pairs(varname)
            time_coord = None

            if is_moving_domain and _has_time_coord(wrfnc):
                time_coord = "XTIME"

        else:
            try:
                # met_em files
                stag_attr = getattr(var, "stagger")
            except AttributeError:
                lon_coord = None
                lat_coord = None
            else:
                # For met_em files, use the stagger name to get the lat/lon var
                lat_coord = "XLAT_{}".format(stag_attr)
                lon_coord = "XLONG_{}".format(stag_attr)
                # No coordinates attribute, so no time coordinate to copy
                time_coord = None
    else:
        coord_names = coord_attr.split()
        lon_coord = coord_names[0]
        lat_coord = coord_names[1]

        try:
            time_coord = coord_names[2]
        except IndexError:
            time_coord = None

    coords = OrderedDict()

    # Handle lat/lon coordinates and projection information if available
    if lon_coord is not None and lat_coord is not None:
        lon_coord_var = wrfnc.variables[lon_coord]
        lat_coord_var = wrfnc.variables[lat_coord]
        time_coord_var = (wrfnc.variables[time_coord]
                          if time_coord is not None
                          else None)

        if multitime:
            if is_moving_domain:
                # Special case with a moving domain in a multi-time file,
                # otherwise the projection parameters don't change
                coords[lon_coord] = lon_coord_var.dimensions, lon_coord_var[:]
                coords[lat_coord] = lat_coord_var.dimensions, lat_coord_var[:]

                # Returned lats/lons arrays will have a time dimension, so
                # proj will need to be a list due to moving corner points
                lats, lons, proj_params = get_proj_params(wrfnc,
                                                          timeidx,
                                                          varname)
                proj = [getproj(lats=lats[i, :],
                                lons=lons[i, :],
                                **proj_params)
                        for i in py3range(lats.shape[0])]

                if time_coord is not None:
                    coords[time_coord] = (lon_coord_var.dimensions[0],
                                          time_coord_var[:])
            else:
                coords[lon_coord] = (lon_coord_var.dimensions[1:],
                                     lon_coord_var[0, :])
                coords[lat_coord] = (lat_coord_var.dimensions[1:],
                                     lat_coord_var[0, :])

                # Domain not moving, so just get the first time
                lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
                proj = getproj(lats=lats, lons=lons, **proj_params)
        else:
            coords[lon_coord] = (lon_coord_var.dimensions[1:],
                                 lon_coord_var[timeidx, :])
            coords[lat_coord] = (lat_coord_var.dimensions[1:],
                                 lat_coord_var[timeidx, :])
            lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
            proj = getproj(lats=lats, lons=lons, **proj_params)

        attrs["projection"] = proj

    if dimnames[0] == "Time":
        coords[dimnames[0]] = extract_times(wrfnc, timeidx)

    data_array = DataArray(data, name=varname, dims=dimnames, coords=coords,
                           attrs=attrs)

    return data_array


def _find_forward(wrfseq, varname, timeidx, is_moving, meta):

    wrf_iter = iter(wrfseq)
    comboidx = 0

    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            numtimes = extract_dim(wrfnc, "Time")

            if timeidx < comboidx + numtimes:
                filetimeidx = timeidx - comboidx

                if meta:
                    return _build_data_array(wrfnc, varname, filetimeidx,
                                             is_moving)
                else:
                    return wrfnc.variables[varname][filetimeidx, :]
            else:
                comboidx += numtimes

    raise IndexError("invalid time index")


def _find_reverse(wrfseq, varname, timeidx, is_moving, meta):
    try:
        revwrfseq = reversed(wrfseq)
    except TypeError:
        revwrfseq = reversed(list(wrfseq))

    wrf_iter = iter(revwrfseq)
    revtimeidx = -timeidx - 1

    comboidx = 0

    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            numtimes = extract_dim(wrfnc, "Time")

            if revtimeidx < comboidx + numtimes:
                # Finds the "forward" sequence index, then counts that
                # number back from the back of the ncfile times,
                # since the ncfile needs to be iterated backwards as well.
                filetimeidx = numtimes - (revtimeidx - comboidx) - 1

                if meta:
                    return _build_data_array(wrfnc, varname, filetimeidx,
                                             is_moving)
                else:
                    return wrfnc.variables[varname][filetimeidx, :]
            else:
                comboidx += numtimes

    raise IndexError("invalid time index")


def _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta):
    if timeidx >= 0:
        return _find_forward(wrfseq, varname, timeidx, is_moving, meta)
    else:
        return _find_reverse(wrfseq, varname, timeidx, is_moving, meta)


# TODO: implement in C
def _cat_files(wrfseq, varname, timeidx, is_moving, squeeze, meta):
    if is_moving is None:
        is_moving = _is_moving_domain(wrfseq, varname)

    file_times = extract_times(wrfseq, ALL_TIMES)

    multitime = _is_multi_time_req(timeidx)

    # For single times, just need to find the ncfile and appropriate
    # time index, and return that array
    if not multitime:
        return _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta)

    # time_idx_or_slice = timeidx if not multitime else slice(None)

    # If all times are requested, need to build a new array and cat together
    # all of the arrays in the sequence
    wrf_iter = iter(wrfseq)

    if xarray_enabled() and meta:
        first_var = _build_data_array(next(wrf_iter), varname,
                                      ALL_TIMES, is_moving)
    else:
        first_var = (next(wrf_iter)).variables[varname][:]

    outdims = [len(file_times)]

    # Making a new time dim, so ignore this one
    outdims += first_var.shape[1:]

    outdata = np.empty(outdims, first_var.dtype)

    numtimes = first_var.shape[0]
    startidx = 0
    endidx = numtimes

    outdata[startidx:endidx, :] = first_var[:]

    if xarray_enabled() and meta and is_moving:
        latname, lonname, timename = _find_coord_names(first_var.coords)
        outcoorddims = outdims[0:1] + outdims[-2:]

        if latname is not None:
            outlats = np.empty(outcoorddims, first_var.dtype)
            outlats[startidx:endidx, :] = first_var.coords[latname][:]

        if lonname is not None:
            outlons = np.empty(outcoorddims, first_var.dtype)
            outlons[startidx:endidx, :] = first_var.coords[lonname][:]

        if timename is not None:
            outxtimes = np.empty(outdims[0])
            outxtimes[startidx:endidx] = first_var.coords[timename][:]

        # Projections also need to be aggregated
        outprojs = np.empty(outdims[0], np.object)

        outprojs[startidx:endidx] = np.asarray(first_var.attrs["projection"],
                                               np.object)[:]

    startidx = endidx
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            vardata = wrfnc.variables[varname][:]

            numtimes = vardata.shape[0]

            endidx = startidx + numtimes

            outdata[startidx:endidx, :] = vardata[:]

            if xarray_enabled() and meta and is_moving:
                if latname is not None:
                    latdata = wrfnc.variables[latname][:]
                    outlats[startidx:endidx, :] = latdata[:]

                if lonname is not None:
                    londata = wrfnc.variables[lonname][:]
                    outlons[startidx:endidx, :] = londata[:]

                if timename is not None:
                    xtimedata = wrfnc.variables[timename][:]
                    outxtimes[startidx:endidx] = xtimedata[:]

                lats, lons, proj_params = get_proj_params(wrfnc,
                                                          ALL_TIMES,
                                                          varname)
                projs = [getproj(lats=lats[i, :],
                                 lons=lons[i, :],
                                 **proj_params)
                         for i in py3range(lats.shape[0])]

                outprojs[startidx:endidx] = np.asarray(projs, np.object)[:]

            startidx = endidx

    if xarray_enabled() and meta:
        outname = ucode(first_var.name)
        outattrs = OrderedDict(first_var.attrs)
        outcoords = OrderedDict(first_var.coords)
        outdimnames = list(first_var.dims)

        if "Time" not in outdimnames:
            outdimnames.insert(0, "Time")

        if not multitime:
            file_times = [file_times[timeidx]]

        outcoords[outdimnames[0]] = file_times

        outcoords["datetime"] = outdimnames[0], file_times

        # If the domain is moving, need to create the lat/lon/xtime coords
        # since they can't be copied
        if is_moving:
            outlatdims = [outdimnames[0]] + outdimnames[-2:]

            if latname is not None:
                outlats = outlats[:]
                outcoords[latname] = outlatdims, outlats
            if lonname is not None:
                outlons = outlons[:]
                outcoords[lonname] = outlatdims, outlons
            if timename is not None:
                outxtimes = outxtimes[:]
                outcoords[timename] = outdimnames[0], outxtimes

            outattrs["projection"] = outprojs[:]

        outdata = outdata[:]

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdimnames, attrs=outattrs)

    else:
        outdata = outdata[:]
        outarr = outdata

    return outarr


def _get_numfiles(wrfseq):
    try:
        return len(wrfseq)
    except TypeError:
        wrf_iter = iter(wrfseq)
        return sum(1 for _ in wrf_iter)


# TODO: implement in C
def _join_files(wrfseq, varname, timeidx, is_moving, meta):
    if is_moving is None:
        is_moving = _is_moving_domain(wrfseq, varname)
    multitime = _is_multi_time_req(timeidx)
    numfiles = _get_numfiles(wrfseq)
    maxtimes = _find_max_time_size(wrfseq)

    time_idx_or_slice = timeidx if not multitime else slice(None)
    file_times_less_than_max = False
    file_idx = 0

    # wrfseq might be a generator
    wrf_iter = iter(wrfseq)
    wrfnc = next(wrf_iter)
    numtimes = extract_dim(wrfnc, "Time")

    if xarray_enabled() and meta:
        first_var = _build_data_array(wrfnc, varname, ALL_TIMES, is_moving)
        time_coord = np.full((numfiles, maxtimes), int(NaT), "datetime64[ns]")
        time_coord[file_idx, 0:numtimes] = first_var.coords["Time"][:]
    else:
        first_var = wrfnc.variables[varname][:]

    if numtimes < maxtimes:
        file_times_less_than_max = True

    # Out dimensions will be the number of files, maxtimes, then the
    # non-time shapes from the first variable
    outdims = [numfiles]
    outdims += [maxtimes]
    outdims += first_var.shape[1:]

    # For join, always need to start with full masked values
    outdata = np.full(outdims, Constants.DEFAULT_FILL, first_var.dtype)
    outdata[file_idx, 0:numtimes, :] = first_var[:]

    # Create the secondary coordinate arrays
    if xarray_enabled() and meta and is_moving:
        latname, lonname, timename = _find_coord_names(first_var.coords)
        outcoorddims = outdims[0:2] + outdims[-2:]

        if latname is not None:
            outlats = np.full(outcoorddims, Constants.DEFAULT_FILL,
                              first_var.dtype)
            outlats[file_idx, 0:numtimes, :] = first_var.coords[latname][:]

        if lonname is not None:
            outlons = np.full(outcoorddims, Constants.DEFAULT_FILL,
                              first_var.dtype)
            outlons[file_idx, 0:numtimes, :] = first_var.coords[lonname][:]

        if timename is not None:
            outxtimes = np.full(outdims[0:2], Constants.DEFAULT_FILL,
                                first_var.dtype)
            outxtimes[file_idx, 0:numtimes] = first_var.coords[timename][:]

        # Projections also need two dimensions
        outprojs = np.full(outdims[0:2], NullProjection(), np.object)

        outprojs[file_idx, 0:numtimes] = np.asarray(
            first_var.attrs["projection"],
            np.object)[:]

    file_idx = 1
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            numtimes = extract_dim(wrfnc, "Time")
            if numtimes < maxtimes:
                file_times_less_than_max = True
            outvar = wrfnc.variables[varname][:]

            if not multitime:
                outvar = outvar[np.newaxis, :]

            outdata[file_idx, 0:numtimes, :] = outvar[:]

            if xarray_enabled() and meta:
                file_times = extract_times(wrfnc, ALL_TIMES)
                time_coord[file_idx, 0:numtimes] = np.asarray(
                    file_times, "datetime64[ns]")[:]

            if xarray_enabled() and meta and is_moving:
                if latname is not None:
                    latdata = wrfnc.variables[latname][:]
                    outlats[file_idx, 0:numtimes, :] = latdata[:]

                if lonname is not None:
                    londata = wrfnc.variables[lonname][:]
                    outlons[file_idx, 0:numtimes, :] = londata[:]

                if timename is not None:
                    xtimedata = wrfnc.variables[timename][:]
                    outxtimes[file_idx, 0:numtimes] = xtimedata[:]

                lats, lons, proj_params = get_proj_params(wrfnc,
                                                          ALL_TIMES,
                                                          varname)
                projs = [getproj(lats=lats[i, :],
                                 lons=lons[i, :],
                                 **proj_params)
                         for i in py3range(lats.shape[0])]

                outprojs[file_idx, 0:numtimes] = (
                    np.asarray(projs, np.object)[:])

            # Need to update coords here
            file_idx += 1

    # If any of the output files contain less than the max number of times,
    # then a mask array is needed to flag all the missing arrays with
    # missing values
    if file_times_less_than_max:
        outdata = np.ma.masked_values(outdata, Constants.DEFAULT_FILL)

    if xarray_enabled() and meta:
        outname = ucode(first_var.name)
        outcoords = OrderedDict(first_var.coords)
        outattrs = OrderedDict(first_var.attrs)
        # New dimensions
        outdimnames = ["file"] + list(first_var.dims)
        outcoords["file"] = [i for i in py3range(numfiles)]

        # Time needs to be multi dimensional, so use the default dimension
        del outcoords["Time"]

        time_coord = time_coord[:, time_idx_or_slice]
        if not multitime:
            time_coord = time_coord[:, np.newaxis]
        outcoords["datetime"] = outdimnames[0:2], time_coord

        if isinstance(outdata, np.ma.MaskedArray):
            outattrs["_FillValue"] = Constants.DEFAULT_FILL
            outattrs["missing_value"] = Constants.DEFAULT_FILL

        # If the domain is moving, need to create the lat/lon/xtime coords
        # since they can't be copied
        if is_moving:
            outlatdims = outdimnames[0:2] + outdimnames[-2:]

            if latname is not None:
                outlats = outlats[:, time_idx_or_slice, :]
                if not multitime:
                    outlats = outlats[:, np.newaxis, :]
                outcoords[latname] = outlatdims, outlats
            if lonname is not None:
                outlons = outlons[:, time_idx_or_slice, :]
                if not multitime:
                    outlons = outlons[:, np.newaxis, :]
                outcoords[lonname] = outlatdims, outlons
            if timename is not None:
                outxtimes = outxtimes[:, time_idx_or_slice]
                if not multitime:
                    outxtimes = outxtimes[:, np.newaxis]
                outcoords[timename] = outdimnames[0:2], outxtimes[:]

            if not multitime:
                outattrs["projection"] = outprojs[:, timeidx]
            else:
                outattrs["projection"] = outprojs

        if not multitime:
            outdata = outdata[:, timeidx, :]
            outdata = outdata[:, np.newaxis, :]

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdimnames, attrs=outattrs)

    else:
        if not multitime:
            outdata = outdata[:, timeidx, :]
            outdata = outdata[:, np.newaxis, :]

        outarr = outdata

    return outarr


def combine_files(wrfseq, varname, timeidx, is_moving=None,
                  method="cat", squeeze=True, meta=True):

    # Handles generators, single files, lists, tuples, custom classes
    wrfseq = _get_iterable(wrfseq)

    # Dictionary is unique
    if _is_mapping(wrfseq):
        outarr = _combine_dict(wrfseq, varname, timeidx, method, meta)
    elif method.lower() == "cat":
        outarr = _cat_files(wrfseq, varname, timeidx, is_moving,
                            squeeze, meta)
    elif method.lower() == "join":
        outarr = _join_files(wrfseq, varname, timeidx, is_moving, meta)
    else:
        raise ValueError("method must be 'cat' or 'join'")

    return outarr.squeeze() if squeeze else outarr
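
# Illustrative example (not part of the original module): "cat" concatenates a
# sequence of files along the Time dimension, while "join" adds a new leading
# "file" dimension.  'nc1' and 'nc2' are assumed to be open wrfout files with
# four times each; the shapes shown are hypothetical:
#
#     >>> files = [nc1, nc2]
#     >>> combine_files(files, "T2", ALL_TIMES, method="cat").shape
#     (8, 90, 120)
#     >>> combine_files(files, "T2", ALL_TIMES, method="join").shape
#     (2, 4, 90, 120)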


# Cache is a dictionary of already extracted variables
def _extract_var(wrfnc, varname, timeidx, is_moving,
                 method, squeeze, cache, meta):
    # Mainly used internally so variables don't get extracted multiple times,
    # particularly to copy metadata.  This can be slow.
    if cache is not None:
        try:
            cache_var = cache[varname]
        except KeyError:
            pass
        else:
            if not meta:
                if isinstance(cache_var, DataArray):
                    return cache_var.values

            return cache_var

    multitime = _is_multi_time_req(timeidx)
    multifile = _is_multi_file(wrfnc)

    if not multifile:
        if xarray_enabled() and meta:
            if is_moving is None:
                is_moving = _is_moving_domain(wrfnc, varname)
            result = _build_data_array(wrfnc, varname, timeidx, is_moving)
        else:
            if not multitime:
                result = wrfnc.variables[varname][timeidx, :]
                # Keep the time dimension so squeeze has no effect
                result = result[np.newaxis, :]
            else:
                result = wrfnc.variables[varname][:]
    else:
        # Squeeze handled in this routine, so just return it
        return combine_files(wrfnc, varname, timeidx, is_moving,
                             method, squeeze, meta)

    return result.squeeze() if squeeze else result


def extract_vars(wrfnc, timeidx, varnames, method="cat", squeeze=True,
                 cache=None, meta=True):
    if isstr(varnames):
        varlist = [varnames]
    else:
        varlist = varnames

    return {var: _extract_var(wrfnc, var, timeidx, None,
                              method, squeeze, cache, meta)
            for var in varlist}
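
# Illustrative example (not part of the original module): extract_vars is the
# main entry point used by the higher-level routines; it returns a dict keyed
# by variable name.  'ncfile' is assumed to be an open wrfout file:
#
#     >>> result = extract_vars(ncfile, 0, ("P", "PSFC"))
#     >>> result["PSFC"]      # xarray.DataArray (or ndarray if meta=False)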


# Python 3 compatibility
def _npbytes_to_str(var):
    return (bytes(c).decode("utf-8") for c in var[:])


def _make_time(timearr):
    return dt.datetime.strptime("".join(_npbytes_to_str(timearr)),
                                "%Y-%m-%d_%H:%M:%S")


def _file_times(wrfnc, timeidx):
    multitime = _is_multi_time_req(timeidx)
    if multitime:
        times = wrfnc.variables["Times"][:, :]
        for i in py3range(times.shape[0]):
            yield _make_time(times[i, :])
    else:
        times = wrfnc.variables["Times"][timeidx, :]
        yield _make_time(times)


def extract_times(wrfnc, timeidx):
    multi_file = _is_multi_file(wrfnc)
    if not multi_file:
        wrf_list = [wrfnc]
    else:
        wrf_list = wrfnc

    return [file_time
            for wrf_file in wrf_list
            for file_time in _file_times(wrf_file, timeidx)]


def is_standard_wrf_var(wrfnc, var):
    multifile = _is_multi_file(wrfnc)
    if multifile:
        if not _is_mapping(wrfnc):
            wrfnc = next(iter(wrfnc))
        else:
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return is_standard_wrf_var(entry, var)

    return var in wrfnc.variables


def is_staggered(var, wrfnc):
    we = extract_dim(wrfnc, "west_east")
    sn = extract_dim(wrfnc, "south_north")
    bt = extract_dim(wrfnc, "bottom_top")

    if (var.shape[-1] != we or var.shape[-2] != sn or var.shape[-3] != bt):
        return True

    return False


def get_left_indexes(ref_var, expected_dims):
    """Returns the extra left side dimensions for a variable with an expected
    shape.

    For example, if a 2D variable contains an additional left side dimension
    for time, this will return the time dimension size.

    """
    extra_dim_num = ref_var.ndim - expected_dims

    if extra_dim_num == 0:
        return []

    return tuple([ref_var.shape[x] for x in py3range(extra_dim_num)])


def iter_left_indexes(dims):
    """A generator which yields the iteration tuples for a sequence of
    dimension sizes.

    For example, if an array shape is (2,2), then this will yield:

    (0,0), (0,1), (1,0), (1,1)

    Arguments:

    - dims - a sequence of dimension sizes (e.g. ndarray.shape)

    """
    arg = [py3range(dim) for dim in dims]
    for idxs in product(*arg):
        yield idxs
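
# Illustrative example (not part of the original module): these two helpers
# are typically used together to loop over the extra leading dimensions of an
# over-dimensioned input, e.g. for a variable of shape (4, 2, 50, 60) that is
# expected to be 2D:
#
#     >>> extra = get_left_indexes(var, 2)    # (4, 2)
#     >>> list(iter_left_indexes(extra))      # [(0, 0), (0, 1), (1, 0), ...]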


def get_right_slices(var, right_ndims, fixed_val=0):
    extra_dim_num = var.ndim - right_ndims
    if extra_dim_num == 0:
        return [slice(None)] * right_ndims

    return tuple([fixed_val]*extra_dim_num +
                 [slice(None)]*right_ndims)
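
# Illustrative example (not part of the original module): get_right_slices
# builds an index that fixes the extra leading dimensions and keeps the
# rightmost ones whole, e.g. for a 4D variable reduced to its last 2 dims:
#
#     >>> get_right_slices(var, 2)            # var.ndim == 4 (assumed)
#     (0, 0, slice(None, None, None), slice(None, None, None))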


def get_proj_params(wrfnc, timeidx=0, varname=None):
    proj_params = extract_global_attrs(wrfnc, attrs=("MAP_PROJ",
                                                     "CEN_LAT", "CEN_LON",
                                                     "TRUELAT1", "TRUELAT2",
                                                     "MOAD_CEN_LAT",
                                                     "STAND_LON",
                                                     "POLE_LAT", "POLE_LON"))
    multitime = _is_multi_time_req(timeidx)
    if not multitime:
        time_idx_or_slice = timeidx
    else:
        time_idx_or_slice = slice(None)

    if varname is not None:
        if not _is_coord_var(varname):
            coord_names = getattr(wrfnc.variables[varname],
                                  "coordinates").split()
            lon_coord = coord_names[0]
            lat_coord = coord_names[1]
        else:
            lat_coord, lon_coord = _get_coord_pairs(varname)
    else:
        lat_coord = "XLAT"
        lon_coord = "XLONG"

    return (wrfnc.variables[lat_coord][time_idx_or_slice, :],
            wrfnc.variables[lon_coord][time_idx_or_slice, :],
            proj_params)


class CoordPair(object):
    def __init__(self, x=None, y=None, i=None, j=None, lat=None, lon=None):
        self.x = x
        self.y = y
        self.i = i
        self.j = j
        self.lat = lat
        self.lon = lon

    def __repr__(self):
        args = []
        if self.x is not None:
            args.append("x={}".format(self.x))
            args.append("y={}".format(self.y))

        if self.i is not None:
            args.append("i={}".format(self.i))
            args.append("j={}".format(self.j))

        if self.lat is not None:
            args.append("lat={}".format(self.lat))
            args.append("lon={}".format(self.lon))

        argstr = ", ".join(args)

        return "{}({})".format(self.__class__.__name__, argstr)

    def __str__(self):
        return self.__repr__()
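
# Illustrative example (not part of the original module): CoordPair simply
# stores related coordinate values and reports the ones that are set, e.g.:
#
#     >>> print(CoordPair(x=10, y=20, lat=41.5, lon=-97.2))
#     CoordPair(x=10, y=20, lat=41.5, lon=-97.2)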


def from_args(func, argnames, *args, **kwargs):
    """Parses the function args and kwargs looking for the desired argument
    values.  If an argument was not passed explicitly, its value is taken
    from the function's defaults using the signature/argspec.

    """
    if isstr(argnames):
        arglist = [argnames]
    else:
        arglist = argnames

    result = {}
    for argname in arglist:
        arg_loc = arg_location(func, argname, args, kwargs)

        if arg_loc is not None:
            result[argname] = arg_loc[0][arg_loc[1]]
        else:
            result[argname] = None

    return result
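
# Illustrative example (not part of the original module): from_args is used by
# the decorators to look up an argument's value no matter how it was passed.
# For a hypothetical function f(a, b, c=3):
#
#     >>> from_args(f, ("b", "c"), 1, b=2)
#     {'b': 2, 'c': 3}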


def _args_to_list2(func, args, kwargs):
    argspec = getargspec(func)

    # Build the full tuple with defaults filled in
    outargs = [None]*len(argspec.args)
    for i, default in enumerate(argspec.defaults[::-1], 1):
        outargs[-i] = default

    # Add the supplied args
    for i, arg in enumerate(args):
        outargs[i] = arg

    # Fill in the supplied kwargs
    for argname, val in viewitems(kwargs):
        argidx = argspec.args.index(argname)
        outargs[argidx] = val

    return outargs


def _args_to_list3(func, args, kwargs):
    sig = signature(func)
    bound = sig.bind(*args, **kwargs)
    bound.apply_defaults()

    return [x for x in bound.arguments.values()]


def args_to_list(func, args, kwargs):
    """Converts the mixed args/kwargs to a single list of args."""
    if version_info > (3,):
        _args_to_list = _args_to_list3
    else:
        _args_to_list = _args_to_list2

    return _args_to_list(func, args, kwargs)


def _arg_location2(func, argname, args, kwargs):
    argspec = getargspec(func)

    list_args = _args_to_list2(func, args, kwargs)

    # Return the new sequence and location
    if argname not in argspec.args and argname not in kwargs:
        return None

    result_idx = argspec.args.index(argname)

    return list_args, result_idx


def _arg_location3(func, argname, args, kwargs):
    sig = signature(func)
    params = list(sig.parameters.keys())

    list_args = _args_to_list3(func, args, kwargs)

    try:
        result_idx = params.index(argname)
    except ValueError:
        return None

    return list_args, result_idx


def arg_location(func, argname, args, kwargs):
    """Parses the function args, kwargs, and signature looking for the
    desired argument location (in args, kwargs, or the defaults), and returns
    the full argument list (with defaults filled in) along with the index of
    the desired argument within that list.

    """
    if version_info > (3,):
        _arg_location = _arg_location3
    else:
        _arg_location = _arg_location2

    return _arg_location(func, argname, args, kwargs)