wrf-python/src/wrf/util.py

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import os
from sys import version_info
from copy import copy
from collections import Iterable, Mapping, OrderedDict
from itertools import product
from types import GeneratorType
import datetime as dt
from inspect import getmodule

try:
    from inspect import signature
except ImportError:
    from inspect import getargspec

try:
    from inspect import getargvalues
except ImportError:
    from inspect import getgeneratorlocals

import numpy as np
import numpy.ma as ma

from .config import xarray_enabled
from .projection import getproj, NullProjection
from .constants import Constants, ALL_TIMES
from .py3compat import (viewitems, viewkeys, isstr, py3range, ucode)
from .cache import cache_item, get_cached_item

if xarray_enabled():
    from xarray import DataArray
    from pandas import NaT


_COORD_PAIR_MAP = {"XLAT" : ("XLAT", "XLONG"),
                "XLONG" : ("XLAT", "XLONG"),
                "XLAT_M" : ("XLAT_M", "XLONG_M"),
                "XLONG_M" : ("XLAT_M", "XLONG_M"),
                "XLAT_U" : ("XLAT_U", "XLONG_U"),
                "XLONG_U" : ("XLAT_U", "XLONG_U"),
                "XLAT_V" : ("XLAT_V", "XLONG_V"),
                "XLONG_V" : ("XLAT_V", "XLONG_V"),
                "CLAT" : ("CLAT", "CLONG"),
                "CLONG" : ("CLAT", "CLONG")}


_COORD_VARS = ("XLAT", "XLONG", "XLAT_M", "XLONG_M", "XLAT_U", "XLONG_U",
               "XLAT_V", "XLONG_V", "CLAT", "CLONG")

_LAT_COORDS = ("XLAT",  "XLAT_M", "XLAT_U",  "XLAT_V",  "CLAT")

_LON_COORDS = ("XLONG", "XLONG_M", "XLONG_U","XLONG_V", "CLONG")

_TIME_COORD_VARS = ("XTIME",)


def is_time_coord_var(varname):
    return varname in _TIME_COORD_VARS


def get_coord_pairs(varname):
    return _COORD_PAIR_MAP[varname]


def is_multi_time_req(timeidx):
    return timeidx is None


def is_multi_file(wrfnc):
    return (isinstance(wrfnc, Iterable) and not isstr(wrfnc))


def has_time_coord(wrfnc):
    return "XTIME" in wrfnc.variables


def is_mapping(wrfnc):
    return isinstance(wrfnc, Mapping)


def _generator_copy(gen):
    funcname = gen.__name__
    try:
        argvals = getargvalues(gen.gi_frame)
    except NameError:
        argvals = getgeneratorlocals(gen)
    module = getmodule(gen.gi_frame)

    if module is not None:
        res = module.get(funcname)(**argvals.locals)
    else:
        # Created in jupyter or the python interpreter
        import __main__
        res = getattr(__main__, funcname)(**argvals.locals)

    return res


def test():
    q = [1,2,3]
    for i in q:
        yield i


class TestGen(object):
    def __init__(self, count=3):
        self._total = count
        self._i = 0

    def __iter__(self):
        return self

    def next(self):
        if self._i >= self._total:
            raise StopIteration
        else:
            val = self._i
            self._i += 1
            return val

    # Python 3
    def __next__(self):
        return self.next()


def latlon_coordvars(d):
    lat_coord = None
    lon_coord = None

    for name in _LAT_COORDS:
        if name in viewkeys(d):
            lat_coord = name
            break

    for name in _LON_COORDS:
        if name in viewkeys(d):
            lon_coord = name
            break

    return lat_coord, lon_coord


def is_coordvar(varname):
    return varname in _COORD_VARS


class IterWrapper(Iterable):
    """A wrapper class for generators and custom iterable classes which returns
    a new iterator from the start of the sequence when __iter__ is called"""
    def __init__(self, wrapped):
        self._wrapped = wrapped

    def __iter__(self):
        if isinstance(self._wrapped, GeneratorType):
            return _generator_copy(self._wrapped)
        else:
            obj_copy = copy(self._wrapped)
            return obj_copy.__iter__()


def get_iterable(wrfseq):
    """Returns a resetable iterable object."""
    if not is_multi_file(wrfseq):
        return wrfseq
    else:
        if not is_mapping(wrfseq):

            if isinstance(wrfseq, (list, tuple, IterWrapper)):
                return wrfseq
            else:
                return IterWrapper(wrfseq) # generator/custom iterable class

        else:
            if isinstance(wrfseq, dict):
                return wrfseq
            else:
                return dict(wrfseq) # generator/custom iterable class


# Helper to extract masked arrays from DataArrays that convert to NaN
def npvalues(array_type):
    if not isinstance(array_type, DataArray):
        result = array_type
    else:
        try:
            fill_value = array_type.attrs["_FillValue"]
        except KeyError:
            result = array_type.values
        else:
            result = ma.masked_invalid(array_type.values, copy=False)
            result.set_fill_value(fill_value)

    return result


# Helper utilities for metadata
class either(object):
    def __init__(self, *varnames):
        self.varnames = varnames

    def __call__(self, wrfnc):
        if is_multi_file(wrfnc):
            if not is_mapping(wrfnc):
                wrfnc = next(iter(wrfnc))
            else:
                entry = wrfnc[next(iter(viewkeys(wrfnc)))]
                return self(entry)

        for varname in self.varnames:
            if varname in wrfnc.variables:
                return varname

        raise ValueError("{} are not valid variable names".format(
                                                            self.varnames))


class combine_with(object):
    # Remove remove_idx first, then insert_idx is applied to removed set
    def __init__(self, varname, remove_dims=None, insert_before=None,
                 new_dimnames=None, new_coords=None):
        self.varname = varname
        self.remove_dims = remove_dims
        self.insert_before = insert_before
        self.new_dimnames = new_dimnames if new_dimnames is not None else []
        self.new_coords = (new_coords if new_coords is not None
                           else OrderedDict())

    def __call__(self, var):
        new_dims = list(var.dims)
        new_coords = OrderedDict(var.coords)

        if self.remove_dims is not None:
            for dim in self.remove_dims:
                new_dims.remove(dim)
                del new_coords[dim]

        if self.insert_before is not None:
            insert_idx = new_dims.index(self.insert_before)
            new_dims = (new_dims[0:insert_idx] + self.new_dimnames +
                    new_dims[insert_idx:])
        elif self.new_dimnames is not None:
            new_dims = self.new_dimnames

        if self.new_coords is not None:
            new_coords.update(self.new_coords)

        return new_dims, new_coords


# This should look like:
# [(0, (-3,-2)), # variable 1
# (1, -1)] # variable 2
class combine_dims(object):

    def __init__(self, pairs):
        self.pairs = pairs

    def __call__(self, *args):
        result = []
        for pair in self.pairs:
            var = args[pair[0]]
            dimidxs = pair[1]
            if isinstance(dimidxs, Iterable):
                for dimidx in dimidxs:
                    result.append(var.shape[dimidx])
            else:
                result.append(var.shape[dimidxs])

        return tuple(result)


class from_var(object):
    def __init__(self, varname, attribute):
        self.varname = varname
        self.attribute = attribute

    def __call__(self, wrapped, *args, **kwargs):
        vard = from_args(wrapped, (self.varname,), *args, **kwargs)

        var = None
        if vard is not None:
            var = vard[self.varname]

        if not isinstance(var, DataArray):
            return None

        return var.attrs.get(self.attribute, None)


def _corners_moved(wrfnc, first_ll_corner, first_ur_corner, latvar, lonvar):
    lats = wrfnc.variables[latvar][:]
    lons = wrfnc.variables[lonvar][:]

    # Need to check all times
    for i in py3range(lats.shape[-3]):
        start_idxs = [0]*len(lats.shape) # PyNIO does not support ndim
        start_idxs[-3] = i
        start_idxs = tuple(start_idxs)

        end_idxs = [-1]*len(lats.shape)
        end_idxs[-3] = i
        end_idxs = tuple(end_idxs)

        if (first_ll_corner[0] != lats[start_idxs] or
            first_ll_corner[1] != lons[start_idxs] or
            first_ur_corner[0] != lats[end_idxs] or
            first_ur_corner[1] != lons[end_idxs]):
            return True

    return False


def is_moving_domain(wrfseq, varname=None, latvar=either("XLAT", "XLAT_M"),
                      lonvar=either("XLONG", "XLONG_M"), _key=None):

    if isinstance(latvar, either):
        latvar = latvar(wrfseq)

    if isinstance(lonvar, either):
        lonvar = lonvar(wrfseq)

    # In case it's just a single file
    if not is_multi_file(wrfseq):
        wrfseq = [wrfseq]

    # Slow, but safe. Compare the corner points to the first item and see
    # any move.  User iterator protocol in case wrfseq is not a list/tuple.
    if not is_mapping(wrfseq):
        wrf_iter = iter(wrfseq)
        first_wrfnc = next(wrf_iter)
    else:
        # Currently only checking the first dict entry.
        dict_key = next(iter(viewkeys(wrfseq)))
        entry = wrfseq[dict_key]
        key = _key[dict_key] if _key is not None else None
        return is_moving_domain(entry, varname, latvar, lonvar, key)

    # The long way of checking all lat/lon corner points.  Doesn't appear
    # to be a shortcut in the netcdf files.
    if varname is not None:
        try:
            coord_names = getattr(first_wrfnc.variables[varname],
                              "coordinates").split()
        except AttributeError:
            # Variable doesn't have a coordinates attribute, use the
            # arguments
            lon_coord = lonvar
            lat_coord = latvar
        else:
            lon_coord = coord_names[0]
            lat_coord = coord_names[1]
    else:
        lon_coord = lonvar
        lat_coord = latvar

    # See if there is a cached value
    product = "is_moving_{}_{}".format(lat_coord, lon_coord)
    moving = get_cached_item(_key, product)
    if moving is not None:
        return moving

    # Need to search all the files
    lats = first_wrfnc.variables[lat_coord][:]
    lons = first_wrfnc.variables[lon_coord][:]

    zero_idxs = [0]*len(lats.shape)  # PyNIO doesn't have ndim
    last_idxs = list(zero_idxs)
    last_idxs[-2:] = [-1]*2

    zero_idxs = tuple(zero_idxs)
    last_idxs = tuple(last_idxs)

    lat0 = lats[zero_idxs]
    lat1 = lats[last_idxs]
    lon0 = lons[zero_idxs]
    lon1 = lons[last_idxs]

    ll_corner = (lat0, lon0)
    ur_corner = (lat1, lon1)

    # Need to check if the first file is moving, might be a single
    # file with multiple times
    if _corners_moved(first_wrfnc, ll_corner, ur_corner, lat_coord, lon_coord):
        cache_item(_key, product, True)
        return True

    # Now check any additional files
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            if _corners_moved(wrfnc, ll_corner, ur_corner,
                              lat_coord, lon_coord):

                cache_item(_key, product, True)
                return True

    cache_item(_key, product, False)

    return False


def _get_global_attr(wrfnc, attr):
    val = getattr(wrfnc, attr, None)

    # PyNIO puts single values in to an array
    if isinstance(val, np.ndarray):
        if len(val) == 1:
            return val[0]
    return val


def extract_global_attrs(wrfnc, attrs):
    if isstr(attrs):
        attrlist = [attrs]
    else:
        attrlist = attrs

    multifile = is_multi_file(wrfnc)

    if multifile:
        if not is_mapping(wrfnc):
            wrfnc = next(iter(wrfnc))
        else:
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return extract_global_attrs(entry, attrs)

    return {attr:_get_global_attr(wrfnc, attr) for attr in attrlist}


def extract_dim(wrfnc, dim):
    if is_multi_file(wrfnc):
        if not is_mapping(wrfnc):
            wrfnc = next(iter(wrfnc))
        else:
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return extract_dim(entry, dim)

    d = wrfnc.dimensions[dim]
    if not isinstance(d, int):
        return len(d) #netCDF4
    return d # PyNIO


def _combine_dict(wrfdict, varname, timeidx, method, meta, _key):
    """Dictionary combination creates a new left index for each key, then
    does a cat or join for the list of files for that key"""
    keynames = []
    numkeys = len(wrfdict)

    key_iter = iter(viewkeys(wrfdict))
    first_key = next(key_iter)
    keynames.append(first_key)

    is_moving = is_moving_domain(wrfdict, varname, _key=_key)

    # Not quite sure how to handle coord caching with dictionaries, so
    # disabling it for now by setting _key to None.
    first_array = _extract_var(wrfdict[first_key], varname,
                              timeidx, is_moving=is_moving, method=method,
                              squeeze=False, cache=None, meta=meta,
                              _key=_key[first_key])


    # Create the output data numpy array based on the first array
    outdims = [numkeys]
    outdims += first_array.shape
    outdata = np.empty(outdims, first_array.dtype)
    outdata[0,:] = first_array[:]

    idx = 1
    while True:
        try:
            key = next(key_iter)
        except StopIteration:
            break
        else:
            keynames.append(key)
            vardata = _extract_var(wrfdict[key], varname, timeidx,
                                   is_moving=is_moving, method=method,
                                   squeeze=False, cache=None, meta=meta,
                                   _key=_key[key])

            if outdata.shape[1:] != vardata.shape:
                raise ValueError("data sequences must have the "
                                   "same size for all dictionary keys")
            outdata[idx,:] = npvalues(vardata)[:]
            idx += 1

    if xarray_enabled() and meta:
        outname = str(first_array.name)
        # Note: assumes that all entries in dict have same coords
        outcoords = OrderedDict(first_array.coords)

        # First find and store all the existing key coord names/values
        # This is applicable only if there are nested dictionaries.
        key_coordnames = []
        coord_vals = []
        existing_cnt = 0
        while True:
            key_coord_name = "key_{}".format(existing_cnt)

            if key_coord_name not in first_array.dims:
                break

            key_coordnames.append(key_coord_name)
            coord_vals.append(npvalues(first_array.coords[key_coord_name]))

            existing_cnt += 1

        # Now add the key coord name and values for THIS dictionary.
        # Put the new key_n name at the bottom, but the new values will
        # be at the top to be associated with key_0 (left most).  This
        # effectively shifts the existing 'key_n' coordinate values to the
        # right one dimension so *this* dicionary's key coordinate values
        # are at 'key_0'.
        key_coordnames.append(key_coord_name)
        coord_vals.insert(0, keynames)

        # make it so that key_0 is leftmost
        outdims = key_coordnames + list(first_array.dims[existing_cnt:])


        # Create the new 'key_n', value pairs
        for coordname, coordval in zip(key_coordnames, coord_vals):
            outcoords[coordname] = coordval


        outattrs = OrderedDict(first_array.attrs)

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdims, attrs=outattrs)
    else:
        outarr = outdata

    return outarr


def _find_coord_names(coords):
    try:
        lat_coord = [name for name in _COORD_VARS[0::2] if name in coords][0]
    except IndexError:
        lat_coord = None

    try:
        lon_coord = [name for name in _COORD_VARS[1::2] if name in coords][0]
    except IndexError:
        lon_coord = None

    try:
        xtime_coord = [name for name in _TIME_COORD_VARS if name in coords][0]
    except IndexError:
        xtime_coord = None

    return lat_coord, lon_coord, xtime_coord


def _find_max_time_size(wrfseq):
    wrf_iter = iter(wrfseq)

    max_times = 0
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            t = extract_dim(wrfnc, "Time")
            max_times = t if t >= max_times else max_times

    return max_times


def _build_data_array(wrfnc, varname, timeidx, is_moving_domain, is_multifile,
                      _key):

    # Note:  wrfnc is always a single netcdf file object
    # is_moving_domain and is_multifile are arguments indicating if the
    # single file came from a sequence, and if that sequence is has a moving
    # domain.  Both arguments are used mainly for coordinate extraction and
    # caching.
    multitime = is_multi_time_req(timeidx)
    time_idx_or_slice = timeidx if not multitime else slice(None)
    var = wrfnc.variables[varname]
    data = var[time_idx_or_slice, :]
    time_coord = None

    # Want to preserve the time dimension
    if not multitime:
        data = data[np.newaxis, :]

    attrs = OrderedDict(var.__dict__)
    dimnames = var.dimensions[-data.ndim:]

    # WRF variables will have a coordinates attribute.  MET_EM files have
    # a stagger attribute which indicates the coordinate variable.
    try:
        # WRF files
        coord_attr = getattr(var, "coordinates")
    except AttributeError:

        if is_coordvar(varname):
            # Coordinate variable (most likely XLAT or XLONG)
            lat_coord, lon_coord = get_coord_pairs(varname)
            time_coord = None

            if has_time_coord(wrfnc):
                time_coord = "XTIME"

        elif is_time_coord_var(varname):
            lon_coord = None
            lat_coord = None
            time_coord = None
        else:
            try:
                # met_em files
                stag_attr = getattr(var, "stagger")
            except AttributeError:
                lon_coord = None
                lat_coord = None
            else:
                # For met_em files, use the stagger name to get the lat/lon var
                lat_coord = "XLAT_{}".format(stag_attr)
                lon_coord = "XLONG_{}".format(stag_attr)
    else:
        coord_names = coord_attr.split()
        lon_coord = coord_names[0]
        lat_coord = coord_names[1]

        try:
            time_coord = coord_names[2]
        except IndexError:
            time_coord = None

    coords = OrderedDict()

    # Handle lat/lon coordinates and projection information if available
    if lon_coord is not None and lat_coord is not None:
        # Using a cache for coordinate variables so the extraction only happens
        # once.
        lon_coord_dimkey = lon_coord + "_dim"
        lon_coord_valkey = lon_coord + "_val"
        lat_coord_dimkey = lat_coord + "_dim"
        lat_coord_valkey = lat_coord + "_val"

        lon_coord_dims = get_cached_item(_key, lon_coord_dimkey)
        lon_coord_vals = get_cached_item(_key, lon_coord_valkey)
        if lon_coord_dims is None or lon_coord_vals is None:
            lon_var = wrfnc.variables[lon_coord]
            lon_coord_dims = lon_var.dimensions
            lon_coord_vals = lon_var[:]

            # Only cache here if the domain is not moving, otherwise
            # caching is handled by cat/join
            if not is_moving_domain:
                cache_item(_key, lon_coord_dimkey, lon_coord_dims)
                cache_item(_key, lon_coord_valkey, lon_coord_vals)

        lat_coord_dims = get_cached_item(_key, lat_coord_dimkey)
        lat_coord_vals = get_cached_item(_key, lat_coord_valkey)
        if lat_coord_dims is None or lat_coord_vals is None:
            lat_var = wrfnc.variables[lat_coord]
            lat_coord_dims = lat_var.dimensions
            lat_coord_vals = lat_var[:]

            # Only cache here if the domain is not moving, otherwise
            # caching is done in cat/join
            if not is_moving_domain:
                cache_item(_key, lat_coord_dimkey, lat_coord_dims)
                cache_item(_key, lat_coord_valkey, lat_coord_vals)

        time_coord_vals = None
        if time_coord is not None:
            # If not from a multifile sequence, then cache the time
            # coordinate.  Otherwise, handled in cat/join/
            if not is_multifile:
                time_coord_vals = get_cached_item(_key, time_coord)

                if time_coord_vals is None:
                    time_coord_vals = wrfnc.variables[time_coord][:]

                    if not is_multifile:
                        cache_item(_key, time_coord, time_coord_vals)
            else:
                time_coord_vals = wrfnc.variables[time_coord][:]

        if multitime:
            if is_moving_domain:
                # Special case with a moving domain in a multi-time file,
                # otherwise the projection parameters don't change
                coords[lon_coord] = lon_coord_dims, lon_coord_vals
                coords[lat_coord] = lat_coord_dims, lat_coord_vals

                # Returned lats/lons arrays will have a time dimension, so proj
                # will need to be a list due to moving corner points
                lats, lons, proj_params = get_proj_params(wrfnc,
                                                          timeidx,
                                                          varname)
                proj = [getproj(lats=lats[i,:],
                            lons=lons[i,:],
                            **proj_params) for i in py3range(lats.shape[0])]

            else:
                coords[lon_coord] = (lon_coord_dims[1:],
                                     lon_coord_vals[0,:])
                coords[lat_coord] = (lat_coord_dims[1:],
                                     lat_coord_vals[0,:])

                # Domain not moving, so just get the first time
                lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
                proj = getproj(lats=lats, lons=lons, **proj_params)

            if time_coord is not None:
                coords[time_coord] = (lon_coord_dims[0], time_coord_vals)
        else:
            coords[lon_coord] = (lon_coord_dims[1:],
                                 lon_coord_vals[timeidx,:])
            coords[lat_coord] = (lat_coord_dims[1:],
                                 lat_coord_vals[timeidx,:])

            if time_coord is not None:
                coords[time_coord] = (lon_coord_dims[0],
                                      [time_coord_vals[timeidx]])


            lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
            proj = getproj(lats=lats, lons=lons, **proj_params)

        attrs["projection"] = proj


    if dimnames[0] == "Time":
        t = extract_times(wrfnc, timeidx, meta=False, do_xtime=False)
        if not multitime:
            t = [t]
        coords[dimnames[0]] = t

    data_array = DataArray(data, name=varname, dims=dimnames, coords=coords,
                           attrs=attrs)


    return data_array


def _find_forward(wrfseq, varname, timeidx, is_moving, meta, _key):

    wrf_iter = iter(wrfseq)
    comboidx = 0

    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            numtimes = extract_dim(wrfnc, "Time")

            if timeidx < comboidx + numtimes:
                filetimeidx = timeidx - comboidx

                if meta:
                    return _build_data_array(wrfnc, varname, filetimeidx,
                                             is_moving, True, _key)
                else:
                    result = wrfnc.variables[varname][filetimeidx, :]
                    return result[np.newaxis, :]  # So that nosqueeze works

            else:
                comboidx += numtimes

    raise IndexError("timeidx {} is out of bounds".format(timeidx))


def _find_reverse(wrfseq, varname, timeidx, is_moving, meta, _key):
    try:
        revwrfseq = reversed(wrfseq)
    except TypeError:
        revwrfseq = reversed(list(wrfseq))

    wrf_iter = iter(revwrfseq)
    revtimeidx = -timeidx - 1

    comboidx = 0

    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            numtimes = extract_dim(wrfnc, "Time")

            if revtimeidx < comboidx + numtimes:
                # Finds the "forward" sequence index, then counts that
                # number back from the back of the ncfile times,
                # since the ncfile  needs to be iterated backwards as well.
                filetimeidx = numtimes - (revtimeidx - comboidx) - 1

                if meta:
                    return _build_data_array(wrfnc, varname, filetimeidx,
                                             is_moving, True, _key)
                else:
                    result = wrfnc.variables[varname][filetimeidx, :]
                    return result[np.newaxis, :] # So that nosqueeze works
            else:
                comboidx += numtimes

    raise IndexError("timeidx {} is out of bounds".format(timeidx))


def _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta, _key):
    if timeidx >= 0:
        return _find_forward(wrfseq, varname, timeidx, is_moving, meta, _key)
    else:
        return _find_reverse(wrfseq, varname, timeidx, is_moving, meta, _key)


# TODO:  implement in C
def _cat_files(wrfseq, varname, timeidx, is_moving, squeeze, meta, _key):
    if is_moving is None:
        is_moving = is_moving_domain(wrfseq, varname, _key=_key)

    file_times = extract_times(wrfseq, ALL_TIMES, meta=False, do_xtime=False)

    multitime = is_multi_time_req(timeidx)

    # For single times, just need to find the ncfile and appropriate
    # time index, and return that array
    if not multitime:
        return _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta,
                                  _key)

    #time_idx_or_slice = timeidx if not multitime else slice(None)

    # If all times are requested, need to build a new array and cat together
    # all of the arrays in the sequence
    wrf_iter = iter(wrfseq)

    if xarray_enabled() and meta:
        first_var = _build_data_array(next(wrf_iter), varname,
                                      ALL_TIMES, is_moving, True, _key)
    else:
        first_var = (next(wrf_iter)).variables[varname][:]

    outdims = [len(file_times)]

    # Making a new time dim, so ignore this one
    outdims += first_var.shape[1:]

    outdata = np.empty(outdims, first_var.dtype)

    numtimes = first_var.shape[0]
    startidx = 0
    endidx = numtimes

    outdata[startidx:endidx, :] = first_var[:]

    if xarray_enabled() and meta:
        latname, lonname, timename = _find_coord_names(first_var.coords)

        timecached = False
        latcached = False
        loncached = False
        projcached = False

        outxtimes = None
        outlats = None
        outlons = None
        outprojs = None

        timekey = timename+"_cat" if timename is not None else None
        latkey = latname + "_cat" if latname is not None else None
        lonkey = lonname + "_cat" if lonname is not None else None
        projkey = "projection_cat" if is_moving else None

        if timename is not None:
            outxtimes = get_cached_item(_key, timekey)
            if outxtimes is None:
                outxtimes = np.empty(outdims[0])
                outxtimes[startidx:endidx] = npvalues(first_var.coords[timename][:])
            else:
                timecached = True

        if is_moving:
            outcoorddims = outdims[0:1] + outdims[-2:]

            if latname is not None:
                # Try to pull from the coord cache
                outlats = get_cached_item(_key, latkey)
                if outlats is None:
                    outlats = np.empty(outcoorddims, first_var.dtype)
                    outlats[startidx:endidx, :] = npvalues(first_var.coords[latname][:])
                else:
                    latcached = True

            if lonname is not None:
                outlons = get_cached_item(_key, lonkey)
                if outlons is None:
                    outlons = np.empty(outcoorddims, first_var.dtype)
                    outlons[startidx:endidx, :] = npvalues(first_var.coords[lonname][:])
                else:
                    loncached = True

            # Projections also need to be aggregated

            outprojs = get_cached_item(_key, projkey)
            if outprojs is None:
                outprojs = np.empty(outdims[0], np.object)
                outprojs[startidx:endidx] = np.asarray(
                    first_var.attrs["projection"], np.object)[:]
            else:
                projcached = True


    startidx = endidx
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            vardata = wrfnc.variables[varname][:]

            numtimes = vardata.shape[0]

            endidx = startidx + numtimes

            outdata[startidx:endidx, :] = vardata[:]

            if xarray_enabled() and meta:
                # XTIME new in 3.7
                if timename is not None and not timecached:
                    xtimedata = wrfnc.variables[timename][:]
                    outxtimes[startidx:endidx] = xtimedata[:]

                if is_moving:
                    if latname is not None and not latcached:
                        latdata = wrfnc.variables[latname][:]
                        outlats[startidx:endidx, :] = latdata[:]

                    if lonname is not None and not loncached:
                        londata = wrfnc.variables[lonname][:]
                        outlons[startidx:endidx, :] = londata[:]

                    if not projcached:
                        lats, lons, proj_params = get_proj_params(wrfnc,
                                                              ALL_TIMES,
                                                              varname)
                        projs = [getproj(lats=lats[i,:],
                            lons=lons[i,:],
                            **proj_params) for i in py3range(lats.shape[0])]

                        outprojs[startidx:endidx] = np.asarray(projs,
                                                               np.object)[:]

            startidx = endidx

    if xarray_enabled() and meta:

        # Cache the coords if applicable
        if not latcached and outlats is not None:
            cache_item(_key, latkey, outlats)
        if not loncached and outlons is not None:
            cache_item(_key, lonkey, outlons)
        if not projcached and outprojs is not None:
            cache_item(_key, projkey, outprojs)
        if not timecached and outxtimes is not None:
            cache_item(_key, timekey, outxtimes)

        outname = ucode(first_var.name)
        outattrs = OrderedDict(first_var.attrs)
        outcoords = OrderedDict(first_var.coords)
        outdimnames = list(first_var.dims)

        if "Time" not in outdimnames:
            outdimnames.insert(0, "Time")

        if not multitime:
            file_times = [file_times[timeidx]]

        outcoords[outdimnames[0]] = file_times

        outcoords["datetime"] = outdimnames[0], file_times

        if timename is not None:
            outxtimes = outxtimes[:]
            outcoords[timename] = outdimnames[0], outxtimes

        # If the domain is moving, need to create the lat/lon coords
        # since they can't be copied
        if is_moving:
            outlatdims = [outdimnames[0]] + outdimnames[-2:]

            if latname is not None:
                outlats = outlats[:]
                outcoords[latname] = outlatdims, outlats
            if lonname is not None:
                outlons = outlons[:]
                outcoords[lonname] = outlatdims, outlons

            outattrs["projection"] = outprojs[:]

        outdata = outdata[:]

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdimnames, attrs=outattrs)

    else:
        outdata = outdata[:]
        outarr = outdata

    return outarr


def _get_numfiles(wrfseq):
    try:
        return len(wrfseq)
    except TypeError:
        wrf_iter = iter(wrfseq)
        return sum(1 for _ in wrf_iter)


# TODO:  implement in C
def _join_files(wrfseq, varname, timeidx, is_moving, meta, _key):
    if is_moving is None:
        is_moving = is_moving_domain(wrfseq, varname, _key=_key)
    multitime = is_multi_time_req(timeidx)
    numfiles = _get_numfiles(wrfseq)
    maxtimes = _find_max_time_size(wrfseq)

    time_idx_or_slice = timeidx if not multitime else slice(None)
    file_times_less_than_max = False
    file_idx = 0

    # wrfseq might be a generator
    wrf_iter = iter(wrfseq)
    wrfnc = next(wrf_iter)
    numtimes = extract_dim(wrfnc, "Time")

    if xarray_enabled() and meta:
        first_var = _build_data_array(wrfnc, varname, ALL_TIMES, is_moving,
                                      True, _key)
        time_coord = np.full((numfiles, maxtimes), int(NaT), "datetime64[ns]")
        time_coord[file_idx, 0:numtimes] = first_var.coords["Time"][:]
    else:
        first_var = wrfnc.variables[varname][:]

    if numtimes < maxtimes:
        file_times_less_than_max = True

    # Out dimensions will be the number of files, maxtimes, then the
    # non-time shapes from the first variable
    outdims = [numfiles]
    outdims += [maxtimes]
    outdims += first_var.shape[1:]

    # For join, always need to start with full masked values
    outdata = np.full(outdims, Constants.DEFAULT_FILL, first_var.dtype)
    outdata[file_idx, 0:numtimes, :] = first_var[:]

    # Create the secondary coordinate arrays
    if xarray_enabled() and meta:
        latname, lonname, timename = _find_coord_names(first_var.coords)
        outcoorddims = outdims[0:2] + outdims[-2:]

        timecached = False
        latcached = False
        loncached = False
        projcached = False

        outxtimes = None
        outlats = None
        outlons = None
        outprojs = None

        timekey = timename+"_join" if timename is not None else None
        latkey = latname + "_join" if latname is not None else None
        lonkey = lonname + "_join" if lonname is not None else None
        projkey = "projection_join" if is_moving else None

        if timename is not None:
            outxtimes = get_cached_item(_key, timekey)
            if outxtimes is None:
                outxtimes = np.full(outdims[0:2], Constants.DEFAULT_FILL,
                                first_var.dtype)
                outxtimes[file_idx, 0:numtimes] = first_var.coords[timename][:]
            else:
                timecached = True

        if is_moving:
            if latname is not None:
                outlats = get_cached_item(_key, latkey)
                if outlats is None:
                    outlats = np.full(outcoorddims, Constants.DEFAULT_FILL,
                                  first_var.dtype)
                    outlats[file_idx, 0:numtimes, :] = (
                        first_var.coords[latname][:])
                else:
                    latcached = True

            if lonname is not None:
                outlons = get_cached_item(_key, lonkey)
                if outlons is None:
                    outlons = np.full(outcoorddims, Constants.DEFAULT_FILL,
                                  first_var.dtype)
                    outlons[file_idx, 0:numtimes, :] = (
                        first_var.coords[lonname][:])
                else:
                    loncached = True

            # Projections also need two dimensions

            outprojs = get_cached_item(_key, projkey)
            if outprojs is None:
                outprojs = np.full(outdims[0:2], NullProjection(), np.object)

                outprojs[file_idx, 0:numtimes] = np.asarray(
                                                first_var.attrs["projection"],
                                                np.object)[:]
            else:
                projcached = True


    file_idx=1
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            numtimes = extract_dim(wrfnc, "Time")
            if numtimes < maxtimes:
                file_times_less_than_max = True
            outvar = wrfnc.variables[varname][:]

            if not multitime:
                outvar = outvar[np.newaxis, :]

            outdata[file_idx, 0:numtimes, :] = outvar[:]

            if xarray_enabled() and meta:
                # For join, the times are a function of fileidx
                file_times = extract_times(wrfnc, ALL_TIMES, meta=False,
                                           do_xtime=False)
                time_coord[file_idx, 0:numtimes] = np.asarray(file_times,
                                                        "datetime64[ns]")[:]

                if timename is not None and not timecached:
                    xtimedata = wrfnc.variables[timename][:]
                    outxtimes[file_idx, 0:numtimes] = xtimedata[:]

                if is_moving:
                    if latname is not None and not latcached:
                        latdata = wrfnc.variables[latname][:]
                        outlats[file_idx, 0:numtimes, :] = latdata[:]

                    if lonname is not None and not loncached:
                        londata = wrfnc.variables[lonname][:]
                        outlons[file_idx, 0:numtimes, :] = londata[:]

                    if not projcached:
                        lats, lons, proj_params = get_proj_params(wrfnc,
                                                                  ALL_TIMES,
                                                                  varname)
                        projs = [getproj(lats=lats[i,:],
                            lons=lons[i,:],
                            **proj_params) for i in py3range(lats.shape[0])]

                        outprojs[file_idx, 0:numtimes] = (
                            np.asarray(projs, np.object)[:])

            # Need to update coords here
            file_idx += 1

    # If any of the output files contain less than the max number of times,
    # then a mask array is needed to flag all the missing arrays with
    # missing values
    if file_times_less_than_max:
        outdata = np.ma.masked_values(outdata, Constants.DEFAULT_FILL)

    if xarray_enabled() and meta:
        # Cache the coords if applicable
        if not latcached and outlats is not None:
            cache_item(_key, latkey, outlats)
        if not loncached and outlons is not None:
            cache_item(_key, lonkey, outlons)
        if not projcached and outprojs is not None:
            cache_item(_key, projkey, outprojs)
        if not timecached and outxtimes is not None:
            cache_item(_key, timekey, outxtimes)

        outname = ucode(first_var.name)
        outcoords = OrderedDict(first_var.coords)
        outattrs = OrderedDict(first_var.attrs)
        # New dimensions
        outdimnames = ["file"] + list(first_var.dims)
        outcoords["file"] = [i for i in py3range(numfiles)]

        # Time needs to be multi dimensional, so use the default dimension
        del outcoords["Time"]

        time_coord = time_coord[:, time_idx_or_slice]
        if not multitime:
            time_coord = time_coord[:, np.newaxis]
        outcoords["datetime"] = outdimnames[0:2], time_coord

        if isinstance(outdata, np.ma.MaskedArray):
            outattrs["_FillValue"] = Constants.DEFAULT_FILL
            outattrs["missing_value"] = Constants.DEFAULT_FILL

        if timename is not None:
            outxtimes = outxtimes[:, time_idx_or_slice]
            if not multitime:
                outxtimes = outxtimes[:, np.newaxis]
            outcoords[timename] = outdimnames[0:2], outxtimes[:]

        # If the domain is moving, need to create the lat/lon coords
        # since they can't be copied
        if is_moving:
            outlatdims = outdimnames[0:2] + outdimnames[-2:]

            if latname is not None:
                outlats = outlats[:, time_idx_or_slice, :]
                if not multitime:
                    outlats = outlats[:, np.newaxis, :]
                outcoords[latname] = outlatdims, outlats
            if lonname is not None:
                outlons = outlons[:, time_idx_or_slice, :]
                if not multitime:
                    outlons = outlons[:, np.newaxis, :]
                outcoords[lonname] = outlatdims, outlons

            if not multitime:
                outattrs["projection"] = outprojs[:, timeidx]
            else:
                outattrs["projection"] = outprojs

        if not multitime:
            outdata = outdata[:, timeidx, :]
            outdata = outdata[:, np.newaxis, :]

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdimnames, attrs=outattrs)

    else:
        if not multitime:
            outdata = outdata[:, timeidx, :]
            outdata = outdata[:, np.newaxis, :]

        outarr = outdata

    return outarr


def combine_files(wrfseq, varname, timeidx, is_moving=None,
                  method="cat", squeeze=True, meta=True,
                  _key=None):

    # Handles generators, single files, lists, tuples, custom classes
    wrfseq = get_iterable(wrfseq)

    # Dictionary is unique
    if is_mapping(wrfseq):
        outarr = _combine_dict(wrfseq, varname, timeidx, method, meta, _key)
    elif method.lower() == "cat":
        outarr = _cat_files(wrfseq, varname, timeidx, is_moving,
                            squeeze, meta, _key)
    elif method.lower() == "join":
        outarr = _join_files(wrfseq, varname, timeidx, is_moving, meta, _key)
    else:
        raise ValueError("method must be 'cat' or 'join'")

    return outarr.squeeze() if squeeze else outarr


# Cache is a dictionary of already extracted variables
def _extract_var(wrfnc, varname, timeidx, is_moving,
                 method, squeeze, cache, meta, _key):
    # Mainly used internally so variables don't get extracted multiple times,
    # particularly to copy metadata.  This can be slow.
    if cache is not None:
        try:
            cache_var = cache[varname]
        except KeyError:
            pass
        else:
            if not meta:
                return npvalues(cache_var)

            return cache_var

    multitime = is_multi_time_req(timeidx)
    multifile = is_multi_file(wrfnc)

    if is_time_coord_var(varname):
        return extract_times(wrfnc, timeidx, method, squeeze, cache,
                             meta, do_xtime=True)

    if not multifile:
        if xarray_enabled() and meta:
            if is_moving is None:
                is_moving = is_moving_domain(wrfnc, varname, _key=_key)
            result = _build_data_array(wrfnc, varname, timeidx, is_moving,
                                       multifile, _key)
        else:
            if not multitime:
                result = wrfnc.variables[varname][timeidx,:]
                result = result[np.newaxis, :] # So that no squeeze works
            else:
                result = wrfnc.variables[varname][:]
    else:
        # Squeeze handled in this routine, so just return it
        return combine_files(wrfnc, varname, timeidx, is_moving,
                             method, squeeze, meta, _key)

    return result.squeeze() if squeeze else result


def extract_vars(wrfnc, timeidx, varnames, method="cat", squeeze=True,
                 cache=None, meta=True, _key=None):
    if isstr(varnames):
        varlist = [varnames]
    else:
        varlist = varnames

    return {var:_extract_var(wrfnc, var, timeidx, None,
                             method, squeeze, cache, meta, _key)
            for var in varlist}

# Python 3 compatability
def npbytes_to_str(var):
    return (bytes(c).decode("utf-8") for c in var[:])


def _make_time(timearr):
    return dt.datetime.strptime("".join(npbytes_to_str(timearr)),
                                "%Y-%m-%d_%H:%M:%S")


def _file_times(wrfnc, do_xtime):
    if not do_xtime:
        times = wrfnc.variables["Times"][:,:]
        for i in py3range(times.shape[0]):
            yield _make_time(times[i,:])
    else:
        xtimes = wrfnc.variables["XTIME"][:]
        for i in py3range(xtimes.shape[0]):
            yield xtimes[i]


def _extract_time_map(wrfnc, timeidx, do_xtime, meta=False):
    return {key : extract_times(wrfseq, timeidx, do_xtime, meta)
            for key, wrfseq in viewitems(wrfnc)}


def extract_times(wrfnc, timeidx, method="cat", squeeze=True, cache=None,
                  meta=False, do_xtime=False):
    if is_mapping(wrfnc):
        return _extract_time_map(wrfnc, timeidx, do_xtime)

    multitime = is_multi_time_req(timeidx)
    multi_file = is_multi_file(wrfnc)
    if not multi_file:
        wrf_list = [wrfnc]
    else:
        wrf_list = wrfnc

    try:
        if method.lower() == "cat":
            time_list = [file_time
                         for wrf_file in wrf_list
                         for file_time in _file_times(wrf_file, do_xtime)]
        elif method.lower() == "join":
            time_list = [[file_time
                          for file_time in _file_times(wrf_file, do_xtime)]
                         for wrf_file in wrf_list]
        else:
            raise ValueError("invalid method argument '{}'".format(method))
    except KeyError:
        return None # Thrown for pre-3.7 XTIME not existing

    if xarray_enabled() and meta:
        outattrs = OrderedDict()
        outcoords = None

        if method.lower() == "cat":
            outdimnames = ["Time"]
        else:
            outdimnames = ["fileidx", "Time"]

        if not do_xtime:
            outname = "times"
            outattrs["description"] = "model times [np.datetime64]"

        else:
            ncfile = next(iter(wrf_list))
            var = ncfile.variables["XTIME"]
            outattrs.update(var.__dict__)

            outname = "XTIME"

        outarr = DataArray(time_list, name=outname, coords=outcoords,
                           dims=outdimnames, attrs=outattrs)

    else:
        outarr = np.asarray(time_list)

    if not multitime:
        return outarr[timeidx]

    return outarr


def is_standard_wrf_var(wrfnc, var):
    multifile = is_multi_file(wrfnc)
    if multifile:
        if not is_mapping(wrfnc):
            wrfnc = next(iter(wrfnc))
        else:
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return is_standard_wrf_var(entry, var)

    return var in wrfnc.variables


def is_staggered(var, wrfnc):
    we = extract_dim(wrfnc, "west_east")
    sn = extract_dim(wrfnc, "south_north")
    bt = extract_dim(wrfnc, "bottom_top")

    if (var.shape[-1] != we or var.shape[-2] != sn or var.shape[-3] != bt):
        return True

    return False


def get_left_indexes(ref_var, expected_dims):
    """Returns the extra left side dimensions for a variable with an expected
    shape.

    For example, if a 2D variable contains an additional left side dimension
    for time, this will return the time dimension size.

    """
    extra_dim_num = ref_var.ndim - expected_dims

    if (extra_dim_num == 0):
        return []

    return tuple([ref_var.shape[x] for x in py3range(extra_dim_num)])


def iter_left_indexes(dims):
    """A generator which yields the iteration tuples for a sequence of
    dimensions sizes.

    For example, if an array shape is (3,3), then this will yield:

    (0,0), (0,1), (1,0), (1,1)

    Arguments:

        - dims - a sequence of dimensions sizes (e.g. ndarry.shape)

    """
    arg = [py3range(dim) for dim in dims]
    for idxs in product(*arg):
        yield idxs


def get_right_slices(var, right_ndims, fixed_val=0):
    extra_dim_num = var.ndim - right_ndims
    if extra_dim_num == 0:
        return [slice(None)] * right_ndims

    return tuple([fixed_val]*extra_dim_num +
                 [slice(None)]*right_ndims)


def get_proj_params(wrfnc, timeidx=0, varname=None):
    proj_params = extract_global_attrs(wrfnc, attrs=("MAP_PROJ",
                                                "CEN_LAT", "CEN_LON",
                                                "TRUELAT1", "TRUELAT2",
                                                "MOAD_CEN_LAT", "STAND_LON",
                                                "POLE_LAT", "POLE_LON"))
    multitime = is_multi_time_req(timeidx)
    if not multitime:
        time_idx_or_slice = timeidx
    else:
        time_idx_or_slice = slice(None)

    if varname is not None:
        if not is_coordvar(varname):
            coord_names = getattr(wrfnc.variables[varname],
                                  "coordinates").split()
            lon_coord = coord_names[0]
            lat_coord = coord_names[1]
        else:
            lat_coord, lon_coord = get_coord_pairs(varname)
    else:
        lat_coord, lon_coord = latlon_coordvars(wrfnc.variables)

    return (wrfnc.variables[lat_coord][time_idx_or_slice,:],
            wrfnc.variables[lon_coord][time_idx_or_slice,:],
            proj_params)


def from_args(func, argnames, *args, **kwargs):
    """Parses the function args and kargs looking for the desired argument
    value. Otherwise, the value is taken from the default keyword argument
    using the arg spec.

    """
    if isstr(argnames):
        arglist = [argnames]
    else:
        arglist = argnames

    result = OrderedDict()
    for argname in arglist:
        arg_loc = arg_location(func, argname, args, kwargs)

        if arg_loc is not None:
            result[argname] = arg_loc[0][arg_loc[1]]
        else:
            result[argname] = None

    return result


def _args_to_list2(func, args, kwargs):
    argspec = getargspec(func)

    # Build the full tuple with defaults filled in
    outargs = [None]*len(argspec.args)
    if argspec.defaults is not None:
        for i,default in enumerate(argspec.defaults[::-1], 1):
            outargs[-i] = default

    # Add the supplied args
    for i,arg in enumerate(args):
        outargs[i] = arg

    # Fill in the supplied kargs
    for argname,val in viewitems(kwargs):
        argidx = argspec.args.index(argname)
        outargs[argidx] = val

    return outargs


def _args_to_list3(func, args, kwargs):
    sig = signature(func)
    bound = sig.bind(*args, **kwargs)
    bound.apply_defaults()

    return [x for x in bound.arguments.values()]


# Note:  Doesn't allow for **kwargs or *args
def args_to_list(func, args, kwargs):
    """Converts the mixed args/kwargs to a single list of args"""
    if version_info > (3,):
        _args_to_list = _args_to_list3
    else:
        _args_to_list = _args_to_list2

    return _args_to_list(func, args, kwargs)


def _arg_location2(func, argname, args, kwargs):
    argspec = getargspec(func)

    list_args = _args_to_list2(func, args, kwargs)

    # Return the new sequence and location
    if argname not in argspec.args and argname not in kwargs:
        return None

    result_idx = argspec.args.index(argname)

    return list_args, result_idx


def _arg_location3(func, argname, args, kwargs):
    sig = signature(func)
    params = list(sig.parameters.keys())

    list_args = _args_to_list3(func, args, kwargs)

    try:
        result_idx = params.index(argname)
    except ValueError:
        return None

    return list_args, result_idx


def arg_location(func, argname, args, kwargs):
    """Parses the function args, kargs and signature looking for the desired
    argument location (either in args, kargs, or argspec.defaults),
    and returns a list containing representing all arguments in the
    correct order with defaults filled in.

    """
    if version_info > (3,):
        _arg_location = _arg_location3
    else:
        _arg_location = _arg_location2

    return _arg_location(func, argname, args, kwargs)


def psafilepath():
    return os.path.join(os.path.dirname(__file__), "data", "psadilookup.dat")


def get_id(seq):
    if not is_mapping(seq):
        return id(seq)

    # For each key in the mapping, recurisvely call get_id until
    # until a non-mapping is found
    return {key : get_id(val) for key,val in viewitems(seq)}