"""A collection of diagnostic and interpolation routines for use with
output from the Weather Research and Forecasting (WRF-ARW) Model.

"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import os
from sys import version_info
from copy import copy
from collections import Iterable, Mapping, OrderedDict
from itertools import product
from types import GeneratorType
import datetime as dt
from math import floor, copysign
from inspect import getmodule
try:
from inspect import signature
except ImportError:
from inspect import getargspec
try:
from inspect import getargvalues
except ImportError:
from inspect import getgeneratorlocals
import numpy as np
import numpy.ma as ma
from .config import xarray_enabled
from .projection import getproj, NullProjection
from .constants import Constants, ALL_TIMES
from .py3compat import (viewitems, viewkeys, viewvalues, isstr, py2round,
py3range, ucode)
from .cache import cache_item, get_cached_item
if xarray_enabled():
from xarray import DataArray
from pandas import NaT
# Maps each lat/lon coordinate variable name to its full (lat, lon) pair.
# Covers the mass grid (XLAT/XLONG), met_em files (_M), the staggered
# u/v grids (_U/_V), and the computational grid (CLAT/CLONG).
_COORD_PAIR_MAP = {"XLAT" : ("XLAT", "XLONG"),
                   "XLONG" : ("XLAT", "XLONG"),
                   "XLAT_M" : ("XLAT_M", "XLONG_M"),
                   "XLONG_M" : ("XLAT_M", "XLONG_M"),
                   "XLAT_U" : ("XLAT_U", "XLONG_U"),
                   "XLONG_U" : ("XLAT_U", "XLONG_U"),
                   "XLAT_V" : ("XLAT_V", "XLONG_V"),
                   "XLONG_V" : ("XLAT_V", "XLONG_V"),
                   "CLAT" : ("CLAT", "CLONG"),
                   "CLONG" : ("CLAT", "CLONG")}

# All recognized lat/lon coordinate variable names (lat names at even
# indexes, lon names at odd indexes — _find_coord_names relies on this).
_COORD_VARS = ("XLAT", "XLONG", "XLAT_M", "XLONG_M", "XLAT_U", "XLONG_U",
               "XLAT_V", "XLONG_V", "CLAT", "CLONG")

# Latitude-only subset of _COORD_VARS
_LAT_COORDS = ("XLAT", "XLAT_M", "XLAT_U", "XLAT_V", "CLAT")

# Longitude-only subset of _COORD_VARS
_LON_COORDS = ("XLONG", "XLONG_M", "XLONG_U","XLONG_V", "CLONG")

# Recognized time coordinate variable names
_TIME_COORD_VARS = ("XTIME",)
def is_time_coord_var(varname):
    """Return True if *varname* names a time coordinate variable."""
    return varname in _TIME_COORD_VARS
def get_coord_pairs(varname):
    """Return the (lat, lon) coordinate variable pair for *varname*."""
    return _COORD_PAIR_MAP[varname]
def is_multi_time_req(timeidx):
    """Return True if all times were requested (a *timeidx* of None)."""
    return timeidx is None
def is_multi_file(wrfnc):
    """Return True if *wrfnc* is a sequence of files (any non-string
    iterable) rather than a single file object."""
    return isinstance(wrfnc, Iterable) and not isstr(wrfnc)
def has_time_coord(wrfnc):
    """Return True if the file contains the XTIME coordinate variable."""
    return "XTIME" in wrfnc.variables
def is_mapping(wrfnc):
    """Return True if *wrfnc* is a dict-like mapping of sequences."""
    return isinstance(wrfnc, Mapping)
def _generator_copy(gen):
    """Return a new generator equivalent to *gen*, restarted from the
    beginning.

    The generator's defining function is looked up by name in its module
    (or in __main__ for interactively defined generators) and called again
    with the local values extracted from the generator's frame.

    NOTE(review): passing ALL frame locals as keyword arguments assumes the
    generator has not yet created additional locals — TODO confirm callers
    only copy un-started generators.
    """
    funcname = gen.__name__

    try:
        argvals = getargvalues(gen.gi_frame)
    except NameError:
        # getargvalues was not importable (see the module's try/except
        # imports), so fall back to getgeneratorlocals
        argvals = getgeneratorlocals(gen)

    module = getmodule(gen.gi_frame)

    if module is not None:
        # Bug fix: module objects have no .get() method; look the
        # generator function up by name with getattr instead.
        res = getattr(module, funcname)(**argvals.locals)
    else:
        # Created in jupyter or the python interpreter
        import __main__
        res = getattr(__main__, funcname)(**argvals.locals)

    return res
def test():
    """Simple generator used for testing; yields 1, 2, 3."""
    for value in (1, 2, 3):
        yield value
class TestGen(object):
    """Simple custom iterator used for testing; yields 0 .. count-1.

    Implements both the Python 2 (next) and Python 3 (__next__) iterator
    protocols.
    """

    def __init__(self, count=3):
        self._total = count
        self._i = 0

    def __iter__(self):
        return self

    def next(self):
        # Python 2 protocol
        if self._i >= self._total:
            raise StopIteration

        current = self._i
        self._i += 1
        return current

    # Python 3 protocol delegates to the Python 2 method
    def __next__(self):
        return self.next()
def latlon_coordvars(d):
    """Return the (lat, lon) coordinate variable names found in the
    mapping *d*, substituting None for whichever is absent."""
    lat_coord = next((name for name in _LAT_COORDS
                      if name in viewkeys(d)), None)
    lon_coord = next((name for name in _LON_COORDS
                      if name in viewkeys(d)), None)

    return lat_coord, lon_coord
def is_coordvar(varname):
    """Return True if *varname* is a lat/lon coordinate variable."""
    return varname in _COORD_VARS
class IterWrapper(Iterable):
    """A wrapper class for generators and custom iterable classes which
    returns a new iterator from the start of the sequence when __iter__
    is called."""

    def __init__(self, wrapped):
        self._wrapped = wrapped

    def __iter__(self):
        wrapped = self._wrapped
        if isinstance(wrapped, GeneratorType):
            # Generators can't be restarted; rebuild an equivalent one
            return _generator_copy(wrapped)
        # Custom iterable class: iterate over a shallow copy
        return iter(copy(wrapped))
def get_iterable(wrfseq):
    """Return a resetable iterable object for *wrfseq*.

    Single files, lists, tuples, dicts and IterWrapper instances pass
    through unchanged; generators and other custom iterables are wrapped
    (or converted to a dict for mappings) so they can be iterated more
    than once.
    """
    if not is_multi_file(wrfseq):
        return wrfseq

    if is_mapping(wrfseq):
        # generator/custom mapping types become a plain dict
        return wrfseq if isinstance(wrfseq, dict) else dict(wrfseq)

    if isinstance(wrfseq, (list, tuple, IterWrapper)):
        return wrfseq

    # generator/custom iterable class
    return IterWrapper(wrfseq)
def npvalues(array_type):
    """Return the raw numpy array behind a DataArray, re-masking NaN
    values when a _FillValue attribute is present; non-DataArray inputs
    pass through unchanged."""
    if not isinstance(array_type, DataArray):
        return array_type

    try:
        fill_value = array_type.attrs["_FillValue"]
    except KeyError:
        # No fill value recorded, so nothing to re-mask
        return array_type.values

    # DataArrays convert masked values to NaN; rebuild the masked array
    # and restore the original fill value
    masked = ma.masked_invalid(array_type.values, copy=False)
    masked.set_fill_value(fill_value)
    return masked
# Helper utilities for metadata
class either(object):
    """Callable that returns the first of several variable names that is
    present in a WRF file (or sequence/dict of files)."""

    def __init__(self, *varnames):
        self.varnames = varnames

    def __call__(self, wrfnc):
        # Reduce a sequence or mapping of files to a single file first
        if is_multi_file(wrfnc):
            if is_mapping(wrfnc):
                entry = wrfnc[next(iter(viewkeys(wrfnc)))]
                return self(entry)
            wrfnc = next(iter(wrfnc))

        for varname in self.varnames:
            if varname in wrfnc.variables:
                return varname

        raise ValueError("{} are not valid variable names".format(
            self.varnames))
class combine_with(object):
    """Callable that derives new dimension names and coordinates from an
    existing variable, removing and/or inserting dimensions.

    Dimensions in *remove_dims* are removed first; *new_dimnames* are
    then inserted before *insert_before* (or replace the dimension list
    entirely when *insert_before* is None).
    """

    def __init__(self, varname, remove_dims=None, insert_before=None,
                 new_dimnames=None, new_coords=None):
        self.varname = varname
        self.remove_dims = remove_dims
        self.insert_before = insert_before
        self.new_dimnames = [] if new_dimnames is None else new_dimnames
        self.new_coords = (OrderedDict() if new_coords is None
                           else new_coords)

    def __call__(self, var):
        new_dims = list(var.dims)
        new_coords = OrderedDict(var.coords)

        if self.remove_dims is not None:
            for dimname in self.remove_dims:
                new_dims.remove(dimname)
                del new_coords[dimname]

        if self.insert_before is not None:
            before_idx = new_dims.index(self.insert_before)
            new_dims = (new_dims[0:before_idx] + self.new_dimnames +
                        new_dims[before_idx:])
        elif self.new_dimnames is not None:
            new_dims = self.new_dimnames

        if self.new_coords is not None:
            new_coords.update(self.new_coords)

        return new_dims, new_coords
class combine_dims(object):
    """Callable that builds a shape tuple by picking dimension sizes from
    argument variables.

    *pairs* is a sequence of (argument_index, dim_index_or_indexes), e.g.:

        [(0, (-3, -2)),   # variable 1
         (1, -1)]         # variable 2
    """

    def __init__(self, pairs):
        self.pairs = pairs

    def __call__(self, *args):
        result = []
        for argidx, dimidxs in self.pairs:
            shape = args[argidx].shape
            if isinstance(dimidxs, Iterable):
                result.extend(shape[i] for i in dimidxs)
            else:
                result.append(shape[dimidxs])

        return tuple(result)
class from_var(object):
    """Callable that extracts an attribute from a named variable found in
    a wrapped function's arguments (used for metadata handling)."""

    def __init__(self, varname, attribute):
        self.varname = varname
        self.attribute = attribute

    def __call__(self, wrapped, *args, **kwargs):
        vard = from_args(wrapped, (self.varname,), *args, **kwargs)

        var = vard[self.varname] if vard is not None else None

        # Only DataArrays carry the attrs mapping we need
        if isinstance(var, DataArray):
            return var.attrs.get(self.attribute, None)

        return None
def _corners_moved(wrfnc, first_ll_corner, first_ur_corner, latvar, lonvar):
    """Return True if any time in *wrfnc* has lower-left/upper-right
    corner lat/lons differing from the supplied first-file corners."""
    lats = wrfnc.variables[latvar]
    lons = wrfnc.variables[lonvar]

    ndims = len(lats.shape)  # PyNIO does not support ndim

    # Need to check all times
    for t in py3range(lats.shape[-3]):
        ll_idxs = [0] * ndims
        ll_idxs[-3] = t
        ll_idxs = tuple(ll_idxs)

        ur_idxs = [-1] * ndims
        ur_idxs[-3] = t
        ur_idxs = tuple(ur_idxs)

        moved = (first_ll_corner[0] != lats[ll_idxs] or
                 first_ll_corner[1] != lons[ll_idxs] or
                 first_ur_corner[0] != lats[ur_idxs] or
                 first_ur_corner[1] != lons[ur_idxs])
        if moved:
            return True

    return False
def is_moving_domain(wrfseq, varname=None, latvar=either("XLAT", "XLAT_M"),
                     lonvar=either("XLONG", "XLONG_M"), _key=None):
    """Return True if the domain corner points move across the sequence
    of files in *wrfseq*.

    Parameters:
        wrfseq: a single file object, a sequence of file objects, or a
            mapping of sequences (only the first mapping entry is checked).
        varname: optional variable name whose 'coordinates' attribute
            selects which lat/lon variables to compare.
        latvar, lonvar: lat/lon variable names, or `either` objects that
            pick the first name present in the file(s).
        _key: cache key for storing/retrieving the result.
    """
    if isinstance(latvar, either):
        latvar = latvar(wrfseq)

    if isinstance(lonvar, either):
        lonvar = lonvar(wrfseq)

    # In case it's just a single file
    if not is_multi_file(wrfseq):
        wrfseq = [wrfseq]

    # Slow, but safe.  Compare the corner points to the first item and see
    # if any move.  Use iterator protocol in case wrfseq is not a
    # list/tuple.
    if not is_mapping(wrfseq):
        wrf_iter = iter(wrfseq)
        first_wrfnc = next(wrf_iter)
    else:
        # Currently only checking the first dict entry.
        dict_key = next(iter(viewkeys(wrfseq)))
        entry = wrfseq[dict_key]
        key = _key[dict_key] if _key is not None else None
        return is_moving_domain(entry, varname, latvar, lonvar, key)

    # The long way of checking all lat/lon corner points.  Doesn't appear
    # to be a shortcut in the netcdf files.
    if varname is not None:
        try:
            coord_names = getattr(first_wrfnc.variables[varname],
                                  "coordinates").split()
        except AttributeError:
            # Variable doesn't have a coordinates attribute, use the
            # arguments
            lon_coord = lonvar
            lat_coord = latvar
        else:
            lon_coord = coord_names[0]
            lat_coord = coord_names[1]
    else:
        lon_coord = lonvar
        lat_coord = latvar

    # See if there is a cached value
    # NOTE: this local 'product' shadows itertools.product imported at
    # module level.
    product = "is_moving_{}_{}".format(lat_coord, lon_coord)
    moving = get_cached_item(_key, product)
    if moving is not None:
        return moving

    # Need to search all the files
    lats = first_wrfnc.variables[lat_coord]
    lons = first_wrfnc.variables[lon_coord]

    # Index tuples for the lower-left (all zeros) and upper-right
    # (-1 in the last two dims) corners of the first time step
    zero_idxs = [0]*len(lats.shape)  # PyNIO doesn't have ndim
    last_idxs = list(zero_idxs)
    last_idxs[-2:] = [-1]*2

    zero_idxs = tuple(zero_idxs)
    last_idxs = tuple(last_idxs)

    lat0 = lats[zero_idxs]
    lat1 = lats[last_idxs]
    lon0 = lons[zero_idxs]
    lon1 = lons[last_idxs]

    ll_corner = (lat0, lon0)
    ur_corner = (lat1, lon1)

    # Compare every remaining file's corners against the first file's
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            if _corners_moved(wrfnc, ll_corner, ur_corner,
                              lat_coord, lon_coord):
                cache_item(_key, product, True)
                return True

    cache_item(_key, product, False)
    return False
def _get_global_attr(wrfnc, attr):
val = getattr(wrfnc, attr, None)
# PyNIO puts single values in to an array
if isinstance(val, np.ndarray):
if len(val) == 1:
return val[0]
return val
def extract_global_attrs(wrfnc, attrs):
    """Return a dict mapping each name in *attrs* (a string or sequence
    of strings) to its global attribute value from the first file."""
    attrlist = [attrs] if isstr(attrs) else attrs

    if is_multi_file(wrfnc):
        if is_mapping(wrfnc):
            # Use the first entry of the mapping
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return extract_global_attrs(entry, attrs)
        wrfnc = next(iter(wrfnc))

    return {attr: _get_global_attr(wrfnc, attr) for attr in attrlist}
def extract_dim(wrfnc, dim):
    """Return the size of dimension *dim* from the (first) file."""
    if is_multi_file(wrfnc):
        if is_mapping(wrfnc):
            entry = wrfnc[next(iter(viewkeys(wrfnc)))]
            return extract_dim(entry, dim)
        wrfnc = next(iter(wrfnc))

    d = wrfnc.dimensions[dim]
    # PyNIO stores a plain int; netCDF4 stores a Dimension object
    return d if isinstance(d, int) else len(d)
def _combine_dict(wrfdict, varname, timeidx, method, meta, _key):
    """Dictionary combination creates a new left index for each key, then
    does a cat or join for the list of files for that key.

    Each dict value is extracted with _extract_var and stacked along a
    new leftmost axis; with metadata enabled, the keys become the values
    of a new 'key_0' coordinate (existing 'key_n' coordinates from nested
    dictionaries are shifted one position to the right).
    """
    keynames = []
    numkeys = len(wrfdict)

    key_iter = iter(viewkeys(wrfdict))
    first_key = next(key_iter)
    keynames.append(first_key)

    is_moving = is_moving_domain(wrfdict, varname, _key=_key)

    # Not quite sure how to handle coord caching with dictionaries, so
    # disabling it for now by setting _key to None.
    first_array = _extract_var(wrfdict[first_key], varname,
                               timeidx, is_moving=is_moving, method=method,
                               squeeze=False, cache=None, meta=meta,
                               _key=_key[first_key])

    # Create the output data numpy array based on the first array
    outdims = [numkeys]
    outdims += first_array.shape
    outdata = np.empty(outdims, first_array.dtype)
    outdata[0,:] = first_array[:]

    # Fill the remaining keys' slots, enforcing a uniform shape
    idx = 1
    while True:
        try:
            key = next(key_iter)
        except StopIteration:
            break
        else:
            keynames.append(key)
            vardata = _extract_var(wrfdict[key], varname, timeidx,
                                   is_moving=is_moving, method=method,
                                   squeeze=False, cache=None, meta=meta,
                                   _key=_key[key])

            if outdata.shape[1:] != vardata.shape:
                raise ValueError("data sequences must have the "
                                 "same size for all dictionary keys")
            outdata[idx,:] = npvalues(vardata)[:]
            idx += 1

    if xarray_enabled() and meta:
        outname = str(first_array.name)
        # Note: assumes that all entries in dict have same coords
        outcoords = OrderedDict(first_array.coords)

        # First find and store all the existing key coord names/values
        # This is applicable only if there are nested dictionaries.
        key_coordnames = []
        coord_vals = []
        existing_cnt = 0
        while True:
            key_coord_name = "key_{}".format(existing_cnt)

            if key_coord_name not in first_array.dims:
                break

            key_coordnames.append(key_coord_name)
            coord_vals.append(npvalues(first_array.coords[key_coord_name]))

            existing_cnt += 1

        # Now add the key coord name and values for THIS dictionary.
        # Put the new key_n name at the bottom, but the new values will
        # be at the top to be associated with key_0 (left most).  This
        # effectively shifts the existing 'key_n' coordinate values to the
        # right one dimension so *this* dictionary's key coordinate values
        # are at 'key_0'.
        key_coordnames.append(key_coord_name)
        coord_vals.insert(0, keynames)

        # make it so that key_0 is leftmost
        outdims = key_coordnames + list(first_array.dims[existing_cnt:])

        # Create the new 'key_n', value pairs
        for coordname, coordval in zip(key_coordnames, coord_vals):
            outcoords[coordname] = coordval

        outattrs = OrderedDict(first_array.attrs)

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdims, attrs=outattrs)
    else:
        outarr = outdata

    return outarr
def _find_coord_names(coords):
    """Return the (lat, lon, xtime) coordinate names present in *coords*,
    substituting None for any that are missing."""
    def _first_present(candidates):
        # First candidate found in coords, else None
        for name in candidates:
            if name in coords:
                return name
        return None

    # Even indexes of _COORD_VARS are lat names, odd indexes lon names
    lat_coord = _first_present(_COORD_VARS[0::2])
    lon_coord = _first_present(_COORD_VARS[1::2])
    xtime_coord = _first_present(_TIME_COORD_VARS)

    return lat_coord, lon_coord, xtime_coord
def _find_max_time_size(wrfseq):
    """Return the largest 'Time' dimension size found in the sequence."""
    max_times = 0
    for wrfnc in wrfseq:
        max_times = max(max_times, extract_dim(wrfnc, "Time"))

    return max_times
def _build_data_array(wrfnc, varname, timeidx, is_moving_domain, is_multifile,
                      _key):
    """Build an xarray.DataArray for *varname* from a single file,
    attaching lat/lon/time coordinates and projection metadata.

    Note: wrfnc is always a single netcdf file object.
    is_moving_domain and is_multifile are arguments indicating if the
    single file came from a sequence, and if that sequence has a moving
    domain.  Both arguments are used mainly for coordinate extraction and
    caching.
    """
    multitime = is_multi_time_req(timeidx)
    time_idx_or_slice = timeidx if not multitime else slice(None)
    var = wrfnc.variables[varname]
    data = var[time_idx_or_slice, :]
    time_coord = None

    # Want to preserve the time dimension
    if not multitime:
        data = data[np.newaxis, :]

    attrs = OrderedDict(var.__dict__)
    dimnames = var.dimensions[-data.ndim:]

    # WRF variables will have a coordinates attribute.  MET_EM files have
    # a stagger attribute which indicates the coordinate variable.
    try:
        # WRF files
        coord_attr = getattr(var, "coordinates")
    except AttributeError:
        if is_coordvar(varname):
            # Coordinate variable (most likely XLAT or XLONG)
            lat_coord, lon_coord = get_coord_pairs(varname)
            time_coord = None

            if has_time_coord(wrfnc):
                time_coord = "XTIME"

        elif is_time_coord_var(varname):
            lon_coord = None
            lat_coord = None
            time_coord = None
        else:
            try:
                # met_em files
                stag_attr = getattr(var, "stagger")
            except AttributeError:
                # No coordinate information available at all
                lon_coord = None
                lat_coord = None
            else:
                # For met_em files, use the stagger name to get the
                # lat/lon var
                lat_coord = "XLAT_{}".format(stag_attr)
                lon_coord = "XLONG_{}".format(stag_attr)
    else:
        # The coordinates attribute is "<lon> <lat> [<time>]"
        coord_names = coord_attr.split()
        lon_coord = coord_names[0]
        lat_coord = coord_names[1]

        try:
            time_coord = coord_names[2]
        except IndexError:
            time_coord = None

    coords = OrderedDict()

    # Handle lat/lon coordinates and projection information if available
    if lon_coord is not None and lat_coord is not None:
        # Using a cache for coordinate variables so the extraction only
        # happens once.
        lon_coord_dimkey = lon_coord + "_dim"
        lon_coord_valkey = lon_coord + "_val"
        lat_coord_dimkey = lat_coord + "_dim"
        lat_coord_valkey = lat_coord + "_val"

        lon_coord_dims = get_cached_item(_key, lon_coord_dimkey)
        lon_coord_vals = get_cached_item(_key, lon_coord_valkey)
        if lon_coord_dims is None or lon_coord_vals is None:
            lon_var = wrfnc.variables[lon_coord]
            lon_coord_dims = lon_var.dimensions
            lon_coord_vals = lon_var[:]

            # Only cache here if the domain is not moving, otherwise
            # caching is handled by cat/join
            if not is_moving_domain:
                cache_item(_key, lon_coord_dimkey, lon_coord_dims)
                cache_item(_key, lon_coord_valkey, lon_coord_vals)

        lat_coord_dims = get_cached_item(_key, lat_coord_dimkey)
        lat_coord_vals = get_cached_item(_key, lat_coord_valkey)
        if lat_coord_dims is None or lat_coord_vals is None:
            lat_var = wrfnc.variables[lat_coord]
            lat_coord_dims = lat_var.dimensions
            lat_coord_vals = lat_var[:]

            # Only cache here if the domain is not moving, otherwise
            # caching is done in cat/join
            if not is_moving_domain:
                cache_item(_key, lat_coord_dimkey, lat_coord_dims)
                cache_item(_key, lat_coord_valkey, lat_coord_vals)

        time_coord_vals = None
        if time_coord is not None:
            # If not from a multifile sequence, then cache the time
            # coordinate.  Otherwise, handled in cat/join.
            if not is_multifile:
                time_coord_vals = get_cached_item(_key, time_coord)

                if time_coord_vals is None:
                    time_coord_vals = wrfnc.variables[time_coord][:]

                    # NOTE: this inner check is redundant inside the
                    # outer 'if not is_multifile' but is harmless
                    if not is_multifile:
                        cache_item(_key, time_coord, time_coord_vals)
            else:
                time_coord_vals = wrfnc.variables[time_coord][:]

        if multitime:
            if is_moving_domain:
                # Special case with a moving domain in a multi-time file,
                # otherwise the projection parameters don't change
                coords[lon_coord] = lon_coord_dims, lon_coord_vals
                coords[lat_coord] = lat_coord_dims, lat_coord_vals

                # Returned lats/lons arrays will have a time dimension,
                # so proj will need to be a list due to moving corner
                # points
                lats, lons, proj_params = get_proj_params(wrfnc,
                                                          timeidx,
                                                          varname)
                proj = [getproj(lats=lats[i,:],
                                lons=lons[i,:],
                                **proj_params)
                        for i in py3range(lats.shape[0])]
            else:
                # Domain not moving: drop the time dimension from the
                # coordinate arrays
                coords[lon_coord] = (lon_coord_dims[1:],
                                     lon_coord_vals[0,:])
                coords[lat_coord] = (lat_coord_dims[1:],
                                     lat_coord_vals[0,:])

                # Domain not moving, so just get the first time
                lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
                proj = getproj(lats=lats, lons=lons, **proj_params)

            if time_coord is not None:
                coords[time_coord] = (lon_coord_dims[0], time_coord_vals)
        else:
            # Single time requested: take that time's coordinate slice
            coords[lon_coord] = (lon_coord_dims[1:],
                                 lon_coord_vals[timeidx,:])
            coords[lat_coord] = (lat_coord_dims[1:],
                                 lat_coord_vals[timeidx,:])

            if time_coord is not None:
                coords[time_coord] = (lon_coord_dims[0],
                                      [time_coord_vals[timeidx]])

            lats, lons, proj_params = get_proj_params(wrfnc, 0, varname)
            proj = getproj(lats=lats, lons=lons, **proj_params)

        attrs["projection"] = proj

    # Attach datetimes along the Time dimension
    if dimnames[0] == "Time":
        t = extract_times(wrfnc, timeidx, meta=False, do_xtime=False)
        if not multitime:
            t = [t]
        coords[dimnames[0]] = t

    data_array = DataArray(data, name=varname, dims=dimnames, coords=coords,
                           attrs=attrs)

    return data_array
def _find_forward(wrfseq, varname, timeidx, is_moving, meta, _key):
    """Scan the file sequence front-to-back for the file containing
    *timeidx* (a non-negative aggregate time index) and return that time
    slice of *varname*."""
    comboidx = 0
    for wrfnc in wrfseq:
        numtimes = extract_dim(wrfnc, "Time")

        if timeidx < comboidx + numtimes:
            # Index of the requested time within this file
            filetimeidx = timeidx - comboidx

            if meta:
                return _build_data_array(wrfnc, varname, filetimeidx,
                                         is_moving, True, _key)

            result = wrfnc.variables[varname][filetimeidx, :]
            return result[np.newaxis, :]  # So that nosqueeze works

        comboidx += numtimes

    raise IndexError("timeidx {} is out of bounds".format(timeidx))
def _find_reverse(wrfseq, varname, timeidx, is_moving, meta, _key):
    """Scan the file sequence back-to-front for the file containing the
    negative aggregate *timeidx* and return that time slice of
    *varname*."""
    try:
        revwrfseq = reversed(wrfseq)
    except TypeError:
        # Not a sequence type; materialize it so it can be reversed
        revwrfseq = reversed(list(wrfseq))

    # Map the negative index to a 0-based index counted from the back
    revtimeidx = -timeidx - 1

    comboidx = 0
    for wrfnc in revwrfseq:
        numtimes = extract_dim(wrfnc, "Time")

        if revtimeidx < comboidx + numtimes:
            # Finds the "forward" sequence index, then counts that
            # number back from the back of the ncfile times,
            # since the ncfile needs to be iterated backwards as well.
            filetimeidx = numtimes - (revtimeidx - comboidx) - 1

            if meta:
                return _build_data_array(wrfnc, varname, filetimeidx,
                                         is_moving, True, _key)

            result = wrfnc.variables[varname][filetimeidx, :]
            return result[np.newaxis, :]  # So that nosqueeze works

        comboidx += numtimes

    raise IndexError("timeidx {} is out of bounds".format(timeidx))
def _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta, _key):
    """Dispatch to a forward or reverse sequence search depending on the
    sign of *timeidx*."""
    finder = _find_forward if timeidx >= 0 else _find_reverse
    return finder(wrfseq, varname, timeidx, is_moving, meta, _key)
# TODO: implement in C
def _cat_files(wrfseq, varname, timeidx, is_moving, squeeze, meta, _key):
    """Concatenate *varname* across a sequence of files along the Time
    dimension.

    For a single time index, only the file containing that time is read.
    For ALL_TIMES, one output array is allocated and each file's data is
    copied in; with metadata enabled, XTIME / lat / lon coordinates and
    projection objects are aggregated as well (and cached under *_key*
    when possible).
    """
    if is_moving is None:
        is_moving = is_moving_domain(wrfseq, varname, _key=_key)

    file_times = extract_times(wrfseq, ALL_TIMES, meta=False, do_xtime=False)

    multitime = is_multi_time_req(timeidx)

    # For single times, just need to find the ncfile and appropriate
    # time index, and return that array
    if not multitime:
        return _find_arr_for_time(wrfseq, varname, timeidx, is_moving, meta,
                                  _key)

    # time_idx_or_slice = timeidx if not multitime else slice(None)

    # If all times are requested, need to build a new array and cat
    # together all of the arrays in the sequence
    wrf_iter = iter(wrfseq)

    if xarray_enabled() and meta:
        first_var = _build_data_array(next(wrf_iter), varname,
                                      ALL_TIMES, is_moving, True, _key)
    else:
        first_var = (next(wrf_iter)).variables[varname][:]

    outdims = [len(file_times)]

    # Making a new time dim, so ignore this one
    outdims += first_var.shape[1:]

    outdata = np.empty(outdims, first_var.dtype)

    numtimes = first_var.shape[0]
    startidx = 0
    endidx = numtimes

    outdata[startidx:endidx, :] = first_var[:]

    if xarray_enabled() and meta:
        latname, lonname, timename = _find_coord_names(first_var.coords)

        # Flags marking coordinate arrays pulled from the cache (which
        # are already fully aggregated and must not be overwritten)
        timecached = False
        latcached = False
        loncached = False
        projcached = False

        outxtimes = None
        outlats = None
        outlons = None
        outprojs = None

        timekey = timename+"_cat" if timename is not None else None
        latkey = latname + "_cat" if latname is not None else None
        lonkey = lonname + "_cat" if lonname is not None else None
        projkey = "projection_cat" if is_moving else None

        if timename is not None:
            outxtimes = get_cached_item(_key, timekey)
            if outxtimes is None:
                outxtimes = np.empty(outdims[0])
                outxtimes[startidx:endidx] = npvalues(
                    first_var.coords[timename][:])
            else:
                timecached = True

        if is_moving:
            outcoorddims = outdims[0:1] + outdims[-2:]

            if latname is not None:
                # Try to pull from the coord cache
                outlats = get_cached_item(_key, latkey)
                if outlats is None:
                    outlats = np.empty(outcoorddims, first_var.dtype)
                    outlats[startidx:endidx, :] = npvalues(
                        first_var.coords[latname][:])
                else:
                    latcached = True

            if lonname is not None:
                outlons = get_cached_item(_key, lonkey)
                if outlons is None:
                    outlons = np.empty(outcoorddims, first_var.dtype)
                    outlons[startidx:endidx, :] = npvalues(
                        first_var.coords[lonname][:])
                else:
                    loncached = True

            # Projections also need to be aggregated
            outprojs = get_cached_item(_key, projkey)
            if outprojs is None:
                outprojs = np.empty(outdims[0], np.object)
                outprojs[startidx:endidx] = np.asarray(
                    first_var.attrs["projection"], np.object)[:]
            else:
                projcached = True

    startidx = endidx

    # Copy in each remaining file's data (and coords, when not cached)
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            vardata = wrfnc.variables[varname][:]

            numtimes = vardata.shape[0]
            endidx = startidx + numtimes

            outdata[startidx:endidx, :] = vardata[:]

            if xarray_enabled() and meta:
                # XTIME new in 3.7
                if timename is not None and not timecached:
                    xtimedata = wrfnc.variables[timename][:]
                    outxtimes[startidx:endidx] = xtimedata[:]

                if is_moving:
                    if latname is not None and not latcached:
                        latdata = wrfnc.variables[latname][:]
                        outlats[startidx:endidx, :] = latdata[:]

                    if lonname is not None and not loncached:
                        londata = wrfnc.variables[lonname][:]
                        outlons[startidx:endidx, :] = londata[:]

                    if not projcached:
                        lats, lons, proj_params = get_proj_params(wrfnc,
                                                                  ALL_TIMES,
                                                                  varname)
                        projs = [getproj(lats=lats[i,:],
                                         lons=lons[i,:],
                                         **proj_params)
                                 for i in py3range(lats.shape[0])]

                        outprojs[startidx:endidx] = np.asarray(
                            projs, np.object)[:]

            startidx = endidx

    if xarray_enabled() and meta:

        # Cache the coords if applicable
        if not latcached and outlats is not None:
            cache_item(_key, latkey, outlats)
        if not loncached and outlons is not None:
            cache_item(_key, lonkey, outlons)
        if not projcached and outprojs is not None:
            cache_item(_key, projkey, outprojs)
        if not timecached and outxtimes is not None:
            cache_item(_key, timekey, outxtimes)

        outname = ucode(first_var.name)
        outattrs = OrderedDict(first_var.attrs)
        outcoords = OrderedDict(first_var.coords)
        outdimnames = list(first_var.dims)

        if "Time" not in outdimnames:
            outdimnames.insert(0, "Time")

        if not multitime:
            file_times = [file_times[timeidx]]

        outcoords[outdimnames[0]] = file_times

        outcoords["datetime"] = outdimnames[0], file_times

        if timename is not None:
            outxtimes = outxtimes[:]
            outcoords[timename] = outdimnames[0], outxtimes

        # If the domain is moving, need to create the lat/lon coords
        # since they can't be copied
        if is_moving:
            outlatdims = [outdimnames[0]] + outdimnames[-2:]

            if latname is not None:
                outlats = outlats[:]
                outcoords[latname] = outlatdims, outlats
            if lonname is not None:
                outlons = outlons[:]
                outcoords[lonname] = outlatdims, outlons

            outattrs["projection"] = outprojs[:]

        outdata = outdata[:]

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdimnames, attrs=outattrs)

    else:
        outdata = outdata[:]
        outarr = outdata

    return outarr
def _get_numfiles(wrfseq):
try:
return len(wrfseq)
except TypeError:
wrf_iter = iter(wrfseq)
return sum(1 for _ in wrf_iter)
# TODO: implement in C
def _join_files(wrfseq, varname, timeidx, is_moving, meta, _key):
    """Join *varname* across a sequence of files along a new leftmost
    'file' dimension.

    Every file's data is placed at [file_idx, 0:numtimes]; files with
    fewer than the maximum number of times leave fill values behind, and
    the result is converted to a masked array in that case.  With
    metadata enabled, per-(file, time) datetimes, XTIME / lat / lon
    coordinates and projections are aggregated and cached when possible.
    """
    if is_moving is None:
        is_moving = is_moving_domain(wrfseq, varname, _key=_key)
    multitime = is_multi_time_req(timeidx)
    numfiles = _get_numfiles(wrfseq)
    maxtimes = _find_max_time_size(wrfseq)

    time_idx_or_slice = timeidx if not multitime else slice(None)
    file_times_less_than_max = False
    file_idx = 0

    # wrfseq might be a generator
    wrf_iter = iter(wrfseq)
    wrfnc = next(wrf_iter)
    numtimes = extract_dim(wrfnc, "Time")

    if xarray_enabled() and meta:
        first_var = _build_data_array(wrfnc, varname, ALL_TIMES, is_moving,
                                      True, _key)
        # Initialize all datetimes to NaT; files with fewer times leave
        # NaT entries behind
        time_coord = np.full((numfiles, maxtimes), int(NaT), "datetime64[ns]")
        time_coord[file_idx, 0:numtimes] = first_var.coords["Time"][:]
    else:
        first_var = wrfnc.variables[varname][:]

    if numtimes < maxtimes:
        file_times_less_than_max = True

    # Out dimensions will be the number of files, maxtimes, then the
    # non-time shapes from the first variable
    outdims = [numfiles]
    outdims += [maxtimes]
    outdims += first_var.shape[1:]

    # For join, always need to start with full masked values
    outdata = np.full(outdims, Constants.DEFAULT_FILL, first_var.dtype)
    outdata[file_idx, 0:numtimes, :] = first_var[:]

    # Create the secondary coordinate arrays
    if xarray_enabled() and meta:
        latname, lonname, timename = _find_coord_names(first_var.coords)
        outcoorddims = outdims[0:2] + outdims[-2:]

        # Flags marking coordinate arrays pulled from the cache (already
        # fully aggregated, so not refilled in the loop below)
        timecached = False
        latcached = False
        loncached = False
        projcached = False

        outxtimes = None
        outlats = None
        outlons = None
        outprojs = None

        timekey = timename+"_join" if timename is not None else None
        latkey = latname + "_join" if latname is not None else None
        lonkey = lonname + "_join" if lonname is not None else None
        projkey = "projection_join" if is_moving else None

        if timename is not None:
            outxtimes = get_cached_item(_key, timekey)
            if outxtimes is None:
                outxtimes = np.full(outdims[0:2], Constants.DEFAULT_FILL,
                                    first_var.dtype)
                outxtimes[file_idx, 0:numtimes] = first_var.coords[timename][:]
            else:
                timecached = True

        if is_moving:
            if latname is not None:
                outlats = get_cached_item(_key, latkey)
                if outlats is None:
                    outlats = np.full(outcoorddims, Constants.DEFAULT_FILL,
                                      first_var.dtype)
                    outlats[file_idx, 0:numtimes, :] = (
                        first_var.coords[latname][:])
                else:
                    latcached = True

            if lonname is not None:
                outlons = get_cached_item(_key, lonkey)
                if outlons is None:
                    outlons = np.full(outcoorddims, Constants.DEFAULT_FILL,
                                      first_var.dtype)
                    outlons[file_idx, 0:numtimes, :] = (
                        first_var.coords[lonname][:])
                else:
                    loncached = True

            # Projections also need two dimensions
            outprojs = get_cached_item(_key, projkey)
            if outprojs is None:
                outprojs = np.full(outdims[0:2], NullProjection(), np.object)

                outprojs[file_idx, 0:numtimes] = np.asarray(
                    first_var.attrs["projection"],
                    np.object)[:]
            else:
                projcached = True

    file_idx=1
    # Copy in each remaining file's data (and coords, when not cached)
    while True:
        try:
            wrfnc = next(wrf_iter)
        except StopIteration:
            break
        else:
            numtimes = extract_dim(wrfnc, "Time")
            if numtimes < maxtimes:
                file_times_less_than_max = True
            outvar = wrfnc.variables[varname][:]

            if not multitime:
                outvar = outvar[np.newaxis, :]

            outdata[file_idx, 0:numtimes, :] = outvar[:]

            if xarray_enabled() and meta:
                # For join, the times are a function of fileidx
                file_times = extract_times(wrfnc, ALL_TIMES, meta=False,
                                           do_xtime=False)
                time_coord[file_idx, 0:numtimes] = np.asarray(
                    file_times, "datetime64[ns]")[:]

                if timename is not None and not timecached:
                    xtimedata = wrfnc.variables[timename][:]
                    outxtimes[file_idx, 0:numtimes] = xtimedata[:]

                if is_moving:
                    if latname is not None and not latcached:
                        latdata = wrfnc.variables[latname][:]
                        outlats[file_idx, 0:numtimes, :] = latdata[:]

                    if lonname is not None and not loncached:
                        londata = wrfnc.variables[lonname][:]
                        outlons[file_idx, 0:numtimes, :] = londata[:]

                    if not projcached:
                        lats, lons, proj_params = get_proj_params(wrfnc,
                                                                  ALL_TIMES,
                                                                  varname)

                        projs = [getproj(lats=lats[i,:],
                                         lons=lons[i,:],
                                         **proj_params)
                                 for i in py3range(lats.shape[0])]

                        outprojs[file_idx, 0:numtimes] = (
                            np.asarray(projs, np.object)[:])

            # Need to update coords here
            file_idx += 1

    # If any of the output files contain less than the max number of
    # times, then a mask array is needed to flag all the missing arrays
    # with missing values
    if file_times_less_than_max:
        outdata = np.ma.masked_values(outdata, Constants.DEFAULT_FILL)

    if xarray_enabled() and meta:
        # Cache the coords if applicable
        if not latcached and outlats is not None:
            cache_item(_key, latkey, outlats)
        if not loncached and outlons is not None:
            cache_item(_key, lonkey, outlons)
        if not projcached and outprojs is not None:
            cache_item(_key, projkey, outprojs)
        if not timecached and outxtimes is not None:
            cache_item(_key, timekey, outxtimes)

        outname = ucode(first_var.name)
        outcoords = OrderedDict(first_var.coords)
        outattrs = OrderedDict(first_var.attrs)
        # New dimensions
        outdimnames = ["file"] + list(first_var.dims)
        outcoords["file"] = [i for i in py3range(numfiles)]

        # Time needs to be multi dimensional, so use the default dimension
        del outcoords["Time"]

        time_coord = time_coord[:, time_idx_or_slice]
        if not multitime:
            time_coord = time_coord[:, np.newaxis]
        outcoords["datetime"] = outdimnames[0:2], time_coord

        if isinstance(outdata, np.ma.MaskedArray):
            outattrs["_FillValue"] = Constants.DEFAULT_FILL
            outattrs["missing_value"] = Constants.DEFAULT_FILL

        if timename is not None:
            outxtimes = outxtimes[:, time_idx_or_slice]
            if not multitime:
                outxtimes = outxtimes[:, np.newaxis]
            outcoords[timename] = outdimnames[0:2], outxtimes[:]

        # If the domain is moving, need to create the lat/lon coords
        # since they can't be copied
        if is_moving:
            outlatdims = outdimnames[0:2] + outdimnames[-2:]

            if latname is not None:
                outlats = outlats[:, time_idx_or_slice, :]
                if not multitime:
                    outlats = outlats[:, np.newaxis, :]
                outcoords[latname] = outlatdims, outlats
            if lonname is not None:
                outlons = outlons[:, time_idx_or_slice, :]
                if not multitime:
                    outlons = outlons[:, np.newaxis, :]
                outcoords[lonname] = outlatdims, outlons

            if not multitime:
                outattrs["projection"] = outprojs[:, timeidx]
            else:
                outattrs["projection"] = outprojs

        if not multitime:
            outdata = outdata[:, timeidx, :]
            outdata = outdata[:, np.newaxis, :]

        outarr = DataArray(outdata, name=outname, coords=outcoords,
                           dims=outdimnames, attrs=outattrs)

    else:
        if not multitime:
            outdata = outdata[:, timeidx, :]
            outdata = outdata[:, np.newaxis, :]

        outarr = outdata

    return outarr
def combine_files(wrfseq, varname, timeidx, is_moving=None,
                  method="cat", squeeze=True, meta=True,
                  _key=None):
    """Combine *varname* across a sequence of files using either the
    'cat' (concatenate along Time) or 'join' (new file dimension) method.
    Dictionaries are combined per-key with a new leftmost key dimension.
    """
    # Handles generators, single files, lists, tuples, custom classes
    wrfseq = get_iterable(wrfseq)

    # Dictionary is unique
    if is_mapping(wrfseq):
        outarr = _combine_dict(wrfseq, varname, timeidx, method, meta, _key)
    else:
        methodname = method.lower()
        if methodname == "cat":
            outarr = _cat_files(wrfseq, varname, timeidx, is_moving,
                                squeeze, meta, _key)
        elif methodname == "join":
            outarr = _join_files(wrfseq, varname, timeidx, is_moving,
                                 meta, _key)
        else:
            raise ValueError("method must be 'cat' or 'join'")

    return outarr.squeeze() if squeeze else outarr
# Cache is a dictionary of already extracted variables
def _extract_var(wrfnc, varname, timeidx, is_moving,
                 method, squeeze, cache, meta, _key):
    """Extract *varname* from a single file or sequence of files,
    consulting *cache* first.

    Mainly used internally so variables don't get extracted multiple
    times, particularly to copy metadata.  This can be slow.
    """
    if cache is not None:
        try:
            cache_var = cache[varname]
        except KeyError:
            pass
        else:
            # Cache hit: strip metadata if it wasn't requested
            if not meta:
                return npvalues(cache_var)

            return cache_var

    multitime = is_multi_time_req(timeidx)
    multifile = is_multi_file(wrfnc)

    # Time coordinate variables are handled by extract_times
    if is_time_coord_var(varname):
        return extract_times(wrfnc, timeidx, method, squeeze, cache,
                             meta, do_xtime=True)

    if not multifile:
        if xarray_enabled() and meta:
            if is_moving is None:
                is_moving = is_moving_domain(wrfnc, varname, _key=_key)
            result = _build_data_array(wrfnc, varname, timeidx, is_moving,
                                       multifile, _key)
        else:
            if not multitime:
                result = wrfnc.variables[varname][timeidx,:]
                result = result[np.newaxis, :]  # So that nosqueeze works
            else:
                result = wrfnc.variables[varname][:]
    else:
        # Squeeze handled in this routine, so just return it
        return combine_files(wrfnc, varname, timeidx, is_moving,
                             method, squeeze, meta, _key)

    return result.squeeze() if squeeze else result
def extract_vars(wrfnc, timeidx, varnames, method="cat", squeeze=True,
                 cache=None, meta=True, _key=None):
    """Return a dict mapping each requested variable name to its data.

    *varnames* may be a single name or a sequence of names.
    """
    if isstr(varnames):
        names = [varnames]
    else:
        names = varnames

    extracted = {}
    for name in names:
        extracted[name] = _extract_var(wrfnc, name, timeidx, None,
                                       method, squeeze, cache, meta,
                                       _key)
    return extracted
# Python 3 compatibility
def npbytes_to_str(var):
    """Yield each entry of the byte array *var* decoded as UTF-8."""
    for ch in var[:]:
        yield bytes(ch).decode("utf-8")
def _make_time(timearr):
    """Decode a character array into a datetime.

    The characters are expected to follow the WRF 'Times' layout
    'YYYY-MM-DD_HH:MM:SS'.
    """
    timestr = "".join(npbytes_to_str(timearr))
    return dt.datetime.strptime(timestr, "%Y-%m-%d_%H:%M:%S")
def _file_times(wrfnc, do_xtime):
    """Generate the times stored in a single WRF file.

    When *do_xtime* is False the character-array 'Times' variable is
    decoded into datetimes; otherwise the raw XTIME values are yielded.
    """
    if do_xtime:
        xtimes = wrfnc.variables["XTIME"][:]
        for xtime in xtimes:
            yield xtime
    else:
        char_rows = wrfnc.variables["Times"][:, :]
        for row in char_rows:
            yield _make_time(row)
def _extract_time_map(wrfnc, timeidx, do_xtime, meta=False):
    """Extract the times for each entry of a mapping of WRF sequences.

    Returns a dict with the same keys as *wrfnc* whose values are the
    extracted times for that key's file sequence.

    Bug fix: extract_times takes (wrfnc, timeidx, method, squeeze, cache,
    meta, do_xtime), so *do_xtime* and *meta* must be passed by keyword.
    Passing them positionally handed do_xtime to *method* (and meta to
    *squeeze*), which fails as soon as method.lower() is called on a bool.
    """
    return {key: extract_times(wrfseq, timeidx, meta=meta,
                               do_xtime=do_xtime)
            for key, wrfseq in viewitems(wrfnc)}
def extract_times(wrfnc, timeidx, method="cat", squeeze=True, cache=None,
                  meta=False, do_xtime=False):
    """Return the model times for a file, sequence of files, or mapping.

    With *do_xtime* set, the XTIME variable is read instead of the
    'Times' character array.  *method* 'cat' produces a flat sequence,
    'join' produces one sub-sequence per file.  Returns None when XTIME
    is requested but absent (pre-3.7 output).
    """
    if is_mapping(wrfnc):
        return _extract_time_map(wrfnc, timeidx, do_xtime)

    multitime = is_multi_time_req(timeidx)

    if is_multi_file(wrfnc):
        wrf_list = wrfnc
    else:
        wrf_list = [wrfnc]

    how = method.lower()
    try:
        if how == "cat":
            time_list = []
            for wrf_file in wrf_list:
                time_list.extend(_file_times(wrf_file, do_xtime))
        elif how == "join":
            time_list = []
            for wrf_file in wrf_list:
                time_list.append(list(_file_times(wrf_file, do_xtime)))
        else:
            raise ValueError("invalid method argument '{}'".format(method))
    except KeyError:
        return None  # Thrown for pre-3.7 XTIME not existing

    if xarray_enabled() and meta:
        outattrs = OrderedDict()

        if how == "cat":
            outdimnames = ["Time"]
        else:
            outdimnames = ["fileidx", "Time"]

        if do_xtime:
            ncfile = next(iter(wrf_list))
            xtime_var = ncfile.variables["XTIME"]
            outattrs.update(xtime_var.__dict__)
            outname = "XTIME"
        else:
            outname = "times"
            outattrs["description"] = "model times [np.datetime64]"

        outarr = DataArray(time_list, name=outname, coords=None,
                           dims=outdimnames, attrs=outattrs)
    else:
        outarr = np.asarray(time_list)

    if multitime:
        return outarr
    return outarr[timeidx]
def is_standard_wrf_var(wrfnc, var):
    """Return True when *var* is found directly in the file's variables.

    For sequences, only the first file is inspected; for mappings, the
    first entry's sequence is checked recursively.
    """
    if is_multi_file(wrfnc):
        if is_mapping(wrfnc):
            first_key = next(iter(viewkeys(wrfnc)))
            return is_standard_wrf_var(wrfnc[first_key], var)
        wrfnc = next(iter(wrfnc))
    return var in wrfnc.variables
def is_staggered(var, wrfnc):
    """Return True when *var*'s shape differs from the unstaggered grid.

    The rightmost three dimensions of *var* are compared against the
    file's west_east, south_north, and bottom_top dimension sizes.
    """
    west_east = extract_dim(wrfnc, "west_east")
    south_north = extract_dim(wrfnc, "south_north")
    bottom_top = extract_dim(wrfnc, "bottom_top")
    return (var.shape[-1] != west_east
            or var.shape[-2] != south_north
            or var.shape[-3] != bottom_top)
def get_left_indexes(ref_var, expected_dims):
    """Return the sizes of the extra left-side dimensions of *ref_var*.

    For example, if a 2D variable carries an additional leading time
    dimension, this returns a tuple holding that dimension's size.  An
    empty list is returned when there are no extra dimensions (note:
    list vs tuple asymmetry preserved for compatibility with callers).
    """
    num_extra = ref_var.ndim - expected_dims
    if num_extra == 0:
        return []
    return tuple(ref_var.shape[:num_extra])
def iter_left_indexes(dims):
    """Yield the index tuples covering a sequence of dimension sizes.

    For example, dims of (2, 2) yields (0,0), (0,1), (1,0), (1,1).

    Arguments:
        - dims - a sequence of dimension sizes (e.g. ndarray.shape)
    """
    ranges = [py3range(size) for size in dims]
    for combo in product(*ranges):
        yield combo
def get_right_slices(var, right_ndims, fixed_val=0):
    """Build an index that keeps the rightmost *right_ndims* dimensions
    of *var* whole and pins every extra left dimension to *fixed_val*.

    Returns a list of full slices when there are no extra dimensions,
    otherwise a tuple of fixed indexes followed by full slices.
    """
    num_extra = var.ndim - right_ndims
    full = slice(None)
    if num_extra == 0:
        return [full] * right_ndims
    return tuple([fixed_val] * num_extra + [full] * right_ndims)
def get_proj_params(wrfnc, timeidx=0, varname=None):
    """Return (lats, lons, projection attributes) for a WRF file.

    The latitude/longitude coordinate variables are chosen from
    *varname*'s 'coordinates' attribute when given (or its known
    coordinate pair when it is itself a coordinate variable); otherwise
    they are discovered from the file's variables.  A single time index
    selects one time; a multi-time request returns all times.
    """
    proj_params = extract_global_attrs(wrfnc,
                                       attrs=("MAP_PROJ",
                                              "CEN_LAT", "CEN_LON",
                                              "TRUELAT1", "TRUELAT2",
                                              "MOAD_CEN_LAT", "STAND_LON",
                                              "POLE_LAT", "POLE_LON"))

    if is_multi_time_req(timeidx):
        time_idx_or_slice = slice(None)
    else:
        time_idx_or_slice = timeidx

    if varname is None:
        lat_coord, lon_coord = latlon_coordvars(wrfnc.variables)
    elif is_coordvar(varname):
        lat_coord, lon_coord = get_coord_pairs(varname)
    else:
        coord_names = getattr(wrfnc.variables[varname],
                              "coordinates").split()
        lon_coord = coord_names[0]
        lat_coord = coord_names[1]

    lats = wrfnc.variables[lat_coord][time_idx_or_slice, :]
    lons = wrfnc.variables[lon_coord][time_idx_or_slice, :]
    return lats, lons, proj_params
class CoordPair(object):
    """A simple container pairing x/y, i/j, and lat/lon coordinates.

    Any member may be left as None; the string helpers return None when
    either member of the requested pair is unset.
    """

    def __init__(self, x=None, y=None, i=None, j=None, lat=None, lon=None):
        self.x = x
        self.y = y
        self.i = i
        self.j = j
        self.lat = lat
        self.lon = lon

    def __repr__(self):
        # Each pair is reported only when its first member is set.
        parts = []
        if self.x is not None:
            parts.append("x={}".format(self.x))
            parts.append("y={}".format(self.y))
        if self.i is not None:
            parts.append("i={}".format(self.i))
            parts.append("j={}".format(self.j))
        if self.lat is not None:
            parts.append("lat={}".format(self.lat))
            parts.append("lon={}".format(self.lon))
        return "{}({})".format(type(self).__name__, ", ".join(parts))

    def __str__(self):
        return repr(self)

    def xy_str(self, fmt="{:.4f}, {:.4f}"):
        """Return 'x, y' formatted with *fmt*, or None if either is unset."""
        if self.x is not None and self.y is not None:
            return fmt.format(self.x, self.y)
        return None

    def latlon_str(self, fmt="{:.4f}, {:.4f}"):
        """Return 'lat, lon' formatted with *fmt*, or None if either is unset."""
        if self.lat is not None and self.lon is not None:
            return fmt.format(self.lat, self.lon)
        return None

    def ij_str(self, fmt="{:.4f}, {:.4f}"):
        """Return 'i, j' formatted with *fmt*, or None if either is unset."""
        if self.i is not None and self.j is not None:
            return fmt.format(self.i, self.j)
        return None
def from_args(func, argnames, *args, **kwargs):
    """Parse the call's args/kwargs for the requested argument values.

    When an argument was not supplied, its value falls back to the
    default found via the function's signature; names that cannot be
    located at all map to None.
    """
    if isstr(argnames):
        names = [argnames]
    else:
        names = argnames

    found = {}
    for name in names:
        location = arg_location(func, name, args, kwargs)
        if location is None:
            found[name] = None
        else:
            arg_list, idx = location
            found[name] = arg_list[idx]
    return found
def _args_to_list2(func, args, kwargs):
    """Python 2 implementation: flatten args/kwargs into one positional
    list with the signature's defaults filled in."""
    argspec = getargspec(func)

    # Start with the defaults, right-aligned against the parameter list.
    outargs = [None] * len(argspec.args)
    defaults = argspec.defaults
    if defaults:
        outargs[-len(defaults):] = list(defaults)

    # Overlay the supplied positional arguments...
    for idx, value in enumerate(args):
        outargs[idx] = value

    # ...then the supplied keyword arguments.
    for name, value in viewitems(kwargs):
        outargs[argspec.args.index(name)] = value

    return outargs
def _args_to_list3(func, args, kwargs):
sig = signature(func)
bound = sig.bind(*args, **kwargs)
bound.apply_defaults()
return [x for x in bound.arguments.values()]
def args_to_list(func, args, kwargs):
    """Converts the mixed args/kwargs to a single list of args"""
    if version_info > (3,):
        return _args_to_list3(func, args, kwargs)
    return _args_to_list2(func, args, kwargs)
def _arg_location2(func, argname, args, kwargs):
    """Python 2 implementation: locate *argname* within the flattened
    argument list.  Returns (arg_list, index), or None when the name is
    neither a declared parameter nor a supplied keyword."""
    argspec = getargspec(func)
    # Flatten first so any bad keyword raises just as it always did.
    flattened = _args_to_list2(func, args, kwargs)

    if argname not in argspec.args and argname not in kwargs:
        return None
    return flattened, argspec.args.index(argname)
def _arg_location3(func, argname, args, kwargs):
    """Python 3 implementation: locate *argname* within the flattened
    argument list.  Returns (arg_list, index), or None when the name is
    not a declared parameter."""
    param_names = list(signature(func).parameters.keys())
    flattened = _args_to_list3(func, args, kwargs)

    if argname not in param_names:
        return None
    return flattened, param_names.index(argname)
def arg_location(func, argname, args, kwargs):
    """Parses the function args, kargs and signature looking for the
    desired argument location (either in args, kargs, or the signature
    defaults), returning the full flattened argument list along with
    the argument's index within it.
    """
    locate = _arg_location3 if version_info > (3,) else _arg_location2
    return locate(func, argname, args, kwargs)
def psafilepath():
    """Return the full path to the bundled psadilookup.dat data file."""
    package_dir = os.path.dirname(__file__)
    return os.path.join(package_dir, "data", "psadilookup.dat")
def get_id(seq):
    """Return id(seq), or for mappings a dict of recursively computed
    ids keyed like the mapping."""
    if is_mapping(seq):
        # Recurse into each entry until a non-mapping is reached.
        return {key: get_id(value) for key, value in viewitems(seq)}
    return id(seq)