From 2e15863782e560ff2700b4316623b53c252d687b Mon Sep 17 00:00:00 2001 From: Bill Ladwig Date: Fri, 29 Mar 2019 15:47:39 -0600 Subject: [PATCH] Added misc notebooks --- doc/source/contrib.rst | 64 ++++---- doc/source/internals.rst | 289 +++++++++++++++++---------------- test/ipynb/loop_and_fill.ipynb | 141 ++++++++++++++++ test/ipynb/reduce_files.ipynb | 259 +++++++++++++++++++++++++++++ 4 files changed, 580 insertions(+), 173 deletions(-) create mode 100644 test/ipynb/loop_and_fill.ipynb create mode 100644 test/ipynb/reduce_files.ipynb diff --git a/doc/source/contrib.rst b/doc/source/contrib.rst index f08237b..b316145 100644 --- a/doc/source/contrib.rst +++ b/doc/source/contrib.rst @@ -113,7 +113,7 @@ below. 2. Follow the :ref:`fortranstyle`. 3. Please only submit routines relevant to WRF-Python (e.g. diagnostics, - interpolation). General purpose climate/meteorology should go in the + interpolation). General purpose climate/meteorology should go in to the SkyLab project (a project providing similar functionality as NCL). @@ -123,11 +123,11 @@ below. 5. Place your code in the fortran/contrib directory in the WRF-Python source tree. -6. Document your code with a text file that has same name as your Fortran +6. Document your code with a text file that has the same name as your Fortran file, but ending in .rst. This file should placed with your F90 code in the fortran/contrib directory. Your documentation can use restructured text formatting, or just plain text. This documentation - will be used in the docstring when Python wrappers are made. + will be used for the docstring when Python wrappers are made. 7. If you are unable to provide any type of test for your routine, please ensure that your documentation describes what your computation @@ -138,8 +138,8 @@ below. Submitting Python Computational Routines --------------------------------------------- -If you would like to submit a computational routine in Python, but don't know -how to integrate it with the rest of WRF-Python's internals +If you would like to submit a computational routine written in Python, but +don't know how to integrate it with the rest of WRF-Python's internals (e.g. left indexing, arg checking, etc), feel free to submit the pure Python routine. Below is the guide for submitting pure Python routines. @@ -170,30 +170,29 @@ Submitting Fully Wrapped Computational Routines Submitting a fully wrapped computational routines is the fastest way to get your contributation released. However, it requires the most effort on your -part. (This process will be simplified in the future, but it's a little -tedious at this time). +part. 1. Read the :ref:`internals` guide. This will show you how to wrap your routine. 2. Follow the :ref:`fortranstyle` and :ref:`pythonstyle`. -3. You should create your contribution in the WRF-Pyhon source tree as if +3. You should create your contribution in the WRF-Python source tree as if you were one of the core developers of it. This means: - - Your Fortran code (if applicable) should be placed in the fortran + - Your Fortran code (if applicable) should be placed in the *fortran* directory. - - Update the "ext1 = numpy.distutils.core.Extension" section of setup.py + - Update the "ext1 = numpy.distutils.core.Extension" section of *setup.py* to include your new Fortran source (if applicable). - - Update extension.py to create the Python wrapper that calls your + - Update *extension.py* to create the Python wrapper that calls your Fortran function. This must include the appropriate function decorators for handling argument checking, leftmost dimension indexing, etc. as described in :ref:`internals`. - If the current function decorators do not cover your specific needs, - place your custom decorator in specialdec.py. Most of the decorators + place your custom decorator in *specialdec.py*. Most of the decorators in specialdec.py are used for products that contain multiple outputs like cape_2d, but you can use it for other purposes. @@ -212,18 +211,18 @@ tedious at this time). - Decorate your getter routine with an appropriate metadata handling decorator. If you need to make a custom decorator for the metadata, - place it in metadecorators.py. + place it in *metadecorators.py*. - - Update the mappings in routines.py to map your diagnostic name to your + - Update the mappings in *routines.py* to map your diagnostic name to your function, and to declare any keyword arguments that your function needs aside from the usual wrfin, varname, timeidx, method, squeeze, cache, and meta. - If you would like to make your routine available as a raw computation, - you will need to place an additional thin wrapper in computation.py. This - thin wrapper must be decorated with an appropriate metadata decorator - found in metadecorators.py (usually set_alg_metadata). If you need to - write your own custom metadata decorator, write it in metadecorators.py. + you will need to place an additional thin wrapper in *computation.py*. + This thin wrapper must be decorated with an appropriate metadata decorator + found in *metadecorators.py* (usually set_alg_metadata). If you need to + write your own custom metadata decorator, write it in *metadecorators.py*. - You must provide a docstring for every function you create using Google docstring format (see `Sphinx Napoleon `_). @@ -251,8 +250,8 @@ Creating New Examples 1. Examples are made with Sphinx using restructured text. 2. Examples are currently found in the *doc* directory, mostly within the - basic_usage.rst and plot.rst files. Feel free to contribute more examples - to these files. + *basic_usage.rst* and *plot.rst* files. Feel free to contribute more + examples to these files. 3. Unless you are drastically changing the documentation structure, you can submit a pull request with your examples without creating a GitHub @@ -379,8 +378,7 @@ whitespace characters in blank lines, etc.), try the Fortran Style Guide ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -WRF-Python is a Fortran friendly project and we appreciate your contributions. -However, we are only accepting Fortran 90 contributions, so you must +At this time, we are only accepting Fortran 90 contributions, so you must convert any F77 code to F90 before contributing. Although there is no formal style guide for Fortran contributions, Fortran @@ -408,17 +406,17 @@ A summary of style notes is below: constants). All Fortran contributions must be threadsafe and have no side effects. - Place any computational constants in the wrf_constants module found in - wrf_constants.f90 and put a "USE wrf_constants, ONLY : YOUR_CONSTANT" + *wrf_constants.f90* and put a "USE wrf_constants, ONLY : YOUR_CONSTANT,..." declaration in your function. - Please do not redefine constants already declared in wrf_constants.f90 (e.g. G, RD, RV, etc). Although the WRF model itself does not adhere to this, we are trying to be consistent with the constants used throughout WRF-Python. -- Do not put any STOP statements in your code to deal with errors. STOP - statements will bring down the entire Python interpreter with it. Instead, - add *errstat* and *errmsg* arguments to your function signature to tell +- Do not put any STOP statements in your code to handle errors. STOP + statements will bring the entire Python interpreter down with it. Instead, + use *errstat* and *errmsg* arguments to your function signature to tell Python about the error so it can throw an exception. See WETBULBCALC - in wrf_rip_phys_routines.f90 for how this is handled. + in *wrf_rip_phys_routines.f90* for how this is handled. - If you know how to use OpenMP directives, feel free to add them to your routine, but this is not required. @@ -437,7 +435,7 @@ a basic suite of unit tests run. If your pull request is for a bug fix to an existing computational routine, then the automated unit tests will probably fail due to the new values. This is not a problem, but be sure to indicate to the developers in your GitHub -issue that the unit tests will need to be updated. +issue that the tests will need to be updated. .. testing_:: @@ -455,7 +453,7 @@ some recommendations below for how you can create your own tests. Sample Data ^^^^^^^^^^^^^^^^^^^ -You can download sample data for Hurricane Katrina here: +You can download sample data for Hurricane Katrina here: This data includes both a moving nest and a static nest version. You should create your tests with this data set (both static and moving nests), unless you are unable to reproduce a particular problem with it. @@ -464,12 +462,12 @@ Supplying Data ^^^^^^^^^^^^^^^^^^^^^^ If you need to supply us data for your test (due to a bug) please provide us a -link to either a cloud storage service, by :ref:`submitting_files`, or some -other means. Unless the data is very small, do not add it to the GitHub +link to the file or upload it using :ref:`submitting_files`. +Unless the data is very small, do not add it to the GitHub repository. If you can demonstrate the problem/solution with a minimal set of hand created -values, you can just use that in your test. +values, then you can use that in your test. Guidelines @@ -488,7 +486,7 @@ issue related to your contribution to discuss with developers. 3. WRF-Python's tests were written using the standard *unittest* package, along with numpy's test package for the assert fuctions. One reason for this is that many of the tests are dynamically generated, and - some other testing frameworks can't find the tests when generated this way. + other testing frameworks can't find the tests when generated this way. If you need to use another test framework, that's fine, just let us know in your GitHub issue. diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 481d3cf..da2baa7 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -6,19 +6,21 @@ WRF-Python Internals WRF-Python is a collection of diagnostic and interpolation routines for WRF-ARW data. The API is kept to a minimal set of functions, since we've found this to be the easiest to teach to new programmers, students, and scientists. -Future plans do include adopting the Pangeo xarray/dask model for more -advanced programmers, but is not currently supported as of this user guide. +Future plans include adopting the Pangeo xarray/dask model, along with an +object oriented API, but this is not currently supported as of this user +guide. A typical use case for a WRF-Python user is to: 1) Open a WRF data file (or sequence of files) using NetCDF4-python or PyNIO. 2) Compute a WRF diagnostic using :meth:`wrf.getvar`. -3) Perform other computations using methods outside of WRF-Python. +3) Perform any additional computations using methods outside of WRF-Python. 4) Create a plot of the output using matplotlib (basemap or cartopy) or PyNGL. -The purpose of this guide is to explain the internals of item 2 so that -users can help contribute or support the computational diagnostics. +The purpose of this guide is to explain the internals of item (2) so that +users can help contribute or support the computational diagnostic +routines. Overview of a :meth:`wrf.getvar` Diagnostic Computation @@ -27,21 +29,21 @@ Overview of a :meth:`wrf.getvar` Diagnostic Computation A diagnostic computed using the :meth:`wrf.getvar` function consists of the following steps: -1) Using the diagnostic string, call the appropriate 'getter' function. This - step occurs in the :met:`wrf.getvar` routine in routines.py. +1) Call the appropriate 'getter' function based on the specified diagnostic + label. This step occurs in the :meth:`wrf.getvar` routine in routines.py. 2) Extract the required variables from the NetCDF data file (or files). 3) Compute the diagnostic using a wrapped Fortran, C, or Python routine. -4) Convert to the desired units if applicable. +4) Convert to the desired units (if applicable). 5) Set the metadata (if desired) and return the result as an :class:`xarray.DataArray`, or return a :class:`numpy.ndarray` if no - metadata is desired. + metadata is required. In the source directory, the :meth:`wrf.getvar` 'getter' routines have a -"g_" prefix for the naming convention (the "g" stands for "get"). +"\g\_" prefix for the naming convention (the "g" is for "get"). The unit conversion is handled by a :mod:`wrapt` decorator that can be found -in decorators.py. The setting of the metadata is handled using a :mod:`wrapt` -decorator, which can be found in the metadecorators.py file. +in *decorators.py*. The setting of the metadata is handled using a :mod:`wrapt` +decorator, which can be found in the *metadecorators.py* file. Overview of Compiled Computational Routines @@ -49,65 +51,68 @@ Overview of Compiled Computational Routines Currently, the compiled computational routines are written in Fortran 90 and exposed the Python using f2py. The routines have been aquired over -decades, originated from NCL's Fortran77 codebase or other tools like RIP -(Read Interpolate Plot), and do not necessarily conform to a common -programming mindset (e.g. some use 1D arrays, 2D arrays, etc). +decades, originated from NCL's Fortran77 codebase, the WRF model itself, +or other tools like RIP (Read Interpolate Plot), and do not necessarily +conform to a common programming mindset (e.g. 1D arrays, 2D arrays, etc). The raw Fortran routines are compiled in to the :mod:`wrf._wrffortran` -extension module, but are not particularly useful for applications in that -raw form. These routines are imported in the extension.py module, where +extension module, but are not particularly useful for applications in their +raw form. These routines are imported in the *extension.py* module, where additional functionality is added to make the routines more user friendly. -The typical behavior for a fully exported Fortran routine in extension.py +The typical behavior for a fully exported Fortran routine in *extension.py* is: -1) Verify that the arguments passed in are valid in shape. Although f2py does +1) Verify that the supplied arguments are valid in shape. Although f2py does this as well, the errors thrown by f2py are confusing to users, so this step helps provide better error messages. -2) Allocate the ouput array based on the output shape of the algorithm, +2) Allocate an ouput array based on the output shape of the algorithm, number of "leftmost"[1]_ dimensions, and size of the data. 3) Iterate over the leftmost [1]_ dimensions and compute output for argument data slices that are of the same dimensionality as the compiled algorithm. -4) Cast the argument arrays in to the dtype used in the - compiled routine (for WRF data the conversion is usually from a 4-byte - float to an 8-byte double). +4) Cast the argument arrays (or array slices) in to the dtype used in the + compiled routine. For WRF data, the conversion is usually from a 4-byte + float to an 8-byte double. 5) Extract the argument arrays out of xarray in to numpy arrays (if applicable) and transpose them in to Fortran ordering. Note that this does not actually do any copying of the data, it simply reorders the shape tuple for the data and sets the Fortran ordering flag. This allows data - pointers from the output array slices to be directly passed through f2py - so that copying is not required from the result in to the output array. + pointers from the output array slices to be passed directly to the + Fortran routine, which eliminates the need to copy the result to the output + array. The steps described above are handled in :mod:`wrapt` decorators that can be -found in decorators.py. For some routines that produce multiple outputs or have -atypical behavior, the special case decorators are located in specialdec.py. +found in *decorators.py*. For some routines that produce multiple outputs or +have atypical behavior, the special case decorators are located in +*specialdec.py*. .. [1] If the Fortran algorithm is written for a 2-dimensional array, and a users passes in a 5-dimensional array, there are 3 "leftmost" dimensions. -An Example +Example ---------------------------- The above overviews are better explained by an example. Although there are a -few exceptions (e.g. ll_to_xy), most of the routines in WRF-Python behave the -same way. +few exceptions (e.g. ll_to_xy), many of the routines in WRF-Python behave this +way. For this example, let's make a routine that adds a variable's base state to its perturbation. This is the kind of thing that you'd normally use numpy -for (e.g. Ptot = P + PB), but you could do this if you wanted concurrency -for this operation via OpenMP rather than using dask in a future release of +for (e.g. Ptot = PB + P), but you could do this if you wanted concurrency +for this operation via OpenMP rather than using dask (in a future release of WRF-Python, both OpenMP and dask will be available). + Fortran Code ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Here's the Fortran 90 code, which will be written to a file called +Below is the Fortran 90 code, which will be written to a file called example.f90. .. code:: fortran @@ -125,8 +130,8 @@ example.f90. !$OMP PARALLEL DO COLLAPSE(2) SCHEDULE(runtime) DO j=1, ny - DO i=1,nx - total(i) = base(i) + pert(i) + DO i=1, nx + total(i, j) = base(i, j) + pert(i, j) END DO END DO !$OMP END PARALLEL DO @@ -134,10 +139,10 @@ example.f90. END SUBROUTINE pert_add -This code simply adds the base and perturbation and stores the result for each -grid point. For this example, we're using a 2D array because most examples you -see will look like this, but it could have been written with a 1D array as -was done with DCOMPUTETK in wrf_user.f90. +This code adds the 2D base and perturbation variables and stores the result in +a 2D output array. (For this example, we're using a 2D array to help +illustrate leftmost indexing below, but it could have been written using +a 1D or 3D array). At the top, there are these two f2py directives: @@ -153,26 +158,26 @@ computation. This way, Python threads will contine to run, which may be important if you are using this in a webserver or in some other threaded environment like dask's threaded scheduler. -The *intent(in,out)* f2py directive is used because in most cases, you will -be supplying a slice of your output array to this routine and you don't want -to have to copy the result from Fortran back in to your result array. By +The *intent(in,out)* f2py directive is used because we will +be supplying a slice of the output array directly to this routine and don't +want to have to copy the result from Fortran back in to the result array. By specifying intent(in,out), we're telling f2py to use the pointer to our output array directly. Finally, for the OpenMP directive, the scheduler is set to use runtime scheduling via *SCHEDULE(runtime)*. By using runtime scheduling, users -can set the scheduling type within Python, but for most users the default will -be sufficient. +can set the scheduling type within Python, but for most users the default is +sufficient. Building the Fortran Code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To build the Fortran code, the example.f90 source code should be placed in the -*fortran* directory of the source tree. +To build the Fortran code, the *example.f90* source code should be placed in +the *fortran* directory of the source tree. -Next, update the numpy.distutils.core.Extension section of setup.py in the -root directory of the source tree. +Next, we need to update the numpy.distutils.core.Extension section of +*setup.py* in the root directory of the source tree. .. code:: python @@ -200,34 +205,35 @@ root directory of the source tree. ) The easiest way to build your code is to use one of the build scripts located -in the *build_scripts*. These scripts contain variants for compiling with -or without OpenMP support. Unless you're debugging a problem, building with -OpenMP is recommended. +in the *build_scripts* directory of the source tree. These scripts contain +variants for compiling with or without OpenMP support. Unless you are +debugging a problem, building with OpenMP is recommended. For this example, we're going to assume you already followed how to -:ref:`dev_setup`. Here are the instructions: +:ref:`dev_setup`. Below are the build instructions for compiling with +OpenMP enabled on GCC (Linux or Mac): .. code:: - pip uninstall wrf-python (if you already installed it) + pip uninstall wrf-python cd build_scripts sh ./gnu_omp.sh The above command will build and install the new routine, along with the other Fortran routines. If you recieve errors, then your code failed to build sucessfully. Otherwise, your new routine can be called as -wrf._wrffortran.pert_add. +:meth:`wrf._wrffortran.pert_add`. Creating a Thin Python Wrapper ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The new Fortran pert_add routine will work fine as long as you are only -calling it for one 2D slice of data. If you want to extend the functionality +The new Fortran pert_add routine will work well for a 2D slice of data. +However, if you want to extend the functionality to work with any dimensional array, you'll need to add a thin wrapper -with some extra functionality added via :mod:`wrapt` decorators. +with some extra functionality that make use of :mod:`wrapt` decorators. -First, let's start by creating a very thin wrapper in Python in extension.py. +First, let's start by creating a very thin wrapper in Python in *extension.py*. .. code:: python @@ -254,28 +260,28 @@ First, let's start by creating a very thin wrapper in Python in extension.py. Despite being only a few lines of code, there is quite a bit going on in the wrapper. The first thing to note is the arguments to the wrapper function. The -only arguments you will need for the wrapper are the inputs to the function -and an "outview" argument. At this point in the call chain, the arguments are -assumed to be Fortran-ordered, in that the Fortran ordering flag is set and -the shape is transposed from a usual C-ordered numpy array (the data itself -remains in the same order that it was created). By passing numpy -arrays with the Fortran order flag set, f2py will pass the pointer directly -through to the Fortran routine. - -The outview argument is used during leftmost dimension indexing to send slices -of the output array to the Fortran routine to be filled. If there are no -leftmost dimensions (e.g. this routine is called on 2D data), then the outview -argument will be None and an outview variable will be created with the same -number of dimensions as the *base* argument. It should be created with Fortran -ordering so the pointer is directly passed to the Fortran routine. - -When the actual *pert_add* Fortran routine is called, the nx and ny arguments -are ommitted because f2py will supply this for you based on the shape of the -numpy arrays you are supplying as input arguments. F2py also likes to return -an array as a result, so even though you supplied outview as an array to -be filled by the Fortran routine, you will still get a result from the -function call that is pointing to the same thing as outview. (We could have -chosen to ignore the result and returned outview instead). +only arguments that we need for the wrapper are the inputs to the function +and an "outview" keyword argument. At this point in the call chain, the +arguments are assumed to be Fortran-ordered, in that the Fortran ordering flag +is set and the shape is transposed from a usual C-ordered numpy array +(the data itself remains in the same order that it was created). By passing +numpy arrays with the Fortran order flag set, f2py will pass the pointer +directly through to the Fortran routine. + +The *outview* keyword argument is used during leftmost dimension indexing to +send slices of the output array to the Fortran routine to be filled. If there +are no leftmost dimensions (e.g. this routine is called with 2D data), then the +outview argument will be None and an outview variable will be created with the +same number of dimensions as the *base* argument. It should be created with +Fortran ordering so that the pointer is directly passed to the Fortran routine. + +When the actual :meth:`wrf._wrffortran.pert_add` Fortran routine is called, +the nx and ny arguments are ommitted because f2py will supply this for us +based on the shape of the numpy arrays we are supplying as input arguments. +F2py also likes to return an array as a result, so even though we supplied +outview as an array to be filled by the Fortran routine, we will still get a +result from the function call that is pointing to the same thing as outview. +(We could have chosen to ignore the result and return outview instead). Extract and Transpose @@ -284,13 +290,13 @@ Extract and Transpose The arrays that are being passed to the _pert_add thin wrapper need to be numpy arrays in Fortran ordering, but they won't come this way from users. They will come in as either :class:`numpy.ndarray` -or :class:`xarray.DataArray` and will be C-ordered. So, we need to to make -sure that Fortran-ordered :class:`numpy.ndarray` is what is going to +or :class:`xarray.DataArray` and will be C-ordered. So, we need to make +sure that a Fortran-ordered :class:`numpy.ndarray` is what is passed to the thin wrapper. -Since this type of operation is repeated many times, a decorator has been -written in *decorators.py* for this purpose. So let's decorate our thin -wrapper with this function. +Since this type of operation is repeated for many diagnostic functions, a +decorator has been written in *decorators.py* for this purpose. Let's decorate +our thin wrapper with this function. .. code:: python @@ -323,17 +329,17 @@ the result of the computation and returns the :attr:`numpy.ndarray.T` from the Fortran-ordered result. This result gets passed back up the decorator chain. -Cast Type +Cast to Fortran Array Types ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The Fortran routine expects a specific type of data to operate on, -usually double precision numbers. WRF files typically store -their data as 4-byte loating point precision numbers to save -space. So, the arrays being passed to the extract_and_transpose decorator -need to be converted to the type used in the Fortran routine (e.g. double), -then converted back to the original type (e.g. float) after the computation -is finished. This is handled by the :meth:`cast_type` decorator function -in *decorators.py*. +The Fortran routine expects a specific data type for the arrays (usually +REAL(KIND=8)). WRF files typically store their data as 4-byte floating point +numbers to save space. The arrays being passed to the +:meth:`wrf.decorators.extract_and_transpose` decorator need to be converted +to the type used in the Fortran routine (e.g. double), then converted back to +the original type (e.g. float) after the computation is finished. This is +handled by the :meth:`wrf.decorators.cast_type` decorator function in +*decorators.py*. .. code:: python @@ -354,9 +360,9 @@ in *decorators.py*. return result -The :meth:`cast_type` decorator function takes an *arg_idxs* argument to -specify which positional arguments need to be cast to the Fortran algorithm -type, in this case arguments 0 and 1 (base and pert). +The :meth:`wrf.decorators.cast_type` decorator function takes an +*arg_idxs* argument to specify which positional arguments need to be cast to +the Fortran algorithm type, in this case arguments 0 and 1 (base and pert). Following the computation, the result will be cast back to the original type for the input arguments (usually float), and passed back up the decorator @@ -368,10 +374,10 @@ Leftmost Dimension Indexing The WRF-Python algorithms written in Fortran are usually written for fixed size arrays of 1, 2, or 3 dimensions. If your input arrays have more than -the number of dimensions written for the Fortran algorithm, then we need to +the number of dimensions specified for the Fortran algorithm, then we need to do the following: -1. Determine how many leftmost dimensions there are. +1. Determine how many leftmost dimensions are used. 2. Create an output array that has a shape that contains the leftmost dimensions concatenated with the shape of the result from the Fortran @@ -385,14 +391,15 @@ do the following: 5. Return the fully calculated output array. -The :meth:`left_iteration` is general purpose decorator contained in -*decorators.py* to handle most leftmost index iteration cases. Some products, -like cape_2d, return multiple products in the output and don't fall in to -this generic category, so those decorators can be found in *specialdec.py*. +The :meth:`wrf.decorators.left_iteration` is general purpose decorator +contained in *decorators.py* to handle most leftmost index iteration cases. +(Note: Some products, like cape_2d, return multiple products in the output +and don't fall in to this generic category, so those decorators can be found +in *specialdec.py*) Let's look at how this is used below. -.. code:: python: +.. code:: python @left_iteration(2, 2, ref_var_idx=0) @cast_type(arg_idxs=(0, 1)) @@ -413,9 +420,9 @@ Let's look at how this is used below. return result -The :meth:`wrf.left_iteration` decorator handles many different use cases -with its arguments, but this example is one of the more common cases. The -0th positional argument tells the decorator that the "reference" input +The :meth:`wrf.decorators.left_iteration` decorator handles many different +use cases with its arguments, but this example is one of the more common cases. +The 0th positional argument tells the decorator that the "reference" input variable should provide at least two dimensions. This should be set to the same number of dimensions as in the Fortran algorithm, which is two in this case. Dimensions to the left of these two dimensions are considered "leftmost" @@ -433,25 +440,25 @@ The final keyword argument of *ref_ver_idx* tells the decorator to use positional argument 0 (for the _pert_add function) as the reference variable. -The result of this decorator will be the fully computed output array and it -is passed back up the chain. +The result of this decorator will be the fully computed output array, which +gets passed back up the chain. Checking Argument Shapes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Before any computations can be performed, the argument shapes are checked to -make sure they are correct sizes. Although f2py will catch problems at the +verify their sizes. Although f2py will catch problems at the entry point to the Fortran routine, the error thrown is confusing to users. -The :meth:`wrf.check_args` decorator is used to verify that the arguments are -the correct size before proceeding. +The :meth:`wrf.decorators.check_args` decorator is used to verify that the +arguments are the correct size before proceeding. -Here is how it is used below +Here is how it is used: -.. code:: python: +.. code:: python @check_args(0, 2, (2, 2)) @left_iteration(2, 2, ref_var_idx=0) @@ -472,13 +479,14 @@ Here is how it is used below return result -The 0th positional argument (value of 0), tells :meth:`wrf.check_args` that -the 0th positional argument of _pert_add is the reference variable. +The 0th positional argument (value of 0), tells +:meth:`wrf.decorators.check_args` that the 0th positional argument of +_pert_add is the reference variable. -The next postional argument (value of 2) tells :meth:`check_args` that it -should expect at least 2 dimensions for the reference variable. This should -be set to the number of dimensions used in the Fortran algorithm, which is two -in this case. +The next postional argument (value of 2) tells +:meth:`wrf.decorators.check_args` that it should expect at least 2 dimensions +for the reference variable. This should be set to the number of dimensions +used in the Fortran algorithm, which is two in this case. The final positional argument is a tuple with the number of dimensions that are expected for each array argument. Again, this should be set to the same @@ -498,12 +506,13 @@ first, then left_iteration, then cast_type, then extract_and_transpose, and finally _pert_add. After _pert_add is finished, the result is passed back up the chain and back to the user. -So now that we have a fully wrapped compiled routine, how might we use this? +Now that we have a fully wrapped compiled routine, how might we use this? -Let's make a new :meth:`wrf.getvar` product called 'total_pressure'. A -similar product already exists in WRF-Python, but this is just for +Let's make a new :meth:`wrf.getvar` diagnostic called 'total_pressure'. A +similar diagnostic already exists in WRF-Python, but this is just for illustration of how to use our newly wrapped Fortran routine. + Make a 'getter' Function ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -511,13 +520,13 @@ First, we need a 'getter' routine that extracts the required input variables from the WRF NetCDF file(s) to perform the computation. In this case, the variables are P and PB. -The currently naming convention in WRF-Python is to prefix the 'getter' -functions with a 'g_', so let's call this file g_totalpres.py and make a +The current naming convention in WRF-Python is to prefix the 'getter' +functions with a '\g\_', so let's call this file g_totalpres.py and make a function get_total_pressure inside of it. The contents of this file will be: -.. code:: python: +.. code:: python # g_totalpres.py @@ -598,16 +607,16 @@ The contents of this file will be: This getter function extracts the PB and P (base and pertrubation pressure) variables and calls the _pert_add function and returns the result. The arguments *wrfin*, *timeidx*, *method*, *squeeze*, *cache*, *meta*, and -*_key* are used for every getter function and you can read what they are -used for in the docstring. - -You should also notice that the getter function is decorated with a -:meth:`copy_and_set_metadata` decorator. This is a general purpose decorator -used for copying metadata from an input variable and applying it to the result. -In this case, the variable to copy is P. The *name* parameter specifies the -:attr:`xarray.DataArray.name` attribute for the variable (the name that -will be written to a NetCDF variable). The *description* is a brief -description for variable that will be placed in the +*_key* are used for every getter function and you can read what they do in +the docstring. + +The getter function is also decorated with a +:meth:`wrf.decorators.copy_and_set_metadata` decorator. This is a general +purpose decorator that is used for copying metadata from an input variable +to the result. In this case, the variable to copy is 'P'. The *name* parameter +specifies that the :attr:`xarray.DataArray.name` attribute for the variable +(the name that will be written to a NetCDF variable). The *description* is a +brief description for variable that will be placed in the :attr:`xarray.DataArray.attrs` dictionary along with the *units* parameter. @@ -616,11 +625,11 @@ Make Your New Diagnostic Available in :meth:`wrf.getvar` The final step is to make the new 'total_pressure' diagnostic available from :meth:`wrf.getvar`. To do this, modifications need to be made to -routines.py. +*routines.py*. First, import your new getter routine at the top of routines.py. -.. code:: python: +.. code:: python from __future__ import (absolute_import, division, print_function) @@ -640,7 +649,7 @@ First, import your new getter routine at the top of routines.py. Next, update _FUNC_MAP to map your diagnostic label ('total_pressure') to the getter routine (get_total_pres). -.. code:: python: +.. code:: python _FUNC_MAP = {"cape2d": get_2dcape, "cape3d": get_3dcape, @@ -660,7 +669,7 @@ are not declared in this map. In this case, there aren't any addtional keyword arguments, so we'll just supply an empty list. -.. code:: python: +.. code:: python _VALID_KARGS = {"cape2d": ["missing"], "cape3d": ["missing"], diff --git a/test/ipynb/loop_and_fill.ipynb b/test/ipynb/loop_and_fill.ipynb new file mode 100644 index 0000000..1ee2b38 --- /dev/null +++ b/test/ipynb/loop_and_fill.ipynb @@ -0,0 +1,141 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "array([[[[303.6255 , ..., 303.81546],\n", + " ...,\n", + " [304.7401 , ..., 300.15717]]],\n", + "\n", + "\n", + " ...,\n", + "\n", + "\n", + " [[[301.89346, ..., 302.56534],\n", + " ...,\n", + " [302.61246, ..., 298.01028]]]], dtype=float32)\n", + "Coordinates:\n", + " * Time (Time) datetime64[ns] 2005-08-28 2005-08-28T03:00:00 ...\n", + " XTIME (Time) float64 0.0 180.0 360.0 540.0\n", + " XLAT (south_north, west_east) float32 21.302004 21.302004 21.302004 ...\n", + " XLONG (south_north, west_east) float32 -90.57406 -90.484116 ...\n", + "Dimensions without coordinates: bottom_top, south_north, west_east\n", + "Attributes:\n", + " FieldType: 104\n", + " MemoryOrder: XY \n", + " description: TEMP at 2 M\n", + " units: K\n", + " stagger: \n", + " coordinates: XLONG XLAT XTIME\n", + " projection: Mercator(stand_lon=-89.0, moad_cen_lat=27.99999237060547, t...\n" + ] + } + ], + "source": [ + "from __future__ import print_function, division\n", + "\n", + "import os\n", + "import numpy as np\n", + "from netCDF4 import Dataset\n", + "from wrf import getvar, ALL_TIMES, to_np\n", + "import xarray\n", + "\n", + "path = '/scratch/mawagner/2015_GRELL3D/TESTFILES/TestTime'\n", + "filename_list = os.listdir(path)\n", + "filename_list.sort()\n", + "\n", + "#filename_list = [\"/Users/ladwig/Documents/wrf_files/wrf_vortex_single/wrfout_d02_2005-08-28_00:00:00\",\n", + "# \"/Users/ladwig/Documents/wrf_files/wrf_vortex_single/wrfout_d02_2005-08-28_03:00:00\",\n", + "# \"/Users/ladwig/Documents/wrf_files/wrf_vortex_single/wrfout_d02_2005-08-28_06:00:00\",\n", + "# \"/Users/ladwig/Documents/wrf_files/wrf_vortex_single/wrfout_d02_2005-08-28_09:00:00\"]\n", + "\n", + "# Result shape \n", + "result_shape = (6, 1, 290, 265)\n", + "#result_shape = (4, 1, 96, 96)\n", + "\n", + "# Let's get the first time so we can copy the metadata later\n", + "f = Dataset(filename_list[0])\n", + "# By setting squeeze to False, you'll get all the dimension names.\n", + "z1 = getvar(f, \"T2\", 0, squeeze=False)\n", + "xlat = getvar(f, \"XLAT\", 0)\n", + "xlong = getvar(f, \"XLONG\", 0)\n", + "\n", + "\n", + "z_final = np.empty(result_shape, np.float32)\n", + "\n", + "# Modify this number if using more than 1 time per file\n", + "times_per_file = 1\n", + "#times_per_file = 4\n", + "\n", + "data_times = []\n", + "xtimes = []\n", + "for timeidx in range(result_shape[0]):\n", + " # Compute the file index and the time index inside the file\n", + " fileidx = timeidx // times_per_file\n", + " file_timeidx = timeidx % times_per_file\n", + "\n", + " f = Dataset(filename_list[fileidx])\n", + " z = getvar(f, \"T2\", file_timeidx)\n", + " t = getvar(f, \"Times\", file_timeidx)\n", + " xt = getvar(f, \"xtimes\", file_timeidx)\n", + " data_times.append(to_np(t))\n", + " xtimes.append(to_np(xt))\n", + " z_final[timeidx,:] = z[:]\n", + " f.close()\n", + " \n", + "# Let's make the metadata. Dimension names should copy easily if you set sqeeze to False, \n", + "# otherwise you can just set them yourself is a tuple of dimension names. Since you wanted\n", + "# to keep the bottom_top dimension for this 2D variable (which is normally removed), \n", + "# I'm doing this manually.\n", + "z_dims = [\"Time\", \"bottom_top\", \"south_north\", \"west_east\"]\n", + "\n", + "# Xarray doesn't copy coordinates easily (it always complains about shape mismatches), so do this\n", + "# manually\n", + "z_coords = {}\n", + "z_coords[\"Time\"] = data_times\n", + "z_coords[\"XTIME\"] = (\"Time\",), xtimes\n", + "z_coords[\"XLAT\"] = (\"south_north\", \"west_east\"), xlat\n", + "z_coords[\"XLONG\"] = (\"south_north\", \"west_east\"), xlong\n", + "z_name = \"T2\"\n", + "\n", + "# Attributes copy nicely\n", + "z_attrs = {}\n", + "z_attrs.update(z1.attrs)\n", + "\n", + "z_with_meta = xarray.DataArray(z_final, coords=z_coords, dims=z_dims, attrs=z_attrs, name=z_name)\n", + "\n", + "print(z_with_meta)\n", + " \n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test/ipynb/reduce_files.ipynb b/test/ipynb/reduce_files.ipynb new file mode 100644 index 0000000..6db8eb1 --- /dev/null +++ b/test/ipynb/reduce_files.ipynb @@ -0,0 +1,259 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import tempfile\n", + "import glob\n", + "import shutil\n", + "import os\n", + "import numpy as np\n", + "from netCDF4 import Dataset\n", + "from wrf import getvar, ll_to_xy, CoordPair, GeoBounds, to_np\n", + "\n", + "_VARS_TO_KEEP = (\"Times\", \"XLAT\", \"XLONG\", \"XLAT_U\", \"XLAT_V\", \"XLONG_U\", \n", + " \"XLONG_V\", \"U\", \"V\", \"W\", \"PH\", \"PHB\", \"T\", \"P\", \"PB\", \"Q2\", \n", + " \"T2\", \"PSFC\", \"U10\", \"V10\", \"XTIME\", \"QVAPOR\", \"QCLOUD\", \n", + " \"QGRAUP\", \"QRAIN\", \"QSNOW\", \"QICE\", \"MAPFAC_M\", \"MAPFAC_U\",\n", + " \"MAPFAC_V\", \"F\", \"HGT\", \"RAINC\", \"RAINSH\", \"RAINNC\", \"I_RAINC\", \"I_RAINNC\",\n", + " \"PBLH\")\n", + "\n", + "class FileReduce(object):\n", + " def __init__(self, filenames, geobounds, tempdir=None, vars_to_keep=None, \n", + " max_pres=None, compress=False, delete=True, reuse=False):\n", + " \"\"\"An iterable object for cutting out geographic domains.\n", + " \n", + " Args:\n", + " \n", + " filenames (sequence): A sequence of file paths to the WRF files\n", + " \n", + " geobounds (GeoBounds): A GeoBounds object defining the region of interest\n", + " \n", + " tempdir (str): The location to store the temporary cropped data files. If None, tempfile.mkdtemp is used.\n", + " \n", + " vars_to_keep (sequence): A sequence of variables names to keep from the original file. None for all vars.\n", + " \n", + " max_press (float): The maximum pressure height level to keep. None for all levels.\n", + " \n", + " compress(bool): Set to True to enable zlib compression of variables in the output.\n", + " \n", + " delete (bool): Set to True to delete the temporary directory when FileReduce is garbage collected.\n", + " \n", + " reuse (bool): Set to True when you want to resuse the files that were previously converted. *tempdir* \n", + " must be set to a specific directory that contains the converted files and *delete* must be False.\n", + " \n", + " \n", + " \"\"\"\n", + " self._filenames = filenames\n", + " self._i = 0\n", + " self._geobounds = geobounds\n", + " self._delete = delete\n", + " self._vars_to_keep = vars_to_keep\n", + " self._max_pres = max_pres\n", + " self._compress = compress\n", + " self._cache = set()\n", + " self._own_data = True\n", + " self._reuse = reuse\n", + " \n", + " if tempdir is not None:\n", + " if not os.path.exists(tempdir):\n", + " os.makedirs(tempdir)\n", + " self._tempdir = tempdir\n", + " if self._reuse:\n", + " self._cache = set((os.path.join(self._tempdir, name) \n", + " for name in os.listdir(self._tempdir)))\n", + " else:\n", + " self._tempdir = tempfile.mkdtemp()\n", + "\n", + " print (\"temporary directory is: {}\".format(self._tempdir))\n", + " self._prev = None\n", + " self._set_extents()\n", + " \n", + " def _set_extents(self):\n", + " fname = list(self._filenames)[0]\n", + " with Dataset(fname) as ncfile:\n", + " lons = [self._geobounds.bottom_left.lon, self._geobounds.top_right.lon]\n", + " lats = [self._geobounds.bottom_left.lat, self._geobounds.top_right.lat]\n", + " orig_west_east = len(ncfile.dimensions[\"west_east\"])\n", + " orig_south_north = len(ncfile.dimensions[\"south_north\"])\n", + " orig_bottom_top = len(ncfile.dimensions[\"bottom_top\"])\n", + " \n", + " # Note: Not handling the moving nest here\n", + " # Extra points included around the boundaries to ensure domain is fully included\n", + " x_y = ll_to_xy(ncfile, lats, lons, meta=False)\n", + " self._start_x = 0 if x_y[0,0] == 0 else x_y[0,0] - 1\n", + " self._end_x = orig_west_east - 1 if x_y[0,1] >= orig_west_east - 1 else x_y[0,1] + 1\n", + " self._start_y = 0 if x_y[1,0] == 0 else x_y[1,0] - 1\n", + " self._end_y = orig_south_north - 1 if x_y[1,1] >= orig_south_north - 1 else x_y[1,1] + 1\n", + " \n", + " self._west_east = self._end_x - self._start_x + 1\n", + " self._west_east_stag = self._west_east + 1\n", + " self._south_north = self._end_y - self._start_y + 1\n", + " self._south_north_stag = self._south_north + 1\n", + " \n", + " # Crop the vertical to the specified pressure\n", + " if self._max_pres is not None:\n", + " pres = getvar(ncfile, \"pressure\")\n", + " # Find the lowest terrain height\n", + " ter = to_np(getvar(ncfile, \"ter\"))\n", + " min_ter = float(np.amin(ter)) + 1\n", + " ter_less = ter <= min_ter\n", + " ter_less = np.broadcast_to(ter_less, pres.shape)\n", + " # For the lowest terrain height, find the lowest vertical index to meet \n", + " # the desired pressure level. The lowest terrain height will result in the \n", + " # largest vertical spread to find the pressure level.\n", + " x = np.transpose(((pres.values <= self._max_pres) & ter_less).nonzero())\n", + " self._end_bot_top = np.amin(x, axis=0)[0] \n", + " if (self._end_bot_top >= orig_bottom_top):\n", + " self._end_bot_top = orig_bottom_top - 1\n", + " else:\n", + " self._end_bot_top = orig_bottom_top - 1\n", + " \n", + " self._bottom_top = self._end_bot_top + 1\n", + " self._bottom_top_stag = self._bottom_top + 1\n", + " \n", + " print(\"bottom_top\", self._bottom_top)\n", + " \n", + " \n", + " def __iter__(self):\n", + " return self\n", + " \n", + " def __copy__(self):\n", + " cp = type(self).__new__(self.__class__)\n", + " cp.__dict__.update(self.__dict__)\n", + " cp._own_data = False\n", + " cp._delete = False\n", + " \n", + " return cp\n", + " \n", + " def __del__(self):\n", + " if self._delete:\n", + " shutil.rmtree(self._tempdir)\n", + " \n", + " def reduce(self, fname):\n", + " outfilename = os.path.join(self._tempdir, os.path.basename(fname))\n", + " \n", + " # WRF-Python can iterate over sequences several times during a 'getvar', so a cache is used to \n", + " if outfilename in self._cache:\n", + " return Dataset(outfilename)\n", + " \n", + " # New dimension sizes\n", + " dim_d = {\"west_east\" : self._west_east,\n", + " \"west_east_stag\" : self._west_east_stag,\n", + " \"south_north\" : self._south_north,\n", + " \"south_north_stag\" : self._south_north_stag,\n", + " \"bottom_top\" : self._bottom_top,\n", + " \"bottom_top_stag\" : self._bottom_top_stag\n", + " }\n", + " \n", + " # Data slice sizes for the 2D dimensions\n", + " slice_d = {\"west_east\" : slice(self._start_x, self._end_x + 1),\n", + " \"west_east_stag\" : slice(self._start_x, self._end_x + 2),\n", + " \"south_north\" : slice(self._start_y, self._end_y + 1),\n", + " \"south_north_stag\" : slice(self._start_y, self._end_y + 2),\n", + " \"bottom_top\" : slice(None, self._end_bot_top + 1),\n", + " \"bottom_top_stag\" : slice(None, self._end_bot_top + 2)\n", + " }\n", + " \n", + " with Dataset(fname) as infile, Dataset(outfilename, mode=\"w\") as outfile:\n", + " \n", + " # Copy the global attributes\n", + " outfile.setncatts(infile.__dict__)\n", + "\n", + " # Copy Dimensions, limiting south_north and west_east to desired domain\n", + " for name, dimension in infile.dimensions.items():\n", + " dimsize = dim_d.get(name, len(dimension))\n", + " outfile.createDimension(name, dimsize)\n", + "\n", + " # Copy Variables \n", + " for name, variable in infile.variables.items():\n", + " if self._vars_to_keep is not None:\n", + " if name not in self._vars_to_keep:\n", + " continue\n", + " \n", + " print (name)\n", + " new_slices = tuple((slice_d.get(dimname, slice(None)) for dimname in variable.dimensions))\n", + "\n", + " outvar = outfile.createVariable(name, variable.datatype, variable.dimensions, zlib=self._compress)\n", + "\n", + " outvar[:] = variable[new_slices]\n", + "\n", + " outvar.setncatts(variable.__dict__)\n", + " \n", + " \n", + " result = Dataset(outfilename)\n", + " \n", + " self._cache.add(outfilename)\n", + " \n", + " return result\n", + " \n", + " \n", + " def next(self):\n", + " if self._i >= len(self._filenames):\n", + " if self._prev is not None:\n", + " self._prev.close()\n", + " raise StopIteration\n", + " else:\n", + " fname = self._filenames[self._i]\n", + " reduced_file = self.reduce(fname)\n", + " if self._prev is not None:\n", + " self._prev.close()\n", + " self._prev = reduced_file\n", + " \n", + " self._i += 1\n", + " \n", + " return reduced_file\n", + " \n", + " # Python 3\n", + " def __next__(self):\n", + " return self.next()\n", + "\n", + "# How to use with getvar\n", + "# Set lower left and upper right to your desired domain\n", + "# Idaho bounding box: [\"41.9880561828613\",\"49.000846862793\",\"-117.243034362793\",\"-111.043563842773\"]\n", + "ll = CoordPair(lat=41.8, lon=-117.26)\n", + "ur = CoordPair(lat=49.1, lon=-110.5)\n", + "bounds = GeoBounds(ll, ur)\n", + "reduced_files = FileReduce(glob.glob(\"/Users/ladwig/Documents/wrf_files/boise_tutorial/orig/wrfout_*\"),\n", + " bounds, vars_to_keep=_VARS_TO_KEEP, max_pres=400,\n", + " tempdir=\"/Users/ladwig/Documents/wrf_files/boise_tutorial/reduced\", \n", + " delete=False, reuse=True)\n", + "\n", + "pres = getvar(reduced_files, \"pressure\")\n", + "\n", + "print(pres)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}