Create readme file with scripts description

1 year ago · 1613757a75
3 changed files with 166 additions and 37 deletions
--- a/1_Earlier_measurements_images.ipynb
+++ b/1_Earlier_measurements_images.ipynb
@ -7,10 +7,21 @@
				@@ -7,10 +7,21 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# importing the necessary libraries for data visualization and numerical operations\n",
+    "# matplotlib.pyplot is used for plotting graphs, and numpy is used for handling numerical data efficiently\n",
+    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "a9d89f2a-2da0-4279-9355-05926c39084d",
+   "metadata": {},
+   "source": [
+    "### Helper functions and variables"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 2,
@ -18,6 +29,7 @@
				@@ -18,6 +29,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# Define an array of abbreviated month names to use as labels on the x-axis of plots\n",
    "month_name = ['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D']"
   ]
  },
@ -26,7 +38,7 @@
				@@ -26,7 +38,7 @@
   "id": "4fac1ed4-536f-4e57-9ea5-820eeb0449e2",
   "metadata": {},
   "source": [
-    "### The results of earlier measurements"
+    "### Digitized measurement results from external sources"
   ]
  },
  {
@ -36,14 +48,14 @@
				@@ -36,14 +48,14 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# air—Earth current values measured at Kew in Jun 1930 — May 1931\n",
+    "# units are A·m^(−2)\n",
+    "# source: Scrase (1933), Table 2\n",
+    "\n",
    "kew_data = (\n",
    "    np.array([40, 66, 86, 76, 90, 114, 113, 138, 94, 93, 65, 55])\n",
    "    * 10**4 * 10**(-18)\n",
-    ")\n",
-    "\n",
-    "# air—Earth current values measured at Kew in Jun 1930 — May 1931\n",
-    "# units are A·m^(−2)\n",
-    "# source: Scrase (1933), Table 2"
+    ")"
   ]
  },
  {
@ -53,15 +65,15 @@
				@@ -53,15 +65,15 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# air—Earth current values measured at Mauna Loa in 1977–1983\n",
+    "# units are A·m^(−2)\n",
+    "# source: Adlerman & Williams (1996), Fig. 6 (data by Cobb)\n",
+    "\n",
    "mauna_loa_data = (\n",
    "    np.array([30.8, 31.7, 31.3, 28.0, 33.7, 36.4,\n",
    "              35.7, 34.1, 33.2, 33.9, 34.0, 32.2])\n",
    "    * 10**(-13)\n",
-    ")\n",
-    "\n",
-    "# air—Earth current values measured at Mauna Loa in 1977–1983\n",
-    "# units are A·m^(−2)\n",
-    "# source: Adlerman & Williams (1996), Fig. 6 (data by Cobb)"
+    ")"
   ]
  },
  {
@ -71,15 +83,15 @@
				@@ -71,15 +83,15 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# air—Earth current values measured at Athens in 1965–1980\n",
+    "# units are A·m^(−2)\n",
+    "# source: Retalis (1991), Fig. 6\n",
+    "\n",
    "athens_data = (\n",
    "    np.array([23.3, 19.7, 18.4, 21.6, 19.2, 24.1,\n",
    "              24.1, 26.0, 23.2, 20.5, 21.0, 19.0])\n",
    "    * 10**(-13)\n",
-    ")\n",
-    "\n",
-    "# air—Earth current values measured at Athens in 1965–1980\n",
-    "# units are A·m^(−2)\n",
-    "# source: Retalis (1991), Fig. 6"
+    ")"
   ]
  },
  {
@ -89,15 +101,15 @@
				@@ -89,15 +101,15 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "carnegie_maud_data = (\n",
-    "    np.array([121, 131, 128, 121, 131, 161,\n",
-    "              180, 111, 127, 121, 117, 126])\n",
-    ")\n",
-    "\n",
    "# potential gradient values measured during Carnegie and Maud expeditions\n",
    "# (1915–1929)\n",
    "# units are V·m^(−1)\n",
-    "# source: Adlerman & Williams (1996), Fig. 7b"
+    "# source: Adlerman & Williams (1996), Fig. 7b\n",
+    "\n",
+    "carnegie_maud_data = (\n",
+    "    np.array([121, 131, 128, 121, 131, 161,\n",
+    "              180, 111, 127, 121, 117, 126])\n",
+    ")"
   ]
  },
  {
@ -107,15 +119,15 @@
				@@ -107,15 +119,15 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# ionospheric potential values measured at different sites over 1955–2004\n",
+    "# units are V\n",
+    "# source: Markson (2007), Fig. 7 (various data)\n",
+    "\n",
    "ip_data = (\n",
    "    np.array([212, 236, 239, 238, 245, 234,\n",
    "              240, 263, 260, 255, 229, 262])\n",
    "    * 10**3\n",
-    ")\n",
-    "\n",
-    "# ionospheric potential values measured at different sites over 1955–2004\n",
-    "# units are V\n",
-    "# source: Markson (2007), Fig. 7 (various data)"
+    ")"
   ]
  },
  {
@ -125,14 +137,22 @@
				@@ -125,14 +137,22 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# potential gradient values measured at Vostok station in 1998–2001\n",
+    "# units are V·m^(−1)\n",
+    "# source: Burns et al. (2012), Table 2\n",
+    "\n",
    "vostok_old_data = (\n",
    "    np.array([195, 201, 205, 192, 188, 195,\n",
    "              209, 198, 209, 195, 193, 192])\n",
-    ")\n",
-    "\n",
-    "# potential gradient values measured at Vostok station in (1998–2001)\n",
-    "# units are V·m^(−1)\n",
-    "# source: Burns et al. (2012), Table 2"
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5772bcf6-3ee6-49fe-9310-71cdbd09273a",
+   "metadata": {},
+   "source": [
+    "### Figure: Seasonal variation based on earlier measurement results"
   ]
  },
  {
@ -154,32 +174,43 @@
				@@ -154,32 +174,43 @@
   ],
   "source": [
    "fig = plt.figure(figsize=(10, 14), constrained_layout=False)\n",
+    "\n",
+    "# create a list of axes objects to hold subplots.\n",
    "ax = [None for _ in range(6)]\n",
+    "\n",
+    "# configure each subplot in the figure\n",
+    "# each subplot spans two adjacent columns of a 4×4 grid (two subplots per row)\n",
    "for n in range(6):\n",
    "    ax[n] = fig.add_subplot(4, 4, (2*n + 1, 2*n + 2))\n",
    "\n",
+    "# lower, upper limits, y-axis ticks interval, y-scaling coefficient for each subplot\n",
    "low = [100, 160, 0e-13, 15e-13, 25e-13, 200e3]\n",
    "high = [200, 240, 20e-13, 30e-13, 40e-13, 280e3]\n",
    "step = [20, 20, 5e-13, 5e-13, 5e-13, 20e3]\n",
    "coeff = [1, 1, 1e-12, 1e-12, 1e-12, 1e3]\n",
+    "\n",
    "caption = ['Carnegie and Maud, 1915–1929',\n",
    "           'Vostok station, 1998–2001 (adjusted)',\n",
    "           'Kew, 1930–1931',\n",
    "           'Athens, 1965–1980',\n",
    "           'Mauna Loa, 1977–1983',\n",
    "           'Ion. potent. measurements, 1955–2004']\n",
+    "\n",
    "ins_caption = ['(after $\\it{Adlerman~&~Williams}$, 1996)',\n",
    "               '(after $\\it{Burns~et~al.}$, 2012)',\n",
    "               '(after $\\it{Scrase}$, 1933)',\n",
    "               '(after $\\it{Retalis}$, 1991)',\n",
    "               '(after $\\it{Adlerman~&~Williams}$, 1996)',\n",
    "               '(after $\\it{Markson}$, 2007)']\n",
+    "\n",
    "data = np.array([carnegie_maud_data,\n",
    "                 vostok_old_data,\n",
    "                 kew_data,\n",
    "                 athens_data,\n",
    "                 mauna_loa_data,\n",
    "                 ip_data])\n",
+    "\n",
+    "# assign colors for each dataset\n",
    "col = ['orangered', 'orangered', 'teal', 'teal', 'teal', 'royalblue']\n",
    "\n",
    "for n in range(6):\n",
--- a/3_WRF_T2_images.ipynb
+++ b/3_WRF_T2_images.ipynb
@ -2,7 +2,7 @@
				@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 2,
   "id": "9a80a0a4-ef03-411d-b441-21434947e6ac",
   "metadata": {},
   "outputs": [],
@ -13,7 +13,7 @@
				@@ -13,7 +13,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 3,
   "id": "56ca6e5b-3400-4ba7-a050-9e241da9dea8",
   "metadata": {},
   "outputs": [],
@ -21,6 +21,27 @@
				@@ -21,6 +21,27 @@
    "wrf_mon_T2 = np.load(\"./data/WRF_T2_MONxLAT.npy\")"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "ae9a3b2e-07c2-48aa-9e31-16f8013a30c4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(180, 12)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "wrf_mon_T2.shape"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 26,
@ -28,10 +49,10 @@
				@@ -28,10 +49,10 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# area factors for different latitudes\n",
    "area_factor = (\n",
    "    np.cos(np.arange(180) * np.pi / 180) - np.cos(np.arange(1, 181) * np.pi / 180)\n",
-    ") / 2\n",
-    "# area factors for different latitudes"
+    ") / 2"
   ]
  },
  {
@ -51,7 +72,8 @@
				@@ -51,7 +72,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "lat = [20, 30, 40, 50]  # boundary latitudes for the plot"
+    "# boundary latitudes for the plot\n",
+    "lat = [20, 30, 40, 50]"
   ]
  },
  {
--- a/readme.md
+++ b/readme.md
@ -0,0 +1,76 @@
				@@ -0,0 +1,76 @@
+# About the scripts
+
+## Script `1_Earlier_measurements_images.ipynb`
+
+This program contains digitized data from external sources, necessary for constructing Figure 1.
+
+At the beginning of the script, the necessary libraries are loaded and arrays with digitized data are declared; at the end, a graph is constructed. 
+
+Data analysis in this file is minimal - it calculates the amplitude of seasonal variation (as a percentage relative to the annual average value).
+
+## Script `2_Vostok_measurements_images.ipynb`
+
+This script is fairly long (see the comments in the code for details).
+
+Firstly, the digitized data are declared again in the code (in this case, only the earlier data from the Vostok station, which are also used in the first script).
+
+### Preparing PG data
+
+Secondly, measurement data from the Vostok station (pre-averaged by the hour) are loaded into Pandas dataframes, both new (dataframes `df_10s` and `df_5min`) and earlier (dataframe `earlier_df_src`) datasets.
+
+New measurements at the Vostok station are combined from hourly data derived from 10-second files and hourly data derived from 5-minute files; it should be noted that the dataset primarily relies on the 10-second data, and the 5-minute data are only used when the 10-second data were unavailable (there were 24 such hours in 2013, 312 in 2015, 1752 in 2017, and 3600 in 2020). The composite series of new measurements is saved in the dataframe `df_src`.
+
+Next, we introduce helper functions. Notably, the `pass_fair_weather` function, when applied to a dataframe, retains only those days when (1) there were no gaps, (2) the potential gradient did not exceed 300 V/m and was non-negative, and (3) the peak-to-peak amplitude was no more than 150% of the average daily value of the potential gradient.
+
+The next helper functions to mention are `calculate_seasonal_var_params` and `std_error`. 
+
+The first function takes a dataframe with average daily values as input and returns (1) an array of 12 average monthly values of PG, (2) an array of 12 counts of fair weather days per month, and (3) an array of 12 values, each being the sum of squares of the average daily PG values for fair weather days divided by the number of fair weather days, as given by the following formula:
+
+$$
+\text{sum}_j = \frac{\sum\limits_{i}(\text{daily mean PG for the $i$-th fair weather day})^2}{\text{count of fair weather days}},
+$$
+where $j$ denotes the month number ($j = 1 \ldots 12$), and $i$ iterates over all fair weather days for which the month of the date equals $j$.
+
+The `std_error` function is designed to take the output from the `calculate_seasonal_var_params` function and return 12 values of the standard error, one for each month.
+
+Both described functions are used to compute values necessary for plotting graphs (mean value ± standard error).
+
+For both new and earlier Vostok data, we apply the `pass_fair_weather` function, resulting in two datasets that contain only the hours of fair weather days (`df` and `earlier_df`).
+
+### Figure 2
+
+To construct Figure 2, using the prepared data and helper functions, we calculate the mean values, the count of fair weather days and standard errors for three sets of data:
+
+1. The complete series of new Vostok data.
+2. The same series up to and including the year 2012.
+3. The same series after the year 2012.
+
+### Figure 3
+
+To construct Figure 3, we transform the Vostok data series into a matrix of 12 months × 24 hours. To do this, we group the original dataframe of fair weather hours by months and hours, and then find the mean value for all data points taken at a specific hour of a specific month (saved in dataframe `sd_df`).
+
+For clarity, we also present slices of this diurnal-seasonal diagram at 3, 9, 15, and 21 hours UTC.
+
+We note that renaming the axes of the multi-index resulting from grouping (`sd_df.index.set_names(['hour', 'month'], inplace=True)`) is not necessary for the code and can be commented out; however, it may be convenient for further work with the diurnal-seasonal dataframe `sd_df`.
+
+### Figure 4
+#### Removal of field anomalies associated with meteorological parameters
+
+First, we load the meteorological datasets (`temp_df`, `wind_df`, `pressure_df`), averaged by days (`vostok_daily_temp`, `vostok_daily_wind`, `vostok_daily_pressure_mm_hg`). For further analysis, we use the `meteo_df` dataframe, which is created by merging these meteorological data with the dataframe of daily average potential gradient values (`daily_df`).
+
+Next, we compile arrays of PG anomalies and anomalies for all meteorological parameters. The anomaly is calculated using a moving window of ±10 days.
+
+We then find the regression coefficients `temp_coeffs`, `wind_coeffs`, and `pres_coeffs` between the PG anomaly and the corresponding meteorological parameter anomalies, and calculate some statistical characteristics.
+
+Using the found regression coefficients, we remove the linear relationship with meteorological parameter anomalies. The corrected PG is saved in `meteo_df["CorrectedField"]`.
+
+Finally, we construct Figure 4 using the prepared data in the same manner as was done for Figures 2 and 3.
+
+
+## Script `3_WRF_T2_images.ipynb`
+
+This script calculates the seasonal variation of the 2m-level temperature (T2m) taken from climate modeling results (see article).
+
+In the script, temperature data averaged by longitude and by month are loaded (see data description below) from `WRF_T2_MONxLAT.npy`.
+
+Next, the temperature is averaged across latitude bands 20° S–20° N, 30° S–30° N, 40° S–40° N, and 50° S–50° N. The averaging takes into account the latitudinal area factor; degree cells at higher latitudes are summed with a diminishing coefficient. The results of the averaging (seasonal temperature variation in the specified latitude band) are displayed on a figure consisting of four panels.