@@ -5,8 +5,8 @@
"id": "113058f9-8324-4368-99ed-b088987c683a",
"metadata": {},
"source": [
"# New and earlier Vostok datasets \n",
"Seasonal-diurnal diagram, hour crossections, adjustment"
"# Comprehensive analysis of Vostok station data \n",
"Seasonal-diurnal diagram, hour crossections, adjustment using meteoparameters "
]
},
{
@@ -25,12 +25,14 @@
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import datetime as dt\n",
"import matplotlib.transforms as tf\n",
"from matplotlib import cm, colors, colormaps\n",
"import scipy.stats as st"
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scipy.stats as st\n",
"\n",
"import datetime as dt"
]
},
{
@@ -38,7 +40,7 @@
"id": "eb2d634e-ccc5-4fd2-a9e8-05d314f12c1b",
"metadata": {},
"source": [
"### S ource PG datasets"
"### Loading s ource PG datasets"
]
},
{
@@ -48,33 +50,81 @@
"metadata": {},
"outputs": [],
"source": [
"# potential gradient values measured at Vostok station in (1998–2001)\n",
"# units are V·m^(−1)\n",
"# source: Burns et al. (2012), Table 2\n",
"\n",
"vostok_old_data = (\n",
" np.array([195, 201, 205, 192, 188, 195,\n",
" 209, 198, 209, 195, 193, 192])\n",
")\n",
"\n",
"# potential gradient values measured at Vostok station in (1998–2001)\n",
"# units are V·m^(−1)\n",
"# source: Burns et al. (2012), Table 2"
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7de3455e-ba80-420d-8fec-8f82ee28804a ",
"id": "c63b60de-82fa-4dcd-af67-a3d4738a0431 ",
"metadata": {},
"outputs": [],
"source": [
"df_10s = pd.read_csv('./data/vostok_hourly_from_10_s_without_calibration_and_empty.tsv', sep='\\t', parse_dates=['Datetime']).set_index('Datetime')\n",
"df_10s[\"Mark\"] = '10s'\n",
"# Load hourly data derived from 10-second intervals \n",
"# \"Field\" column contains PG values [V/m] without scale and calibration\n",
"df_10s = pd.read_csv('./data/vostok_hourly_from_10_s_without_calibration_and_empty.tsv', \n",
" sep='\\t', parse_dates=['Datetime']).set_index('Datetime')\n",
"\n",
"df_5min = pd.read_csv('./data/vostok_hourly_from_5_min_without_calibration_and_empty.tsv', sep='\\t', parse_dates=['Datetime']).set_index('Datetime')\n",
"df_5min[\"Mark\"] = '5min'\n",
"\n",
"# combine dataframes: fill 10s averaged hours gaps with 10min averaged hours\n",
"df_src = df_10s.combine_first(df_5min)\n",
"# Add a new column 'Mark' to label this dataframe's data as originating from 10-second averages\n",
"df_10s[\"Mark\"] = '10s' "
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d2d83b75-97ce-42ba-8a99-8597f84d7946",
"metadata": {},
"outputs": [],
"source": [
"# Load hourly data derived from 5-minute intervals \n",
"# \"Field\" column contains PG values [V/m] without scale and calibration\n",
"df_5min = pd.read_csv('./data/vostok_hourly_from_5_min_without_calibration_and_empty.tsv', \n",
" sep='\\t', parse_dates=['Datetime']).set_index('Datetime')\n",
"\n",
"earlier_df_src = pd.read_csv('./data/vostok_1998_2004_hourly_80percent_all.tsv', sep='\\t', parse_dates=['Datetime']).set_index('Datetime')"
"# Add a new column 'Mark' to label this dataframe's data as originating from 5-minute averages\n",
"df_5min[\"Mark\"] = '5min' "
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "26da1848-ad4f-4e58-8a75-d02428fb12c7",
"metadata": {},
"outputs": [],
"source": [
"# Load earlier dataset (1998-2004) \n",
"earlier_df_src = pd.read_csv('./data/vostok_1998_2004_hourly_80percent_all.tsv', \n",
" sep='\\t', parse_dates=['Datetime']).set_index('Datetime')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d126b8b1-0b66-4817-a556-8f33860608ae",
"metadata": {},
"outputs": [],
"source": [
"# Note that all the aforementioned files are averaged with 80% data availability, which \n",
"# means that the hourly value was calculated only when at least 80% of the readings were \n",
"# available (at least 288 10-second averages or at least 9 5-minute averages)."
]
},
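{
"cell_type": "code",
"execution_count": null,
"id": "sketch-availability-check",
"metadata": {},
"outputs": [],
"source": [
"# An illustrative back-of-the-envelope check (a sketch, not part of the source datasets) of\n",
"# where the thresholds above come from: an hour nominally contains 360 ten-second readings\n",
"# and 12 five-minute readings.\n",
"print(0.8 * 3600 / 10)  # 288.0 ten-second averages per hour\n",
"print(0.8 * 60 / 5)     # 9.6 five-minute averages per hour"
]
},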
{
"cell_type": "code",
"execution_count": 7,
"id": "7de3455e-ba80-420d-8fec-8f82ee28804a",
"metadata": {},
"outputs": [],
"source": [
"# Combine the two dataframes: fill in gaps in the 10-second data with the 5-minute data.\n",
"df_src = df_10s.combine_first(df_5min)"
]
},
{
@@ -87,7 +137,7 @@
},
{
"cell_type": "code",
"execution_count": 4 ,
"execution_count": 8 ,
"id": "42979690-97b5-4e86-8777-db48896539ac",
"metadata": {},
"outputs": [],
@@ -96,9 +146,11 @@
" \"\"\"\n",
" Estimate the standard error from the average value\n",
" and the average value of the square.\n",
" \n",
" :param avg_val: the average value\n",
" :param avg_sqr: the average square value\n",
" :param counter: the size of the sample\n",
" \n",
" :return: the standard error\n",
" \"\"\"\n",
"\n",
@@ -107,40 +159,52 @@
},
{
"cell_type": "code",
"execution_count": 5 ,
"execution_count": 9 ,
"id": "b13f968c-f9db-4b05-86ae-0e997383f9c1",
"metadata": {},
"outputs": [],
"source": [
"def pass_fair_weather(df, scale_factor=None):\n",
" # form factor equal to 3 (remove metal pole influence)\n",
" df[\"Field\"] = df[\"Field\"] / scale_factor\n",
" \"\"\"\n",
" Filter and adjust potential gradient (PG) data to represent fair weather conditions.\n",
"\n",
" :param df: The input dataframe containing 'Field' values for hourly averaged potential gradient (PG) data\n",
" and 'Count' values indicating the number of valid measurements per day.\n",
" :param scale_factor: A factor used to normalize the 'Field' values to mitigate external influences\n",
" such as nearby metal structures (default is None, implying no scaling).\n",
"\n",
" :return: A dataframe with hourly data filtered to likely represent fair weather conditions only.\n",
" \"\"\"\n",
"\n",
" # normalize 'Field' values by the scale_factor if provided.\n",
" if scale_factor:\n",
" df[\"Field\"] = df[\"Field\"] / scale_factor\n",
" \n",
" # remove days with negative or zero hourly PG value\n",
" # exclude records with non-positive PG values \n",
" df = df[df[\"Field\"] > 0]\n",
" \n",
" # remove days with PG > 300 V/m\n",
" # exclude records with PG values > 300 V/m\n",
" df = df[df[\"Field\"] <= 300]\n",
" \n",
" # remove days with incomplete or absent hourly data\n",
" # retain only days with complete hourly data (24 counts) \n",
" df[\"Count\"] = df[\"Field\"].resample('D').transform('count')\n",
" df = df[df[\"Count\"] == 24]\n",
" \n",
" # calculate diurnal field characteristics\n",
" # compute diurnal PG stat s\n",
" df['Emax'] = df['Field'].resample('D').transform('max')\n",
" df['Emin'] = df['Field'].resample('D').transform('min')\n",
" df['Emean'] = df['Field'].resample('D').transform('mean')\n",
" \n",
" # remove days with relative p-p amplitude more than 150% of diurnal mean\n",
" # exclude days with peak-to-peak amplitude over 150% of the daily mean\n",
" df['Var'] = ((df[\"Emax\"]-df[\"Emin\"]))/df[\"Emean\"]\n",
" df = df[df['Var'] <= 1.5]\n",
" \n",
" # debugging code to calculate the number of hours \n",
" # # debug output for '5min' marked data counts per year \n",
" # (averaged from 5-min data) included in individual years \n",
" # z = df[df.Mark == '5min']\n",
" # print(z.groupby(z.index.year).count()[\"Mark\"])\n",
" \n",
" # remove temporary keys from dataframe \n",
" # remove temporary columns used for calculations \n",
" df = df[[\"Field\"]]\n",
"\n",
" return df"
@@ -148,16 +212,30 @@
},
{
"cell_type": "code",
"execution_count": 6 ,
"execution_count": 10 ,
"id": "350d271c-0337-4e2b-89df-f90a42671d5c",
"metadata": {},
"outputs": [],
"source": [
"def calculate_seasonal_var_params(cond_df, key=\"Field\"):\n",
" # monthly mean of fair weather PG\n",
" \"\"\"\n",
" Calculate parameters necessary for assessing the seasonal \n",
" variability of fair weather potential gradients (FW PG).\n",
"\n",
" :param cond_df: DataFrame containing conditioned data with a datetime \n",
" index and at least one numerical column.\n",
" :param key: Column label in 'cond_df' which contains the potential \n",
" gradient data (default is 'Field').\n",
"\n",
" :return: A list containing arrays of:\n",
" monthly mean PG values, \n",
" counts of fair weather days, and \n",
" adjusted monthly mean square PG values.\n",
" \"\"\"\n",
" # compute the monthly mean of fair weather PG\n",
" mon_pg = cond_df[key].groupby(cond_df.index.month).mean().to_numpy().flatten()\n",
"\n",
" # count fair weather days in individual months\n",
" # count the number of fair weather days for each month \n",
" mon_counter = cond_df.groupby(cond_df.index.month).count().to_numpy().flatten()\n",
"\n",
" # sum((daily mean FW PG)**2)/(count FW days) over months\n",
@@ -168,7 +246,7 @@
},
{
"cell_type": "code",
"execution_count": 7 ,
"execution_count": 11 ,
"id": "89b9f394-8d80-4db6-bd44-0b9778b03f9d",
"metadata": {},
"outputs": [],
@@ -186,16 +264,35 @@
},
{
"cell_type": "code",
"execution_count": 8 ,
"execution_count": 12 ,
"id": "d1f85dec-45b7-47b2-b421-d528776fff39",
"metadata": {},
"outputs": [],
"source": [
"# apply the fair weather filter to the newdataset with a scale factor of 3 to normalize field values.\n",
"df = pass_fair_weather(df_src, scale_factor=3)\n",
"\n",
"# apply the fair weather filter to earlier dataset data with a scale factor of 1 for normalization.\n",
"earlier_df = pass_fair_weather(earlier_df_src, scale_factor=1)\n",
"\n",
"# or for equal mean values of FW histograms from different datasets:\n",
"# earlier_df = pass_fair_weather(earlier_df, scale_factor=1.35) "
"# Optional: Adjust the scale factor to 1.35 for earlier data to equalize the mean values of FW histograms across datasets.\n",
"# earlier_df = pass_fair_weather(earlier_df, scale_factor=1.35)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "fbce521b-c249-4441-b962-9a7991fb521f",
"metadata": {},
"outputs": [],
"source": [
"# # debug figure for check the mean values of FW histograms\n",
"# fig, ax = plt.subplots(1,1)\n",
"# df.hist(ax=ax,bins=600, color='red')\n",
"# earlier_df.hist(ax=ax,bins=600, color='blue')\n",
"# plt.xlim(-200,500)\n",
"# plt.xlabel('V/m')\n",
"# plt.ylabel('Counts')"
]
},
{
@@ -208,31 +305,18 @@
},
{
"cell_type": "code",
"execution_count": 9 ,
"execution_count": 14 ,
"id": "c339ed78-e4ba-43a9-89b8-85e8a0c57236",
"metadata": {},
"outputs": [],
"source": [
"# calculate daily mean of fair weather PG\n",
"# resample the filtered data to calculate the daily mean of FW PG\n",
"# then drop any days with missing data to ensure completeness of the dataset\n",
"\n",
"daily_df = df.resample('D').mean().dropna()\n",
"daily_edf = earlier_df.resample('D').mean().dropna()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "fbce521b-c249-4441-b962-9a7991fb521f",
"metadata": {},
"outputs": [],
"source": [
"# fig, ax = plt.subplots(1,1)\n",
"# df.hist(ax=ax,bins=600, color='red')\n",
"# earlier_df.hist(ax=ax,bins=600, color='blue')\n",
"# plt.xlim(-200,500)\n",
"# plt.xlabel('V/m')\n",
"# plt.ylabel('Counts')"
]
},
{
"cell_type": "markdown",
"id": "e5a58975-053a-4162-bccb-f0dbef39b0ed",
@@ -243,13 +327,14 @@
},
{
"cell_type": "code",
"execution_count": 11 ,
"execution_count": 15 ,
"id": "6d6b5a71-6dd3-4471-b081-f3606c7b6956",
"metadata": {},
"outputs": [],
"source": [
"# calculate seasonal var params (avg FW PG, count FW days, \n",
"# sum of squares of FW PG) for specific years\n",
"# calculate seasonal variation parameters for specific years: \n",
"# average FW PG, count of FW days, and the sum of squares of FW PG hourly values.\n",
"\n",
"data, data_counter, data_sqr = np.array(\n",
" [\n",
" calculate_seasonal_var_params(daily_df),\n",
@@ -263,7 +348,7 @@
},
{
"cell_type": "code",
"execution_count": 12 ,
"execution_count": 16 ,
"id": "4e29c652-918d-47e2-ac67-1d0c2a898eea",
"metadata": {},
"outputs": [
@@ -324,7 +409,7 @@
" connectionstyle='arc3,rad=0.', fc='black',\n",
" linewidth=0.5\n",
" ))\n",
" # ampl = (np.max(data[n]) - np.min(data[n])) / np.mean(data[n]) \n",
"\n",
" ampl = (np.max(data[n]) - np.min(data[n])) / \\\n",
" np.sum(data[n] * data_counter[n]) * np.sum(data_counter[n])\n",
" ax[n].text(12.2, (np.min(data[n]) + np.max(data[n])) / 2,\n",
@@ -366,11 +451,14 @@
},
{
"cell_type": "code",
"execution_count": 13 ,
"execution_count": 17 ,
"id": "19eddef4-207d-4fcd-af6b-fc3fcd02e7cd",
"metadata": {},
"outputs": [],
"source": [
"# create a seasonal-diurnal array by grouping the data by hour of the day and month, \n",
"# then calculating the mean FW PG for each combination.\n",
"\n",
"sd_df = df.groupby([df.index.hour, df.index.month]).mean()\n",
"sd_df.index.set_names(['hour', 'month'], inplace=True)\n",
"\n",
@@ -380,7 +468,7 @@
},
{
"cell_type": "code",
"execution_count": 14 ,
"execution_count": 18 ,
"id": "5b03d7c6-e23e-45a9-8188-024d21851176",
"metadata": {},
"outputs": [],
@@ -402,7 +490,7 @@
},
{
"cell_type": "code",
"execution_count": 15 ,
"execution_count": 19 ,
"id": "762c01a9-2ee5-428c-b331-ef44d1801127",
"metadata": {},
"outputs": [
@@ -536,23 +624,18 @@
"pos = (width + wsep1 + widthM) / 2 / width\n",
"lbl.set_position((pos, 1.))\n",
"\n",
"pg_bound = np.arange(90, 191, 10)\n",
"# the bounds of the colour bars (in V/m)\n",
"pg_bound = np.arange(90, 191, 10)\n",
"\n",
"# colour_hour = [colormaps.get_cmap('inferno')(s / 11) for s in range(12)] \n",
"# the colours \n",
"colour_hour = np.array([[163, 163, 255], [71, 71, 255], [1, 14, 236],\n",
" [4, 79, 149], [6, 143, 63], [49, 192, 13],\n",
" [142, 220, 7], [234, 249, 1], [255, 184, 0],\n",
" [255, 92, 0], [255, 0, 0]]) / 255\n",
"# the colours\n",
"\n",
"# pg_cmap = colors.ListedColormap(colour_hour[:-1])\n",
"pg_cmap = colors.ListedColormap(colour_hour[1:])\n",
"pg_norm = colors.BoundaryNorm(pg_bound, pg_cmap.N)\n",
"\n",
"# pg_cmap = 'inferno'\n",
"# pg_norm = colors.Normalize(vmin=pg_bound[0], vmax=pg_bound[-1])\n",
"\n",
"pg_cax = fig.add_axes(\n",
" (\n",
" (wsp - wshift) / tot_width,\n",
@@ -625,7 +708,7 @@
" connectionstyle='arc3,rad=0.', fc='black',\n",
" linewidth=0.5\n",
" ))\n",
" # ampl = (np.max(data[n]) - np.min(data[n])) / np.mean(data[n]) \n",
"\n",
" ampl = (np.max(data[n]) - np.min(data[n])) / \\\n",
" np.sum(data[n] * data_counter[n]) * np.sum(data_counter[n])\n",
" axB[n].text(12.2, (np.min(data[n]) + np.max(data[n])) / 2,\n",
@@ -662,11 +745,15 @@
},
{
"cell_type": "code",
"execution_count": 17 ,
"execution_count": 20 ,
"id": "d16e0930-f81f-4288-a7a9-2f3942194f81",
"metadata": {},
"outputs": [],
"source": [
"# load and filter meteorological data from CSV files into separate dataframes, \n",
"# then join them with the daily FW PG dataframe to create a combined dataframe\n",
"# without missing values\n",
"\n",
"temp_df = pd.read_csv('./data/vostok_daily_temp.csv', parse_dates=['UTC']).set_index('UTC')\n",
"temp_df = temp_df[temp_df[\"COUNT\"] == 4][['T']]\n",
"\n",
@@ -681,11 +768,15 @@
},
{
"cell_type": "code",
"execution_count": 18 ,
"execution_count": 2 1,
"id": "c45bdf6b-d725-4d87-b033-d90fa4f3f9a9",
"metadata": {},
"outputs": [],
"source": [
"# calculate anomalies for potential gradient (pg_anom), temperature\n",
"# (temp_anom), wind speed (wind_anom), and pressure (pres_anom)\n",
"# based on a moving average window of +/-10 days.\n",
"\n",
"pg_anom = np.zeros((len(meteo_df.index)))\n",
"temp_anom = np.zeros((len(meteo_df.index)))\n",
"wind_anom = np.zeros((len(meteo_df.index)))\n",
@@ -714,7 +805,7 @@
},
{
"cell_type": "code",
"execution_count": 19 ,
"execution_count": 22 ,
"id": "326311ea-74ce-4de1-ac7a-e30e173fc8d8",
"metadata": {},
"outputs": [
@@ -750,6 +841,9 @@
}
],
"source": [
"# calculate linear regression coefficients for temperature, wind speed \n",
"# and pressure anomalies against potential gradient anomalies.\n",
"\n",
"temp_coeffs = np.polyfit(temp_anom, pg_anom, deg=1)\n",
"wind_coeffs = np.polyfit(wind_anom, pg_anom, deg=1)\n",
"pres_coeffs = np.polyfit(pres_anom, pg_anom, deg=1)\n",
@@ -763,8 +857,8 @@
" P = len(meteo_df.index) # the number of points\n",
" a = 0.01 # significance level\n",
"\n",
" q = st.t.ppf(1 - a / 2, P - 2)\n",
" r = q / np.sqrt(q**2 + P - 2)\n",
" q = st.t.ppf(1 - a / 2, P - 2) # critical value \n",
" r = q / np.sqrt(q**2 + P - 2) # threshold correlation coefficient \n",
"\n",
" corr = np.corrcoef(anom, pg_anom)[0, 1]\n",
" print(f\"{title}: a={coeffs[0]}\\tb={coeffs[1]}\")\n",
@@ -782,11 +876,15 @@
},
{
"cell_type": "code",
"execution_count": 20 ,
"execution_count": 23 ,
"id": "dafd0f56-b6fa-4883-9640-01fdcb4a3870",
"metadata": {},
"outputs": [],
"source": [
"# update the \"CorrectedField\" column (copy of \"Field\" column) in the `meteo_df`\n",
"# by subtracting the linear regression adjustments \n",
"# based on temperature and wind anomalies.\n",
"\n",
"meteo_df[\"CorrectedField\"] = meteo_df[\"Field\"]\n",
"meteo_df[\"CorrectedField\"] -= temp_coeffs[0] * (meteo_df[\"T\"] - meteo_df[\"T\"].mean())\n",
"meteo_df[\"CorrectedField\"] -= wind_coeffs[0] * (meteo_df[\"Ff\"] - meteo_df[\"Ff\"].mean())"
@@ -802,13 +900,14 @@
},
{
"cell_type": "code",
"execution_count": 21 ,
"execution_count": 24 ,
"id": "9a209c25-af4a-4c67-80cc-2ce657f9cb3d",
"metadata": {},
"outputs": [],
"source": [
"# calculate seasonal var params (avg FW PG, count FW days, \n",
"# sum of squares of FW PG) for specific datasets and fields\n",
"# calculate seasonal variation parameters for specific years: \n",
"# average FW PG, count of FW days, and the sum of squares of FW PG hourly values\n",
"\n",
"data, data_counter, data_sqr = np.array(\n",
" [\n",
" calculate_seasonal_var_params(daily_edf[daily_edf.index.year <= 2001]),\n",
@@ -823,7 +922,7 @@
},
{
"cell_type": "code",
"execution_count": 22 ,
"execution_count": 25 ,
"id": "84050d4a-259f-4513-8d91-0ffc9840f542",
"metadata": {},
"outputs": [
@@ -841,8 +940,7 @@
"source": [
"fig = plt.figure(figsize=(10, 14), constrained_layout=False)\n",
"ax = [None for _ in range(4)]\n",
"# for n in range(4):\n",
"# ax[n] = fig.add_subplot(2, 2, n + 1)\n",
"\n",
"for n in range(4):\n",
" ax[n] = fig.add_subplot(4, 4, (2*n + 1, 2*n + 2))\n",
"\n",
@@ -850,6 +948,7 @@
"high = [220] * 2 + [180] * 2\n",
"step = [20] * 4\n",
"coeff = [1] * 4\n",
"\n",
"caption = ['Vostok station, 1998–2001',\n",
" 'Vostok station, 1998–2001 (adjusted)',\n",
" 'Vostok station, 2006–2020',\n",