
Enhanced Vostok analysis with solar wind data and multiple linear regression; fixed bugs; improved docs and readme; deleted duplicate/temporary code

Branch: master · FedorSarafanov, 4 months ago · commit 4fc15e2bc2
24 changed files (changed-line counts in parentheses; BIN = binary file):
  1. .gitignore (1)
  2. 0_prepare_data.ipynb (371)
  3. 1_earlier_measurements_images.ipynb (108)
  4. 2_Vostok_measurements_images.ipynb (1416)
  5. 3_WRF_T2_images.ipynb (70)
  6. 4_IP_simulations_temporal_images.ipynb (187)
  7. 5_IP_simulations_spatial_images.ipynb (1079)
  8. data/Vostok/vostok_1998_2020_hourly_SWIP.txt (196732)
  9. data/Vostok/vostok_2006_2020_results.npz (BIN)
  10. data/Vostok/vostok_diurnal_2006_2020.npy (BIN)
  11. figures/earlier_measurements.eps (5622)
  12. figures/ip_decomposed.eps (8745)
  13. figures/ip_pg_partial.eps (6994)
  14. figures/ip_pg_total.eps (6130)
  15. figures/ip_separate.eps (25443)
  16. figures/new_parameterisation.eps (7622)
  17. figures/pg_3_years.eps (5083)
  18. figures/pg_corrected.eps (6010)
  19. figures/pg_diurnal_seasonal.eps (9971)
  20. figures/pg_earlier.eps (1713)
  21. figures/pg_individual.eps (12025)
  22. figures/pg_total_partial.eps (3398)
  23. figures/t2.eps (3057)
  24. readme.md (262)

1
.gitignore

@@ -0,0 +1 @@
.ipynb_checkpoints

371
0_prepare_data.ipynb

@@ -13,12 +13,12 @@
"id": "5324ceb9-24e7-454b-87b9-ba9a717078ae",
"metadata": {},
"source": [
"### Import libraries"
"### Importing libraries"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "7b2a7f44-b0cb-4471-a0c6-e56da23caf86",
"metadata": {},
"outputs": [],
@@ -39,43 +39,43 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "36b9f49e-32e6-4544-a9d3-f6a8ba49d867",
"metadata": {},
"outputs": [],
"source": [
"# also available at https://eee.ipfran.ru/files/seasonal-variation-2024/\n",
"# attention: the files are very large (~ 350 GB totally)\n",
"src_path = \"../shared_files/eee_public_files/seasonal-variation-2024/\""
"src_path = \"../../shared_files/eee_public_files/seasonal-variation-2024/\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "78a4350c-59fb-479a-b7cd-e2bf9b996d36",
"metadata": {},
"outputs": [],
"source": [
"# used numbers of simulated days for analysis\n",
"# the number of simulated days used for analysis\n",
"wrf_N_days = 4992\n",
"inmcm_N_days = 3650"
"inm_N_days = 3650"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "53cb9cc3-0e56-4da4-920b-2f071a0846fb",
"metadata": {},
"outputs": [],
"source": [
"# dates corresponding to the indices (0 axis) of the data arrays\n",
"# note: for WRF dates correspond to real dates\n",
"\n",
"wrf_dt_indicies = np.array(\n",
"# note: in the case of the WRF the dates correspond to real dates\n",
"# note: in the case of the INMCM the are 10 365-day years\n",
"wrf_dt_indices = np.array(\n",
" [dt.date(1980, 1, 1) + dt.timedelta(i * 3) for i in range(wrf_N_days)]\n",
")\n",
"inmcm_dt_indicies = np.array(\n",
" [dt.date(2022, 1, 1) + dt.timedelta(i % 365) for i in range(inmcm_N_days)]\n",
"inm_dt_indices = np.array(\n",
" [dt.date(2022, 1, 1) + dt.timedelta(i % 365) for i in range(inm_N_days)]\n",
")"
]
},
@@ -84,96 +84,62 @@
"id": "5e16ee8e-f3b0-4251-9691-19d7dfd4aff7",
"metadata": {},
"source": [
"### Preprocessing WRF T2m data"
"### Preprocessing T2m data from the WRF"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "87860fa8-0a9c-4304-9c3c-94561c3e966c",
"execution_count": 5,
"id": "ccff6ad9-dc83-440f-8642-93cfc150b03f",
"metadata": {},
"outputs": [],
"source": [
"# air temperature at the height of 2 m with the shape\n",
"# air temperature values (in K) at the height of 2 m with the shape\n",
"# (number of days, number of hours, number of latitudes, number of longitudes)\n",
"# contains temperature values depending on (d, h, lat, lon)\n",
"# d (axis 0) is the number of a day starting with 0 and ending with 5113\n",
"# every third day is taken\n",
"# d = 0 corresponds to 1 Jan 1980, \n",
"# d = 0 corresponds to 1 Jan 1980\n",
"# d = 5113 corresponds to 30 Dec 2021\n",
"# d = 4991 corresponds to 29 Dec 2020\n",
"# (we will restrict our attention to 1980–2020)\n",
"# h (axis 1) is the hour of the day (an integer in [0, 25])\n",
"# the values corresponding to h = 0 and h = 24 are the same\n",
"# lat (axis 2) describes the latitude (an integer in [0, 179]) \n",
"# (we delete the 25th value)\n",
"# lat (axis 2) describes the latitude (an integer in [0, 179])\n",
"# lon (axis 3) describes the longitude (an integer in [0, 359])\n",
"\n",
"wrf_T2_data = np.load(f\"{src_path}/WRF-T2-MAP.npy\")[:wrf_N_days, :24]\n",
"wrf_T2_data_DAYxLAT = wrf_T2_data.mean(axis=(1, 3))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "1124d9f9-95d9-4c02-8176-82b9c0331d34",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(4992, 180)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wrf_T2_data_DAYxLAT.shape"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "ec569ffd-93c2-4490-8ba1-69af4fab8f23",
"metadata": {},
"execution_count": 6,
"id": "a7148097-51d8-492b-80c4-7b4174bba4b4",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# air temperature averaged over latitudes and months\n",
"wrf_mon_T2 = np.zeros((180, 12))\n",
"# initialising an array to store monthly averaged values\n",
"# for different latitudes\n",
"wrf_T2_LATxMON = np.zeros((180, 12))\n",
"\n",
"# iterating over month numbers (starting with 0)\n",
"for month_idx in range(12):\n",
" # filter indicies by month number\n",
" monthly_indicies = [\n",
" i for i, date in enumerate(wrf_dt_indicies) if date.month == month_idx + 1\n",
" # filtering indices by the month number\n",
" wrf_monthly_indices = [\n",
" i for i, date in enumerate(wrf_dt_indices)\n",
" if date.month == month_idx + 1\n",
" ]\n",
"\n",
" # putting values at specific month into averaged array\n",
" wrf_mon_T2[:, month_idx] = wrf_T2_data_DAYxLAT[monthly_indicies].mean(axis=0)-273.15\n",
" # putting the values for the specific month into the array\n",
" # (also converting the values from K to °C)\n",
" wrf_T2_LATxMON[:, month_idx] = (\n",
" wrf_T2_data_DAYxLAT[wrf_monthly_indices].mean(axis=0) - 273.15\n",
" )\n",
"\n",
"np.save(f\"./data/WRF/WRF_T2_LATxMON.npy\",wrf_mon_T2)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "b480c05f-4b06-4d33-9527-dbe2655ed251",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"27.894258059212177"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wrf_mon_T2.max()"
"np.save(\"./data/WRF/WRF_T2_LATxMON.npy\", wrf_T2_LATxMON)"
]
},
{
@@ -181,128 +147,137 @@
"id": "46d4f093-a420-42c7-b885-a8409d9d8ee4",
"metadata": {},
"source": [
"### Preprocessing INMCM and WRF IP: classic parametrization"
"### Preprocessing IP data from the INMCM and WRF: the usual parameterisation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "94a603c3-982d-4c78-be1c-bb6c74b86b5b",
"execution_count": 7,
"id": "052374df-a505-420a-aa69-2902ce5fc23b",
"metadata": {},
"outputs": [],
"source": [
"# dictionaries where processed data is saved\n",
"# the dictionary keys represent the threshold value of CAPE\n",
"# dictionaries where the processed data are saved\n",
"# dictionary keys represent CAPE threshold values\n",
"\n",
"# for storing arrays with averaged by hours and summarized by longitude,\n",
"# i.e. with dimensions (4992, 180) for WRF and (3650, 120) for INMCM\n",
"wrf_daily_latitudal_ip = {}\n",
"inmcm_daily_latitudal_ip = {}\n",
"# dictionaries to store diurnal average IP values summed over longitudes\n",
"# the dimensions are (4992, 180) for the WRF and (3650, 120) for the INMCM\n",
"wrf_daily_lat_ip = {}\n",
"inm_daily_lat_ip = {}\n",
"\n",
"# for storing arrays summarized by longitude and latitude,\n",
"# i.e. with dimensions (4992, 24) for WRF and (3650, 24) for INMCM\n",
"# dictionaries to store hourly IP values summed over longitudes and latitudes\n",
"# the dimensions are (4992, 24) for the WRF and (3650, 24) for the INMCM\n",
"wrf_hourly_total_ip = {}\n",
"inmcm_hourly_total_ip = {}"
"inm_hourly_total_ip = {}"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "d8e43c4f-59af-483c-8979-535c696abb4e",
"metadata": {},
"outputs": [],
"source": [
"# iterating over the CAPE threshold (J/kg) values used in modeling \n",
"# for each threshold, there are corresponding model datasets\n",
"# iterating over CAPE threshold values (in J/kg) used in modeling \n",
"# for each threshold, there are corresponding model data sets\n",
"for cape_thres in [800, 1000, 1200]:\n",
"\n",
" # grid cell contributions to the IP (not normalised) with the shape\n",
" # (number of days, number of hours, number of latitudes, number of longitudes)\n",
" wrf_raw_ip_data = np.load(f\"{src_path}/WRF-IP-MAP-{cape_thres}.npy\")[:wrf_N_days]\n",
" # modelled using WRF model with CAPE threshold = `cape_thres` J/kg\n",
" # (number of days, number of hours,\n",
" # number of latitudes, number of longitudes)\n",
" # simulated using the WRF with CAPE threshold = `cape_thres` J/kg\n",
" # contains values of contributions to the IP depending on (d, h, lat, lon)\n",
" # d (axis 0) is the number of a day starting with 0 and ending with 5113\n",
" # every third day is taken\n",
" # d = 0 corresponds to 1 Jan 1980, \n",
" # d = 0 corresponds to 1 Jan 1980\n",
" # d = 5113 corresponds to 30 Dec 2021\n",
" # d = 4991 corresponds to 29 Dec 2020\n",
" # (we will restrict our attention to 1980–2020)\n",
" # h (axis 1) is the hour of the day (an integer in [0, 24])\n",
" # the values corresponding to h = 0 and h = 24 are the same\n",
" # lat (axis 2) describes the latitude (an integer in [0, 179]) \n",
" # (we delete the 25th value)\n",
" # lat (axis 2) describes the latitude (an integer in [0, 179])\n",
" # lon (axis 3) describes the longitude (an integer in [0, 359])\n",
" wrf_raw_ip_data = np.load(\n",
" f\"{src_path}/WRF-IP-MAP-{cape_thres}.npy\"\n",
" )[:wrf_N_days, :24]\n",
"\n",
" # discarding the last hour, which duplicates the first one\n",
" wrf_raw_ip_data = wrf_raw_ip_data[:, :24, :, :]\n",
" \n",
" # normalisation of contributions to the IP to the global mean of 240 kV\n",
" # normalising contributions to the IP to the global mean of 240 kV\n",
" wrf_raw_ip_data /= (1/240e3) * wrf_raw_ip_data.sum(axis=(-2,-1)).mean()\n",
"\n",
" # filling dictionaries with averaged arrays\n",
" wrf_daily_latitudal_ip[cape_thres] = wrf_raw_ip_data.mean(axis=1).sum(axis=-1)\n",
" # filling the dictionaries with averaged values\n",
" wrf_daily_lat_ip[cape_thres] = wrf_raw_ip_data.mean(axis=1).sum(axis=-1)\n",
" wrf_hourly_total_ip[cape_thres] = wrf_raw_ip_data.sum(axis=(-2, -1))\n",
"\n",
" np.save(f\"./data/WRF/WRF_HOURLY_TOTAL_IP_{cape_thres}.npy\",\n",
" wrf_hourly_total_ip[cape_thres])\n",
" np.save(\n",
" f\"./data/WRF/WRF_HOURLY_TOTAL_IP_{cape_thres}.npy\",\n",
" wrf_hourly_total_ip[cape_thres]\n",
" )\n",
"\n",
" # grid cell contributions to the IP (not normalised) reshaped to\n",
" # (number of days, number of hours, number of latitudes, number of longitudes)\n",
" inmcm_raw_ip_data = np.load(f\"{src_path}/INMCM-IP-MAP-{cape_thres}.npy\")\\\n",
" .reshape((inmcm_N_days, 24, 120, 180))\n",
" # modelled using INMCM model with CAPE threshold = `cape_thres` J/kg\n",
" # (number of days, number of hours,\n",
" # number of latitudes, number of longitudes)\n",
" # simulated using the INMCM with CAPE threshold = `cape_thres` J/kg\n",
" # contains values of contributions to the IP depending on (d, h, lat, lon)\n",
" # d (axis 0) is the number of a day (not correspond to real days,\n",
" # 10 consecutive 365-day years have been simulated)\n",
" # d (axis 0) is the number of a day\n",
" # (10 consecutive 365-day years have been simulated)\n",
" # h (axis 1) is the hour of the day (an integer in [0, 23])\n",
" # lat (axis 2) describes the latitude (an integer in [0, 179]) \n",
" # lat (axis 2) describes the latitude (an integer in [0, 179])\n",
" # lon (axis 3) describes the longitude (an integer in [0, 359])\n",
" inm_raw_ip_data = np.load(\n",
" f\"{src_path}/INMCM-IP-MAP-{cape_thres}.npy\"\n",
" ).reshape((inm_N_days, 24, 120, 180))\n",
"\n",
" # normalisation of contributions to the IP to the global mean of 240 kV\n",
" inmcm_raw_ip_data /= (1/240e3) * inmcm_raw_ip_data.sum(axis=(-2,-1)).mean()\n",
" # normalising contributions to the IP to the global mean of 240 kV\n",
" inm_raw_ip_data /= (1/240e3) * inm_raw_ip_data.sum(axis=(-2,-1)).mean()\n",
"\n",
" # filling dictionaries with averaged arrays\n",
" inmcm_daily_latitudal_ip[cape_thres] = inmcm_raw_ip_data.mean(axis=1).sum(axis=-1)\n",
" inmcm_hourly_total_ip[cape_thres] = inmcm_raw_ip_data.sum(axis=(-2, -1))\n",
" # filling the dictionaries with averaged values\n",
" inm_daily_lat_ip[cape_thres] = inm_raw_ip_data.mean(axis=1).sum(axis=-1)\n",
" inm_hourly_total_ip[cape_thres] = inm_raw_ip_data.sum(axis=(-2, -1))\n",
"\n",
" np.save(f\"./data/INMCM/INMCM_HOURLY_TOTAL_IP_{cape_thres}.npy\",\n",
" inmcm_hourly_total_ip[cape_thres])"
" np.save(\n",
" f\"./data/INMCM/INMCM_HOURLY_TOTAL_IP_{cape_thres}.npy\",\n",
" inm_hourly_total_ip[cape_thres]\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "eb28cbc7-eb0a-49be-8cc1-734bba1d06f5",
"metadata": {},
"outputs": [],
"source": [
"# iterating over the CAPE threshold (J/kg) values used in modeling \n",
"# for each threshold, there are corresponding model datasets\n",
"# iterating over CAPE threshold values (in J/kg) used in modeling \n",
"# for each threshold, there are corresponding model data sets\n",
"for cape_thres in [800, 1000, 1200]:\n",
"\n",
" # initialization of an arrays to store time-averaged data over months\n",
" # initialising arrays to store monthly averaged values\n",
" # for different latitudes\n",
" wrf_data_LATxMON = np.zeros((180, 12))\n",
" inmcm_data_LATxMON = np.zeros((120, 12))\n",
" inm_data_LATxMON = np.zeros((120, 12))\n",
"\n",
" # iteration over month number (starting with 0)\n",
" # iterating over month numbers (starting with 0)\n",
" for month_idx in range(12):\n",
"\n",
" # filtering day indices belonging to a specific month\n",
" wrf_monthly_indicies = [i for i, date in enumerate(wrf_dt_indicies) \n",
" if date.month == month_idx + 1]\n",
" inm_monthly_indicies = [i for i, date in enumerate(inmcm_dt_indicies) \n",
" if date.month == month_idx + 1]\n",
"\n",
" # filling with modeling values with a CAPE threshold \n",
" # averaged over months of the year\n",
" wrf_data_MONxLAT[:, month_idx] = \\\n",
" wrf_daily_latitudal_ip[cape_thres][monthly_indicies].mean(axis=0)\n",
" inmcm_data_LATxMON[:, month_idx] = \\\n",
" inmcm_daily_latitudal_ip[cape_thres][monthly_indicies].mean(axis=0)\n",
"\n",
" np.save(f\"./data/WRF/WRF_IP_{cape_thres}_LATxMON.npy\",\n",
" wrf_data_MONxLAT)\n",
" np.save(f\"./data/INMCM/INMCM_IP_{cape_thres}_LATxMON.npy\",\n",
" inmcm_data_LATxMON)"
" # filtering indices by the month number\n",
" wrf_monthly_indices = [i for i, date in enumerate(wrf_dt_indices)\n",
" if date.month == month_idx + 1]\n",
" inm_monthly_indices = [i for i, date in enumerate(inm_dt_indices)\n",
" if date.month == month_idx + 1]\n",
"\n",
" # putting the values for the specific month into the array\n",
" wrf_data_LATxMON[:, month_idx] = \\\n",
" wrf_daily_lat_ip[cape_thres][wrf_monthly_indices].mean(axis=0)\n",
" inm_data_LATxMON[:, month_idx] = \\\n",
" inm_daily_lat_ip[cape_thres][inm_monthly_indices].mean(axis=0)\n",
"\n",
" np.save(\n",
" f\"./data/WRF/WRF_IP_{cape_thres}_LATxMON.npy\",\n",
" wrf_data_LATxMON\n",
" )\n",
" np.save(\n",
" f\"./data/INMCM/INMCM_IP_{cape_thres}_LATxMON.npy\",\n",
" inm_data_LATxMON\n",
" )"
]
},
{
@@ -310,102 +285,71 @@
"id": "91bc6d7a-393c-4078-9a6d-1955393d55f5",
"metadata": {},
"source": [
"### Preprocessing WRF IP: new parametrization"
"### Preprocessing IP data from the WRF: the new parameterisation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b6f987e-ba3c-4371-af7b-c9857a7d33d9",
"execution_count": 10,
"id": "5bc66a8f-aa8f-4681-91a9-60c9fbdbf8f2",
"metadata": {},
"outputs": [],
"source": [
"# grid cell contributions to the IP (not normalised) reshaped to\n",
"# grid cell contributions to the IP (not normalised) with the shape\n",
"# (number of days, number of hours, number of latitudes, number of longitudes)\n",
"wrf_raw_ip_data = np.load(f\"{src_path}/WRF-IP-MAP-500-T2-25.npy\")[:wrf_N_days]\n",
"# modelled using WRF model using new parametrization based on\n",
"# CAPE and T2 with corresponding thresholds 500 J/kg and 25°C.\n",
"# simulated using the WRF with CAPE threshold = 500 J/kg\n",
"# and temperature threshold = 25 °C\n",
"# contains values of contributions to the IP depending on (d, h, lat, lon)\n",
"# d (axis 0) is the number of a day starting with 0 and ending with 5113\n",
"# every third day is taken\n",
"# d = 0 corresponds to 1 Jan 1980, \n",
"# d = 0 corresponds to 1 Jan 1980\n",
"# d = 5113 corresponds to 30 Dec 2021\n",
"# d = 4991 corresponds to 29 Dec 2020\n",
"# (we will restrict our attention to 1980–2020)\n",
"# h (axis 1) is the hour of the day (an integer in [0, 24])\n",
"# the values corresponding to h = 0 and h = 24 are the same\n",
"# lat (axis 2) describes the latitude (an integer in [0, 179]) \n",
"# (we delete the 25th value)\n",
"# lat (axis 2) describes the latitude (an integer in [0, 179])\n",
"# lon (axis 3) describes the longitude (an integer in [0, 359])\n",
"wrf_raw_ip_data = np.load(\n",
" f\"{src_path}/WRF-IP-MAP-500-T2-25.npy\"\n",
")[:wrf_N_days, :24]\n",
"\n",
"# discarding the last hour, which duplicates the first one\n",
"wrf_raw_ip_data = wrf_raw_ip_data[:, :24, :, :]\n",
"\n",
"# normalisation of contributions to the IP to the global mean of 240 kV\n",
"# normalising contributions to the IP to the global mean of 240 kV\n",
"wrf_raw_ip_data /= (1/240e3) * wrf_raw_ip_data.sum(axis=(-2,-1)).mean()\n",
"\n",
"# filling dictionaries with averaged arrays\n",
"# filling the dictionaries with averaged values\n",
"wrf_daily_latitudal_ip = wrf_raw_ip_data.mean(axis=1).sum(axis=-1)\n",
"wrf_hourly_total_ip = wrf_raw_ip_data.sum(axis=(-2, -1))\n",
"\n",
"np.save(\n",
" f\"./data/WRF/WRF_HOURLY_TOTAL_IP_500_T2_25.npy\",\n",
" \"./data/WRF/WRF_HOURLY_TOTAL_IP_500_T2_25.npy\",\n",
" wrf_hourly_total_ip,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5797fffa-a795-4241-a574-6c95f0195a5d",
"metadata": {},
"outputs": [],
"source": [
"# iterating over the CAPE threshold (J/kg) values used in modeling \n",
"# for each threshold, there are corresponding model datasets\n",
"for cape_thres in [800, 1000, 1200]:\n",
"\n",
" # initialization of an arrays to store time-averaged data over months\n",
" wrf_data_LATxMON = np.zeros((180, 12))\n",
" inmcm_data_LATxMON = np.zeros((120, 12))\n",
"\n",
" # iteration over month number (starting with 0)\n",
" for month_idx in range(12):\n",
"\n",
" # filtering day indices belonging to a specific month\n",
" wrf_monthly_indicies = [i for i, date in enumerate(wrf_dt_indicies) \n",
" if date.month == month_idx + 1]\n",
" inm_monthly_indicies = [i for i, date in enumerate(inmcm_dt_indicies) \n",
" if date.month == month_idx + 1]\n",
"\n",
" # filling with modeling values with a CAPE threshold \n",
" # averaged over months of the year\n",
" wrf_data_MONxLAT[:, month_idx] = \\\n",
" wrf_daily_latitudal_ip[cape_thres][monthly_indicies].mean(axis=0)\n",
" inmcm_data_LATxMON[:, month_idx] = \\\n",
" inmcm_daily_latitudal_ip[cape_thres][monthly_indicies].mean(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"id": "17036c19-95f8-40df-a6c9-f8a23cf426f6",
"metadata": {},
"outputs": [],
"source": [
"# initialization of a array to store time-averaged data over months\n",
"# initialising an array to store monthly averaged values\n",
"# for different latitudes\n",
"wrf_data_LATxMON = np.zeros((180, 12))\n",
"\n",
"# iteration over month number (starting with 0)\n",
"# iterating over month numbers (starting with 0)\n",
"for month_idx in range(12):\n",
" # filtering day indices belonging to a specific month\n",
" monthly_indicies = [i for i, date in enumerate(wrf_dt_indicies)\n",
" if date.month == month_idx + 1]\n",
" # filtering indices by the month number\n",
" wrf_monthly_indices = [i for i, date in enumerate(wrf_dt_indices)\n",
" if date.month == month_idx + 1]\n",
"\n",
" # filling with modeling values averaged over months of the year\n",
" # putting the values for the specific month into the array\n",
" wrf_data_LATxMON[:, month_idx] = \\\n",
" wrf_daily_latitudal_ip[monthly_indicies].mean(axis=0)\n",
" wrf_daily_latitudal_ip[wrf_monthly_indices].mean(axis=0)\n",
"\n",
"np.save(f\"./data/WRF/WRF_IP_500_T2_25_LATxMON.npy\", wrf_data_LATxMON)"
"np.save(\"./data/WRF/WRF_IP_500_T2_25_LATxMON.npy\", wrf_data_LATxMON)"
]
},
{
@@ -413,12 +357,12 @@
"id": "e24297fc-cf81-4ea7-9a80-cdcaf277474a",
"metadata": {},
"source": [
"### Saving number of days (used for monthly mean) for each month"
"### Saving the number of days for each month (used to compute mean values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"id": "894ad630-17a5-4744-907e-a07768ff7848",
"metadata": {},
"outputs": [],
@@ -427,24 +371,31 @@
"# necessary for correct averaging due to \n",
"# different numbers of days in different months\n",
"\n",
"wrf_days = np.array([len([i for i, date in enumerate(wrf_dt_indicies) \n",
" if date.month == m + 1]) \n",
"wrf_days = np.array([len([i for i, date in enumerate(wrf_dt_indices) \n",
" if date.month == m + 1])\n",
" for m in range(12)])\n",
"\n",
"inm_days = np.array([len([i for i, date in enumerate(inmcm_dt_indicies) \n",
" if date.month == m + 1]) \n",
"inm_days = np.array([len([i for i, date in enumerate(inm_dt_indices) \n",
" if date.month == m + 1])\n",
" for m in range(12)])\n",
"\n",
"np.save(f\"./data/WRF/WRF_NUMDAYS_MON.npy\", wrf_days)\n",
"np.save(f\"./data/INMCM/INMCM_NUMDAYS_MON.npy\", inm_days)\n",
"np.save(\"./data/WRF/WRF_NUMDAYS_MON.npy\", wrf_days)\n",
"np.save(\"./data/INMCM/INMCM_NUMDAYS_MON.npy\", inm_days)\n",
"\n",
"# for average over months use\n",
"# `(wrf_data_LATxMON[:, :].sum(axis=0)*days).sum()/days.sum()`\n",
"# unstead\n",
"# `wrf_data_LATxMON[:, :].sum(axis=0).mean()`\n",
"# because\n",
"# to calculate the annual mean value, use\n",
"# `(wrf_data_LATxMON[:, :].sum(axis=0) * days).sum() / days.sum()`\n",
"# rather than\n",
"# `wrf_data_LATxMON[:, :].sum(axis=0).mean()`,\n",
"# since\n",
"# `((a1+a2+a3)/3 + (b1+b2)/2)/2 != (a1+a2+a3+b1+b2)/5`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04edcb46-b3f9-491a-ba88-509d0fceaac5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

108
1_Earlier_measurements_images.ipynb → 1_earlier_measurements_images.ipynb

@@ -5,7 +5,8 @@
"id": "63c4ff9b-a25e-400d-8800-70c3b643c091",
"metadata": {},
"source": [
"# Processing digitized data from external sources to construct Figure 1"
"# Processing the digitised data of earlier measurements\n",
"The source code of Figure 1.1"
]
},
{
@@ -13,7 +14,7 @@
"id": "156aeb14-9418-45a5-90a4-0c9bf7352f6f",
"metadata": {},
"source": [
"### Import libraries"
"### Importing libraries"
]
},
{
@@ -42,8 +43,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Define an array of abbreviated month names to use as labels on the x-axis of plots\n",
"month_name = ['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D']"
"# defining an array of abbreviated month names to use as labels\n",
"month_name = [\"J\", \"F\", \"M\", \"A\", \"M\", \"J\", \"J\", \"A\", \"S\", \"O\", \"N\", \"D\"]"
]
},
{
@@ -146,11 +147,11 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "b2c21350-cad0-42ee-a56b-a97a7a712961",
"id": "e17b9fcc-b96b-46f0-a268-8e4188f92a8a",
"metadata": {},
"outputs": [],
"source": [
"# potential gradient values measured at Vostok station in (1998–2001)\n",
"# potential gradient values measured at Vostok station in 1998–2001\n",
"# units are V·m^(−1)\n",
"# source: Burns et al. (2012), Table 2\n",
"\n",
@ -166,7 +167,7 @@ @@ -166,7 +167,7 @@
"metadata": {},
"source": [
"### Figure 1.1\n",
"Seasonal variation based on earlier measurement results"
"Seasonal variation of the GEC according to earlier studies"
]
},
{
@@ -189,33 +190,29 @@
"source": [
"fig = plt.figure(figsize=(10, 14), constrained_layout=False)\n",
"\n",
"# create a list of axes objects to hold subplots.\n",
"ax = [None for _ in range(6)]\n",
"\n",
"# configure each subplot in the figure\n",
"# subplots are arranged in a 4×4 grid\n",
"for n in range(6):\n",
" ax[n] = fig.add_subplot(4, 4, (2*n + 1, 2*n + 2))\n",
"\n",
"# lower, upper limits, y-axis ticks interval, y-scaling coefficient for each subplot\n",
"low = [100, 160, 0e-13, 15e-13, 25e-13, 200e3]\n",
"high = [200, 240, 20e-13, 30e-13, 40e-13, 280e3]\n",
"step = [20, 20, 5e-13, 5e-13, 5e-13, 20e3]\n",
"coeff = [1, 1, 1e-12, 1e-12, 1e-12, 1e3]\n",
"\n",
"caption = ['Carnegie and Maud, 1915–1929',\n",
" 'Vostok station, 1998–2001 (adjusted)',\n",
" 'Kew, 1930–1931',\n",
" 'Athens, 1965–1980',\n",
" 'Mauna Loa, 1977–1983',\n",
" 'Ion. potent. measurements, 1955–2004']\n",
"caption = [\"Carnegie and Maud, 1915–1929\",\n",
" \"Vostok station, 1998–2001 (adjusted)\",\n",
" \"Kew, 1930–1931\",\n",
" \"Athens, 1965–1980\",\n",
" \"Mauna Loa, 1977–1983\",\n",
" \"Ion. potent. measurements, 1955–2004\"]\n",
"\n",
"ins_caption = ['(after $\\it{Adlerman~&~Williams}$, 1996)',\n",
" '(after $\\it{Burns~et~al.}$, 2012)',\n",
" '(after $\\it{Scrase}$, 1933)',\n",
" '(after $\\it{Retalis}$, 1991)',\n",
" '(after $\\it{Adlerman~&~Williams}$, 1996)',\n",
" '(after $\\it{Markson}$, 2007)']\n",
"ins_caption = [\"(after $\\it{Adlerman~&~Williams}$, 1996)\",\n",
" \"(after $\\it{Burns~et~al.}$, 2012)\",\n",
" \"(after $\\it{Scrase}$, 1933)\",\n",
" \"(after $\\it{Retalis}$, 1991)\",\n",
" \"(after $\\it{Adlerman~&~Williams}$, 1996)\",\n",
" \"(after $\\it{Markson}$, 2007)\"]\n",
"\n",
"data = np.array([carnegie_maud_data,\n",
" vostok_old_data,\n",
@@ -224,66 +221,65 @@
" mauna_loa_data,\n",
" ip_data])\n",
"\n",
"# assign colors for each dataset\n",
"col = ['orangered', 'orangered', 'teal', 'teal', 'teal', 'royalblue']\n",
"col = [\"orangered\", \"orangered\", \"teal\", \"teal\", \"teal\", \"royalblue\"]\n",
"\n",
"for n in range(6):\n",
" for axis in ['top', 'bottom', 'left', 'right']:\n",
" for axis in [\"top\", \"bottom\", \"left\", \"right\"]:\n",
" ax[n].spines[axis].set_linewidth(0.5)\n",
" ax[n].tick_params(length=6, width=0.5, axis='y')\n",
" ax[n].tick_params(length=0, width=0.5, axis='x')\n",
" ax[n].grid(color='0.', linewidth=0.5, axis='y')\n",
" ax[n].tick_params(length=6, width=0.5, axis=\"y\")\n",
" ax[n].tick_params(length=0, width=0.5, axis=\"x\")\n",
" ax[n].grid(color=\"0.\", linewidth=0.5, axis=\"y\")\n",
"\n",
" ax[n].set_xlim((-0.5, 11.5))\n",
" ax[n].set_xticks(np.arange(12))\n",
" ax[n].set_xticklabels(month_name, fontsize='large', va='top')\n",
" ax[n].set_xticklabels(month_name, fontsize=\"large\", va=\"top\")\n",
"\n",
" ax[n].set_ylim((low[n], high[n]))\n",
" ax[n].set_yticks(np.arange(low[n], high[n] + step[n] / 2, step[n]))\n",
" if n in [2, 3, 4]:\n",
" ax[n].set_yticklabels([f'{x:.1f}' for x in\n",
" ax[n].set_yticklabels([f\"{x:.1f}\" for x in\n",
" np.arange(low[n], high[n] + step[n] / 2,\n",
" step[n]) / coeff[n]\n",
" ],\n",
" fontsize='large')\n",
" ax[n].set_ylabel('Monthly mean fair-weather\\n'\n",
" 'air—Earth current, pA/m²',\n",
" fontsize='large')\n",
" fontsize=\"large\")\n",
" ax[n].set_ylabel(\"Monthly mean fair-weather\\n\"\n",
" \"air—Earth current, pA/m²\",\n",
" fontsize=\"large\")\n",
" else:\n",
" ax[n].set_yticklabels((np.arange(low[n], high[n] + step[n] / 2,\n",
" step[n]) / coeff[n]).astype(int),\n",
" fontsize='large')\n",
" fontsize=\"large\")\n",
" if n in [0, 1]:\n",
" ax[n].set_ylabel('Monthly mean fair-weather\\n'\n",
" 'potential gradient, V/m',\n",
" fontsize='large')\n",
" ax[n].set_ylabel(\"Monthly mean fair-weather\\n\"\n",
" \"potential gradient, V/m\",\n",
" fontsize=\"large\")\n",
" else:\n",
" ax[n].set_ylabel('Monthly mean\\nionospheric potential, kV',\n",
" fontsize='large')\n",
" ax[n].set_ylabel(\"Monthly mean\\nionospheric potential, kV\",\n",
" fontsize=\"large\")\n",
"\n",
" ax[n].set_title(caption[n], fontsize='large')\n",
" ax[n].set_title(caption[n], fontsize=\"large\")\n",
" ax[n].text(0.5,\n",
" 1 - 0.01\n",
" * (ax[n].get_position().x1 - ax[n].get_position().x0)\n",
" / (ax[n].get_position().y1 - ax[n].get_position().y0)\n",
" * fig.get_size_inches()[0] / fig.get_size_inches()[1],\n",
" ins_caption[n],\n",
" fontsize='large', ha='center', va='top',\n",
" fontsize=\"large\", ha=\"center\", va=\"top\",\n",
" transform=ax[n].transAxes)\n",
"\n",
" ax[n].annotate('', xy=(12, np.min(data[n])), xycoords='data',\n",
" xytext=(12, np.max(data[n])), textcoords='data',\n",
" ax[n].annotate(\"\", xy=(12, np.min(data[n])), xycoords=\"data\",\n",
" xytext=(12, np.max(data[n])), textcoords=\"data\",\n",
" annotation_clip=False,\n",
" arrowprops=dict(\n",
" arrowstyle='<|-|>,head_length=0.8,head_width=0.3',\n",
" arrowstyle=\"<|-|>,head_length=0.8,head_width=0.3\",\n",
" patchA=None, patchB=None, shrinkA=0., shrinkB=0.,\n",
" connectionstyle='arc3,rad=0.', fc='black',\n",
" connectionstyle=\"arc3,rad=0.\", fc=\"black\",\n",
" linewidth=0.5\n",
" ))\n",
" ampl = (np.max(data[n]) - np.min(data[n])) / np.mean(data[n])\n",
" ax[n].text(12.2, (np.min(data[n]) + np.max(data[n])) / 2,\n",
" f'{ampl * 100:.0f}%',\n",
" fontsize='large', ha='left', va='center', rotation=270)\n",
" f\"{ampl * 100:.0f}%\",\n",
" fontsize=\"large\", ha=\"left\", va=\"center\", rotation=270)\n",
"\n",
"fig.align_ylabels([ax[0], ax[2], ax[4]])\n",
"fig.align_ylabels([ax[1], ax[3], ax[5]])\n",
@@ -292,14 +288,22 @@
" ax[n].bar(np.arange(12), data[n], 0.8, color=col[n])\n",
"\n",
"for n in range(6):\n",
" ax[n].text(-0.3, 1.05, chr(ord('a') + n), fontsize='x-large',\n",
" fontweight='semibold', ha='left', va='bottom',\n",
" ax[n].text(-0.3, 1.05, chr(ord(\"a\") + n), fontsize=\"x-large\",\n",
" fontweight=\"semibold\", ha=\"left\", va=\"bottom\",\n",
" transform=ax[n].transAxes)\n",
"\n",
"fig.subplots_adjust(hspace=0.3, wspace=1.6)\n",
"\n",
"fig.savefig('figures_two_parts/earlier_measurements.eps', bbox_inches='tight')"
"fig.savefig(\"figures/earlier_measurements.eps\", bbox_inches=\"tight\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9469043f-49c8-4c14-8af2-67d75e6af2d2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

1416
2_Vostok_measurements_images.ipynb

File diff suppressed because one or more lines are too long

70
3_WRF_T2_images.ipynb

File diff suppressed because one or more lines are too long

187
4_IP_simulations_temporal_images.ipynb

File diff suppressed because one or more lines are too long

1079
5_IP_simulations_spatial_images.ipynb

File diff suppressed because one or more lines are too long

196732
data/Vostok/vostok_1998_2020_hourly_SWIP.txt

File diff suppressed because it is too large

BIN
data/Vostok/vostok_2006_2020_results.npz

Binary file not shown.

BIN
data/Vostok/vostok_diurnal_2006_2020.npy

Binary file not shown.

5622
figures/earlier_measurements.eps

File diff suppressed because it is too large

8745
figures/ip_decomposed.eps

File diff suppressed because it is too large

6994
figures/ip_pg_partial.eps

File diff suppressed because it is too large

6130
figures/ip_pg_total.eps

File diff suppressed because it is too large

25443
figures/ip_separate.eps

File diff suppressed because it is too large

7622
figures/new_parameterisation.eps

File diff suppressed because it is too large

5083
figures/pg_3_years.eps

File diff suppressed because it is too large

6010
figures/pg_corrected.eps

File diff suppressed because it is too large

9971
figures/pg_diurnal_seasonal.eps

File diff suppressed because it is too large

1713
figures/pg_earlier.eps

File diff suppressed because it is too large

12025
figures/pg_individual.eps

File diff suppressed because it is too large

3398
figures/pg_total_partial.eps

File diff suppressed because it is too large

3057
figures/t2.eps

File diff suppressed because it is too large

262
readme.md

@@ -1,102 +1,252 @@
-# Short Description of the Scripts
+## The seasonal variation of the DC global electric circuit
+In this repository you can find the measurement and modelling data, the research code, and a comprehensive description of the code developed for our two papers on atmospheric electricity:
-> **_Note:_** For analysis, we use simulation data of the ionospheric potential through climate models. Since these data are very large (around 350 Gb), we only upload preprocessed lower-dimensional data (around 20 Mb) to the repository. Data preparation is possible using the script `0_prepare_data.ipynb`, but this would require downloading large files from https://eee.ipfran.ru/files/seasonal-variation-2024/.
+The first paper (**Part 1: A new analysis based on long-term measurements in Antarctica**) focuses on the seasonal variation of the global electric circuit (GEC) based on electric field measurements at the Vostok station in Antarctica during 2006–2020. The key findings include:
-* `1_Earlier_measurements_images.ipynb` plots seasonal variations from external sources
-* `2_Vostok_measurements_images.ipynb` plots seasonal variations and seasonal-dirunal diagram using new and early Vostok PG measurements
-* `3_WRF_T2_images.ipynb` plots seasonal variation of `T2m` temperature averaged across different latitude bands
-* `4_IP_simulations_temporal_images.ipynb` plots seasonal variation of simulated IP grouped by datasets and different year ranges
-* `5_IP_simulations_spatial_images.ipynb` plots seasonal variation of simulated IP grouped by latitude ranges
+* The annual behaviour of the GEC intensity is difficult to determine reliably, as seasonal variation patterns at specific UTC hours can significantly differ from the seasonal variation of the diurnal mean.
+* Potential gradient data from the Vostok station indicate that the daily mean GEC intensity reaches its maximum during the Northern Hemisphere summer (with a small local minimum inside this summer maximum) and its minimum during the Northern Hemisphere winter.
+* These seasonal changes are linked to the annual cycle of insolation, which influences the dynamics of global convection intensity, and to the distribution of land and ocean between the hemispheres.
+The second paper (**Part 2: Further analysis based on simulations**) further analyses the seasonal variation of the GEC on the basis of simulations with models of atmospheric dynamics. The key findings are as follows:
+* The equatorial contribution to the GEC exhibits two maxima following the equinoxes, while each of the non-equatorial contributions has one maximum during the local summer.
+* The overall annual variation of the GEC is rather subtle and represents a combination of complex patterns where different contributions partially offset each other, making it challenging to simulate.
+* Simulations agree better with observations when surface air temperature is included alongside convection parameters in the ionospheric potential parameterisation.
+> **_Note:_** As soon as the articles are published, we will add links to them here.
+## Short Description of the Scripts
+> **_Note:_** We use some data sets containing ionospheric potential values simulated via climate and weather forecasting models. Since these raw data sets are very large (around 350 GB), we only upload preprocessed lower-dimensional data sets (around 20 MB) to the repository. This preprocessing can be reproduced using the script `0_prepare_data.ipynb`, but this would require downloading the raw data sets from https://eee.ipfran.ru/files/seasonal-variation-2024/.
+* `1_Earlier_measurements_images.ipynb`: the seasonal variation of the GEC based on the results of earlier studies (with the data taken from other sources)
+* `2_Vostok_measurements_images.ipynb`: the seasonal variation and a diurnal-seasonal diagram for the GEC on the basis of potential gradient data from Vostok (both the new data for 2006–2020 and the earlier data for 1998–2001)
+* `3_WRF_T2_images.ipynb`: the seasonal variation of air temperature at the height of 2 m (`T2m`) averaged over different latitude ranges
+* `4_IP_simulations_temporal_images.ipynb`: the seasonal variation of simulated IP values for different models and parameterisations
+* `5_IP_simulations_spatial_images.ipynb`: the decomposition of the seasonal variation into contributions to the IP from different latitude ranges and a new parameterisation of the IP
+> **_Note:_** The scripts should be executed sequentially one after another; at the very least, scripts 4 and 5 should be run after script 2. This is necessary because script 2 saves intermediate arrays of preprocessed data from the Vostok station, which are used in scripts 4 and 5.
-# Detailed Description of the Scripts
+## Detailed Description of the Scripts
+> **_Note:_** Here and throughout the script descriptions, it will be specified which calculations correspond to which figures. The figure number is presented in the format 1.x or 2.x, where the first digit refers to the article (Part 1 or Part 2) and the second digit indicates the figure number within the article. If the figure number starts with the letter "S" (e.g., 1.S1), it refers to a figure in the Supporting Information for the corresponding article.
+### 1. The script `1_Earlier_measurements_images.ipynb`
+This script imports the digitised data of earlier measurements from other sources, required for constructing Figure 1.1.
+The data analysis in this file is minimal, focusing only on calculating the amplitude of the seasonal variation as a percentage of the annual average value.
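> **_Editor's sketch:_** The amplitude calculation described above can be illustrated as follows (a minimal example; `monthly_means` is a hypothetical array of 12 monthly mean values):
```python
import numpy as np

# hypothetical input: 12 monthly mean values of the measured quantity
monthly_means = np.array([120., 118., 125., 131., 140., 149.,
                          152., 147., 138., 130., 124., 121.])

# peak-to-peak amplitude as a percentage of the annual mean
amplitude_pct = (monthly_means.max() - monthly_means.min()) \
    / monthly_means.mean() * 100
print(f"{amplitude_pct:.0f}%")
```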
+### 2. The script `2_Vostok_measurements_images.ipynb`
+Firstly, we present two data sets based on earlier publications: `vostok_old_data` (adjusted potential gradient values for 1998–2001) is taken directly from Burns et al. (2012); `vostok_old_data_unadjusted` (raw data for the same time period) is inferred from Burns et al. (2012) by subtracting from the adjusted values the linear corrections indicated in the same paper. These two data sets will be required to construct a series of figures comparing our results with earlier studies.
+#### 2.1. Preparing potential gradient data
+Secondly, hourly averaged results of potential gradient measurements from the Vostok station are loaded into pandas dataframes for both new (dataframes `df_10s` and `df_5min`) and earlier (dataframe `earlier_df_src`) data sets.
+New measurements at the Vostok station are combined from hourly data derived from 10-second files and hourly data derived from 5-minute files; it should be noted that the data set primarily relies on the 10-second data, and the 5-minute data are only used when, owing to technical reasons, the 10-second data are unavailable. The combined series of new measurements is saved in the dataframe `df_src`.
+> **_Note:_** When averaging the source data into the hourly series, we used only those hours for which at least 80% of the records were present, i.e. 288 for 10-second data or 10 for 5-minute data. Note that here we do not publish the code for preparing the hourly time series (it is very simple) or the original data with higher resolution.
+#### 2.2. Taking form factors into account
+At the Vostok station potential gradient measurements should be corrected for the influence of the metal pole supporting the field mill by applying a form factor. The form factor, accounting for the gradual reduction in the pole’s height due to constant snow accumulation, decreases linearly from 3 at the beginning of 2006 (for a 2.95 m high pole) to 2.4 at the end of 2024 (for a 1.75 m high pole).
+To apply the correction, we add a `Factor` column to the `df_src` dataframe. This column is then used to convert the sensor-measured values into absolute potential gradient values.
+> **_Note:_** Since the form factor for the field mill during the 1998–2001 period is unknown and the reported field values from the earlier Vostok data do not differ significantly from the corrected new data, we do not apply this procedure to the older data and leave them as they are.
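> **_Editor's sketch:_** A minimal illustration of the linear form factor described above, assuming the stated endpoints (3 at the beginning of 2006, 2.4 at the end of 2024); the direction of the conversion depends on the sensor calibration convention and is only hinted at in the comment:
```python
import datetime as dt

T0 = dt.datetime(2006, 1, 1)    # form factor 3 (2.95 m high pole)
T1 = dt.datetime(2024, 12, 31)  # form factor 2.4 (1.75 m high pole)

def form_factor(t: dt.datetime) -> float:
    """Form factor interpolated linearly between the two endpoints."""
    frac = (t - T0) / (T1 - T0)  # 0 at T0, 1 at T1
    return 3.0 + (2.4 - 3.0) * frac

# a `Factor` column like the one described above could then be built via
# df_src["Factor"] = df_src.index.map(form_factor)
```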
+#### 2.3. Helper functions and fair weather criteria
+Next, we introduce helper functions. Notably, the `pass_fair_weather` function, when applied to a dataframe (like `df_src` or `earlier_df_src`), retains only those days when (1) there were no gaps, (2) the potential gradient did not exceed 300 V/m and was non-negative, and (3) the peak-to-peak amplitude was no more than 150% of the daily average value of the potential gradient.
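> **_Editor's sketch:_** A possible implementation of these fair-weather criteria (not the actual `pass_fair_weather` from the script; `hourly` is a hypothetical series of hourly PG values with a datetime index):
```python
import pandas as pd

def pass_fair_weather_sketch(hourly: pd.Series) -> pd.Series:
    """Keep only days satisfying the three criteria described above."""
    kept = []
    for day, vals in hourly.groupby(hourly.index.date):
        if len(vals) < 24 or vals.isna().any():          # (1) no gaps
            continue
        if vals.min() < 0 or vals.max() > 300:           # (2) 0..300 V/m
            continue
        if vals.max() - vals.min() > 1.5 * vals.mean():  # (3) amplitude
            continue
        kept.append(vals)
    return pd.concat(kept) if kept else hourly.iloc[:0]
```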
+The next helper functions to mention are `calculate_seasonal_var_params` and `std_error`.
+They are structured such that the input to `calculate_seasonal_var_params` is a dataframe with daily average values, and the function returns:
+1. An array of 12 monthly average values of the potential gradient,
+2. An array of 12 monthly counts of fair-weather days,
+3. An array of 12 monthly average values of the square of the daily mean potential gradient.
+`std_error` is designed to take the output from `calculate_seasonal_var_params` and return 12 values of the standard error, one for each month.
+Both described functions are used to compute values necessary for plotting graphs (mean value ± one standard error).
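> **_Editor's sketch:_** Given the three arrays returned by `calculate_seasonal_var_params`, the standard error of each monthly mean can be computed from the identity Var[x] = E[x²] − E[x]² (a sketch, not the script's exact code):
```python
import numpy as np

def std_error_sketch(mean, n_days, mean_sq):
    """Standard error of 12 monthly means: sample std / sqrt(n),
    with Bessel's correction folded into the denominator."""
    return np.sqrt((mean_sq - mean**2) / (n_days - 1))
```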
+For both the new and the earlier Vostok data, we apply the `pass_fair_weather` function, resulting in two data sets that contain only the hours of fair-weather days (named `df` and `earlier_df` in the code).
+#### 2.4. Resampling daily fair-weather data sets
+Since in the papers we focus on studying monthly mean values, using hourly dataframes for all calculations might be excessive. Therefore we average both dataframes (pre-filtered to include only fair-weather days) over days, so that each day corresponds to a single potential gradient value.
+Such daily-resolution arrays are stored in the variables `daily_df` and `daily_edf`.
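> **_Editor's sketch:_** The daily averaging step amounts to a one-liner of the following kind (the column name is hypothetical; `df` is assumed to be the hourly fair-weather dataframe with a `DatetimeIndex`):
```python
daily_df = df[["Field"]].resample("1D").mean().dropna()
```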
+#### 2.5. Constructing Figure 1.2, Figure 1.S1 and Figure 1.S2
+Figure 1.2 illustrates the seasonal variation (based on the new data) across different multi-year periods. To create it, we used the prepared data (`daily_df`) and helper functions to compute the mean values, the number of fair-weather days and the standard errors for three data sets:
+1. The complete series of the new Vostok data (2006–2020).
+2. The same data for 2006–2012.
+3. The same data for 2013–2020.
+> **_Note:_** The data from this figure are saved in the temporary file `vostok_2006_2020_results.npz` for use in the second article. This helps us to avoid duplicating the code or merging the code to build different entities in a single cumbersome file.
+Figure 1.S1 illustrates the seasonal variation (based on the new data) for individual years. Using the same process as was employed in the case of Figure 1.2, we compute the mean values, the number of fair-weather days and standard errors, but this time for each year separately (we obtain 15 data sets, one for each year in 2006–2020).
+Figure 1.S2 is based on a similar analysis performed separately for individual 3-year periods.
+#### 2.6. Constructing Figure 1.3
+Figure 1.3 presents a diurnal-seasonal diagram. To construct it, we transform the Vostok data series into a 12-month × 24-hour matrix. This is achieved by grouping the original `df` dataframe of fair-weather hours by month and hour, then calculating the mean value for all data points corresponding to each specific hour of a specific month (stored in the `sd_df` dataframe).
+For clarity, we also present cross-sections of this diurnal-seasonal diagram at 3, 9, 15 and 21 hours UTC.
+> **_Note:_** Renaming the axes of the multi-index resulting from grouping (`sd_df.index.set_names(['hour', 'month'], inplace=True)`) is not required for the code to function and can be commented out. However, it may be convenient for further work with the diurnal-seasonal dataframe `sd_df`.
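> **_Editor's sketch:_** The 12 × 24 grouping described above can be expressed as follows (column name hypothetical; `df` is the hourly fair-weather dataframe):
```python
# group hourly values by (month, hour) and average within each group
sd = df["Field"].groupby([df.index.month, df.index.hour]).mean()
sd.index.set_names(["month", "hour"], inplace=True)  # optional renaming
sd_matrix = sd.unstack("hour")  # 12 months x 24 hours
```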
+#### 2.7. Data adjustment
+##### 2.7.1. Adjustment for local meteorological influences
+First, we load the meteorological data sets (`temp_df`, `wind_df`, `pressure_df`), averaged over days (`vostok_daily_temp`, `vostok_daily_wind`, `vostok_daily_pressure_mm_hg`). For further analysis, we use the `meteo_df` dataframe, which is created by merging the meteorological dataframes with daily average potential gradient values (`daily_df`).
+Next, we compile arrays of potential gradient anomalies and anomalies for all meteorological parameters. The anomalies are calculated using a moving window of ±10 days. We then study the correlation between meteorological parameters and potential gradient values and estimate their statistical significance. The correlation with pressure turned out to be insignificant and was therefore excluded.
+We then find multiple linear regression coefficients `temp_coeff` and `wind_coeff` for the anomalies of the potential gradient, temperature and wind speed. Using the obtained coefficients, we subtract a linear relationship with meteorological parameters (temperature and wind speed) from potential gradient values. The corrected potential gradient is saved in `meteo_df["CorrectedField"]`.
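> **_Editor's sketch:_** The adjustment described above might look roughly like this (column names hypothetical; NaN handling omitted):
```python
import numpy as np
import pandas as pd

def anomaly(s: pd.Series) -> pd.Series:
    """Deviation from a centred running mean over a ±10-day window."""
    return s - s.rolling(21, center=True, min_periods=1).mean()

pg_a = anomaly(meteo_df["Field"])
temp_a = anomaly(meteo_df["Temperature"])
wind_a = anomaly(meteo_df["Wind"])

# multiple linear regression of the PG anomaly on the two
# meteorological anomalies (least squares, no intercept)
X = np.column_stack([temp_a.values, wind_a.values])
(temp_coeff, wind_coeff), *_ = np.linalg.lstsq(X, pg_a.values, rcond=None)

# subtracting the fitted linear dependence from the measured PG
meteo_df["CorrectedField"] = (meteo_df["Field"]
                              - temp_coeff * temp_a
                              - wind_coeff * wind_a)
```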
+##### 2.7.2. Adjustment for the solar-wind-imposed potential difference
+First, we load the data set containing the solar-wind-imposed potentials over Vostok (`swip_df`) from a text file containing hourly values of the ionospheric potential contribution over the southern polar cap from 1998 to 2020. Note that these values were calculated using the publicly available Weimer (2005) model.
+<!-- The calculation procedure is described separately in Section 6 below. -->
+Similarly to how we combined potential gradient measurements and meteorological data into `meteo_df` to facilitate their processing, here we also merge the original potential gradient data (`daily_df`) with the solar-wind-imposed potentials into the `solar_df` dataframe.
+The procedure for identifying correlations, determining the regression coefficient and removing the trend from the measured potential gradient values is nearly identical to the one described in the previous section.
+We apply the solar-wind-imposed correction to two data sets: `meteo_solar_df` (obtained by merging `meteo_df` with `swip_df`; here the potential gradient values are adjusted for both meteorological influences and the solar wind) and `solar_df` (which takes into account only the correction for the solar wind).
+##### 2.7.3. Constructing Figure 1.5 and Figure 1.S3
+Finally, we construct Figures 1.5 and 1.S3 using the prepared data in the same manner as it was done for Figures 1.2 and 1.3.
+Figure 1.5 presents the monthly mean fair-weather potential gradient at the Vostok station under different conditions:
+1. Unadjusted data from 1998–2001 (from Burns et al., 2012).
+2. Data from 1998–2001 adjusted for meteorological and solar wind influences (from Burns et al., 2012).
+3. Unadjusted data from 2006–2020.
+4. Data from 2006–2020 adjusted for meteorological influences.
+5. Data from 2006–2020 adjusted for solar wind influences.
+6. Data from 2006–2020 adjusted for both meteorological and solar wind influences.
+Each subplot visualises the seasonal variation for the corresponding data set, with error bars indicating standard errors. The amplitude of the variation (as a percentage of the mean) is also annotated.
+Figure 1.S3 shows the same data as panel 1 of Figure 1.5, but the analysis is based on our own calculations rather than on those by Burns et al. (2012).
+### 3. The script `3_WRF_T2_images.ipynb`
+This script calculates the seasonal variation of air temperature at the height of 2 m (`T2m`), derived from global simulations of atmospheric dynamics.
-## Script `1_Earlier_measurements_images.ipynb`
+In this script the average temperatures for different latitudes and months are loaded from `WRF_T2_LATxMON.npy` (see the data description below).
-This program contains digitized data from external sources, necessary for constructing Figure 1.1.
+The temperature is then averaged across the latitude ranges of 20°S–20°N, 30°S–30°N, 40°S–40°N and 50°S–50°N, taking into account the area variation with latitude (1° wide cells at higher latitudes are weighted with a diminishing coefficient). The results of the averaging (the seasonal variation of temperature within the specified latitude bands) are presented in Figures 1.4 and 2.3, consisting of four panels each.
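> **_Editor's sketch:_** The area weighting described above boils down to cosine-of-latitude weights (assuming a 1° grid from 90°S to 90°N and the `WRF_T2_LATxMON.npy` array produced by script 0):
```python
import numpy as np

t2_LATxMON = np.load("./data/WRF/WRF_T2_LATxMON.npy")  # shape (180, 12)

lats = np.arange(-89.5, 90.0, 1.0)      # cell-centre latitudes
band = (lats >= -30) & (lats <= 30)     # e.g. the 30°S–30°N range
w = np.cos(np.deg2rad(lats[band]))      # area-proportional weights

# weighted mean over the band: 12 monthly values
t2_band = (t2_LATxMON[band] * w[:, None]).sum(axis=0) / w.sum()
```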
-At the beginning of the script, the necessary libraries are loaded and arrays with digitized data are declared; at the end, a graph is constructed.
+### 4. The script `4_IP_simulations_temporal_images.ipynb`
-Data analysis in this file is minimal - it calculates the amplitude of seasonal variation (as a percentage relative to the annual average value).
+This script implements the calculation of the seasonal variation of the simulated ionospheric potential using the data from two different models of atmospheric dynamics (the WRF and the INMCM). Specifically, it utilises different data sets of the modelled ionospheric potential, various time periods and so on.
-## Script `2_Vostok_measurements_images.ipynb`
+> **_Note:_** Unlike the previous scripts, this and subsequent scripts do not use pandas for data analysis. This is because all the input data for ionospheric potential simulations (and temperature) are stored in the binary `.npy` (numpy) format.
-This script is quite voluminous (for further understanding, see the comments in the code).
+The import of libraries and helper functions in this script is similar to the previous ones and will not be discussed in detail.
-Firstly, the introduction of digitized data is repeated in the code (in this case, only for the earlier data from the Vostok station, which are also used in the first script).
+The first step in the script is loading the time series of the modelled ionospheric potential. Since there are quite a few of them (calculated for different models with various parameterisation settings), for consistency they are loaded into two dictionaries: `wrf_hourly_total_ip` and `inm_hourly_total_ip`. To facilitate temporal grouping, two auxiliary index arrays, `<model>_dt_indices`, of the same length as the arrays in the dictionaries, are also created; these arrays store the series of dates corresponding to the respective ionospheric potential arrays.
-### Preparing PG data
+| Dictionary | Key | IP Time Series |
+|---|---|---|
+| `wrf_hourly_total_ip` | 500 | WRF model, CAPE > 500 J/kg, new parameterisation with T2m > 25 °C |
+| `wrf_hourly_total_ip` | 800 | WRF model, CAPE > 800 J/kg |
+| `wrf_hourly_total_ip` | 1000 | WRF model, CAPE > 1000 J/kg |
+| `wrf_hourly_total_ip` | 1200 | WRF model, CAPE > 1200 J/kg |
+| `inm_hourly_total_ip` | 800 | INMCM model with CAPE > 800 J/kg |
+| `inm_hourly_total_ip` | 1000 | INMCM model with CAPE > 1000 J/kg |
+| `inm_hourly_total_ip` | 1200 | INMCM model with CAPE > 1200 J/kg |
-Secondly, measurement data from the Vostok station (pre-averaged by the hour) are loaded into Pandas dataframes, both new (dataframes `df_10s` and `df_5min`) and earlier (dataframe `earlier_df_src`) datasets.
+#### 4.1. Constructing Figure 2.1
-New measurements at the Vostok station are combined from hourly data derived from 10-second files and hourly data derived from 5-minute files; it should be noted that the dataset primarily relies on the 10-second data, and the 5-minute data are only used when the 10-second data were unavailable (there were 24 such hours in 2013, 312 in 2015, 1752 in 2017, and 3600 in 2020). The composite series of new measurements is saved in the dataframe `df_src`.
+Here we average the data from the loaded arrays in the dictionaries (data sets for both models with the usual parameterisation and CAPE thresholds of 800, 1000 and 1200 J/kg) over months and construct seven plots.
-Next, we introduce helper functions. Notably, the `pass_fair_weather` function, when applied to a dataframe, retains only those days when (1) there were no gaps, (2) the potential gradient did not exceed 300 V/m and was non-negative, and (3) the peak-to-peak amplitude was no more than 150% of the average daily value of the potential gradient.
+Each of these plots represents the seasonal variation for a specific model and set of parameters. Note that monthly grouping for seasonal averaging is implemented using the auxiliary index arrays described above.
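> **_Editor's sketch:_** The monthly grouping via the auxiliary index arrays amounts to the following (using the names from the table above; the CAPE threshold of 1000 J/kg is just an example):
```python
import numpy as np

# 12 monthly means of the simulated IP: for each month, select all days
# whose date (in wrf_dt_indices) falls into that month and average
wrf_ip_seasonal = np.array([
    wrf_hourly_total_ip[1000][
        [i for i, d in enumerate(wrf_dt_indices) if d.month == m]
    ].mean()
    for m in range(1, 13)
])
```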
-The next helper functions to mention are `calculate_seasonal_var_params` and `std_error`.
+Six plots use the modelling data, while the seventh is based on the data from the Vostok station. It should be noted that here we use the output from the script `2_Vostok_measurements_images.ipynb`.
-They are structured such that the input to the first function is a dataframe with average daily values, and the function returns (1) an array of 12 average monthly values of PG, (2) an array of 12 counts of fair weather days per month, and (3) an array of 12 sums of squares of the average daily PG values of fair weather divided by the number of fair weather days, annotated by the following formula:
+#### 4.2. Constructing Figure 2.5
-sumₘ = Σ(daily mean PG for the `i`-th fair weather day)² / (count of fair weather days),
+Figure 2.5 is constructed similarly to Figure 2.1. However, here specific summer periods are selected for averaging prior to plotting.
-where `m` denotes the month number `m = 1...12`, and `i` iterates over all fair weather days for which the month of the date equals `m`.
+### 5. The script `5_IP_simulations_spatial_images.ipynb`
-The `std_error` function is designed to take the output from the `calculate_seasonal_var_params` function and return 12 values of the standard error, one for each month.
+This script performs a somewhat more complex calculation. It uses the same time-averaging approaches as the previous one but additionally decomposes the IP into contributions from different latitude ranges.
-Both described functions are used to compute values necessary for plotting graphs (mean value ± standard error).
+This script also introduces the results obtained using a new IP parameterisation, which includes the surface air temperature (`T2m`).
-For both new and early Vostok data, we apply the `pass_fair_weather` function, resulting in two datasets that contain only the hours of fair weather days (`df` and `earlier_df`)
+> **_Note:_** The description of some helper functions and helper arrays will not be provided here, as they are identical to those used in Scripts 1–4.
-### Figure 1.2
+#### 5.1. Helper class `LatitudeBand`
-To construct Figure 1.2, using the prepared data and helper functions, we calculate the mean values, the count of fair weather days and standard errors for three sets of data:
+The `LatitudeBand` class provides a convenient abstraction for extracting data corresponding to specific latitude ranges from two-dimensional arrays.
-1. The complete series of new Vostok data.
-2. The same series up to and including the year 2012.
-3. The same series after the year 2012.
+During initialisation, the model (either the WRF or the INMCM) is taken into account, recognising that these models are based on different grids. Also, latitude range expressions such as `8N-8S` are parsed.
-> **_Note:_** The data from this figure is saved in the temporary file `vostok_2006_2020_results.npz` for use in the second article. This helps avoid code duplication or merging code to build different entities in a single cumbersome file.
+Useful attributes of this class include array slices, visualisation colours, as well as titles and labels for display purposes.
-### Figure 1.3
+Usage example:
+```python
+my_band = LatitudeBand("20N-20S", model="WRF")
+seas_var = some_LATxMON_wrf_ip[my_band.slice].sum(axis=0)
+```
+This code returns `seas_var`, a seasonal variation (12 values) of the contribution to the ionospheric potential from a specific latitude range between 20°N and 20°S.
-To construct Figure 1.3, we transform the Vostok data series into a matrix of 12 months x 24 hours. To do this, we group the original dataframe of fair weather hours by months and hours, and then find the mean value for all data points taken at a specific hour of a specific month (saved in dataframe `sd_df`).
+The class implements the calculation of various fields used for generating figures. For example, `my_band.title` will return the string `20° N–20° S`. For a more detailed understanding, it is recommended to review how class objects are utilised in the code, as the class definition itself may not be very intuitive.
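> **_Editor's sketch:_** A stripped-down version of the idea behind `LatitudeBand` (the real class also provides colours, titles and labels; the grid assumptions here are the editor's):
```python
import re

class LatitudeBandSketch:
    """Parse a range such as "20N-20S" into a slice of grid rows."""

    def __init__(self, band: str, model: str = "WRF"):
        # assumed grids: 180 rows of 1° (WRF) or 120 rows of 1.5°
        # (INMCM), with row 0 at the southern edge
        step = 1.0 if model == "WRF" else 1.5

        def to_lat(part: str) -> int:
            m = re.fullmatch(r"(\d+)([NS]?)", part)
            return int(m.group(1)) * (-1 if m.group(2) == "S" else 1)

        lo, hi = sorted(to_lat(p) for p in band.split("-"))
        self.slice = slice(round((lo + 90) / step), round((hi + 90) / step))

my_band = LatitudeBandSketch("20N-20S")
# some_LATxMON_wrf_ip[my_band.slice].sum(axis=0) -> 12 monthly values
```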
For clarity, we also present slices of this diurnal-seasonal diagram at 3, 9, 15, and 21 hours UTC.
#### 5.2. Loading precalculated arrays and auxiliary calculations
> **_Note:_** Renaming the axes of the multi-index resulting from grouping (`sd_df.index.set_names(['hour', 'month'], inplace=True)`) is not necessary for the code and can be commented out; however, it may be convenient for further work with the diurnal-seasonal dataframe `sd_df`.
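The grouping might look as follows (a sketch under the same assumptions about `df` as above):

```python
# group hourly fair weather data by hour of day and month
sd_df = df.groupby([df.index.hour, df.index.month])["Field"].mean()
sd_df.index.set_names(["hour", "month"], inplace=True)  # optional renaming

# 12 × 24 matrix: rows are months (1-12), columns are hours (0-23 UTC)
sd_matrix = sd_df.unstack(level="hour").to_numpy()

# slices of the diurnal-seasonal diagram at fixed hours
for h in (3, 9, 15, 21):
    slice_h = sd_matrix[:, h]  # 12 monthly values at hour h UTC
```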
### Figure 1.5

#### Removal of field anomalies associated with meteorological parameters

First, we load the daily averaged meteorological datasets (`vostok_daily_temp`, `vostok_daily_wind`, `vostok_daily_pressure_mm_hg`) into the dataframes `temp_df`, `wind_df` and `pressure_df`. For further analysis, we use the `meteo_df` dataframe, which is created by merging them with the dataframe of daily average potential gradient values (`daily_df`).

Next, we compile arrays of PG anomalies and anomalies of all the meteorological parameters. An anomaly is calculated using a moving window of ±10 days.

We then find the regression coefficients `temp_coeffs`, `wind_coeffs` and `pres_coeffs` between the PG anomaly and the corresponding meteorological parameter anomalies, and calculate some statistical characteristics.

Using the found regression coefficients, we remove the linear dependence on the meteorological parameter anomalies. The corrected PG is saved in `meteo_df["CorrectedField"]`.

Finally, we construct Figure 1.5 using the prepared data in the same manner as was done for Figures 1.2 and 1.3.
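The anomaly computation and the linear correction might be sketched as follows (assuming `meteo_df` holds the daily PG in a `Field` column and the meteorological parameters in `Temp`, `Wind` and `Pressure` columns; the real column and coefficient names may differ):

```python
import numpy as np

def anomaly(series):
    """Deviation from a centred moving average over a ±10-day window."""
    return series - series.rolling(21, center=True, min_periods=1).mean()

pg_anom = anomaly(meteo_df["Field"])
predictors = np.column_stack([
    anomaly(meteo_df["Temp"]),
    anomaly(meteo_df["Wind"]),
    anomaly(meteo_df["Pressure"]),
])
mask = np.isfinite(predictors).all(axis=1) & pg_anom.notna().to_numpy()

# least-squares fit of the PG anomaly on the three parameter anomalies
A = np.column_stack([predictors[mask], np.ones(mask.sum())])
coeffs, *_ = np.linalg.lstsq(A, pg_anom.to_numpy()[mask], rcond=None)

# remove the linear dependence on the meteorological anomalies
meteo_df["CorrectedField"] = meteo_df["Field"] - predictors @ coeffs[:3]
```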
## Script `3_WRF_T2_images.ipynb`

This script calculates the seasonal variation of the 2-m-level temperature (T2m) taken from the climate modelling results (see article 1).

In the script, temperature data averaged over longitude and over each month are loaded from `WRF_T2_MONxLAT.npy` (see the data description below).

Next, the temperature is averaged across the latitude bands 20° S–20° N, 30° S–30° N, 40° S–40° N and 50° S–50° N. The averaging takes into account the latitudinal area factor: grid cells at higher latitudes enter the sums with diminishing coefficients. The results of the averaging (the seasonal variation of temperature in the specified latitude bands) are displayed in Figures 1.4 and 2.3, each consisting of four panels.
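The area-weighted averaging could be implemented roughly as follows (a sketch; the array is assumed to hold 12 monthly values for 180 one-degree latitude bands, and the axis order is worth checking against the file):

```python
import numpy as np

t2 = np.load("./data/WRF/WRF_T2_MONxLAT.npy")  # assumed shape (180, 12)
lat_centres = np.arange(-89.5, 90.0, 1.0)      # centres of 1° latitude bands

for half_width in (20, 30, 40, 50):
    band = np.abs(lat_centres) < half_width
    weights = np.cos(np.radians(lat_centres[band]))  # band area ∝ cos(latitude)
    seasonal_t2 = (t2[band] * weights[:, None]).sum(axis=0) / weights.sum()
    # seasonal_t2: 12 monthly mean temperatures for the band
```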
##### 4.2. Constructing Figure 2.5

Figure 2.5 is constructed similarly to Figure 2.1; however, here specific summer periods are selected for averaging prior to plotting.
### 5. The script `5_IP_simulations_spatial_images.ipynb`

This script performs a somewhat more complex calculation. It uses the same time-averaging approaches as the previous one but additionally decomposes the IP into contributions from different latitude ranges.

This script also introduces the results obtained using a new IP parameterisation, which takes into account the surface air temperature (`T2m`).

> **_Note:_** The description of some helper functions and helper arrays is not provided here, as they are identical to those used in Scripts 1–4.
#### 5.1. Helper class `LatitudeBand`

The `LatitudeBand` class provides a convenient abstraction for extracting the data corresponding to specific latitude ranges from two-dimensional arrays.

During initialisation, the model (either the WRF or the INMCM) is taken into account, since these models are based on different grids; latitude range expressions such as `8N-8S` are also parsed.

Useful attributes of this class include array slices and visualisation colours, as well as titles and labels for display purposes.
The class implements the calculation of various fields used for generating the figures. For example, `my_band.title` returns the string `20° N–20° S`. For a more detailed understanding, it is recommended to review how objects of this class are used in the code, as the class definition itself may not be very intuitive.

Usage example:
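```python
my_band = LatitudeBand("20N-20S", model="WRF")
# some_LATxMON_wrf_ip stands for any of the (latitude, 12) arrays,
# e.g. the contents of WRF_IP_500_LATxMON.npy
seas_var = some_LATxMON_wrf_ip[my_band.slice].sum(axis=0)
```

This code returns `seas_var`, the seasonal variation (12 values) of the contribution to the ionospheric potential from the latitude range between 20° N and 20° S.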
#### 5.2. Loading precalculated arrays and auxiliary calculations

The loading procedure is similar to that in Script 4. However, here we work not with the total one-dimensional ionospheric potential values but with contributions that are monthly averaged and summed within latitude bands one grid step wide.

The loaded arrays and dictionaries of arrays (see the description in Script 4) are named in the format `<model>_LATxMON_<variable>`, highlighting their dimensions (number of latitudes in the model grid × 12).

After loading these data, various characteristics of the seasonal variation are calculated for different latitude ranges. These include the contribution of each latitude range to the total ionospheric potential (as a percentage), the amplitude of the seasonal variation and the months when the minimum and the maximum of the seasonal variation are attained.
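For a single latitude band these characteristics might be computed along the following lines (a sketch; `wrf_LATxMON_ip` stands for one of the loaded `(latitude, 12)` arrays, and the amplitude definition shown here is only one possible choice):

```python
band = LatitudeBand("20N-20S", model="WRF")
seas_var = wrf_LATxMON_ip[band.slice].sum(axis=0)  # 12 monthly values
total = wrf_LATxMON_ip.sum(axis=0)                 # total IP, 12 monthly values

contribution_pct = 100 * seas_var.sum() / total.sum()  # share of the total IP
amplitude = seas_var.max() - seas_var.min()            # one possible definition
month_of_min = seas_var.argmin() + 1                   # 1 corresponds to January
month_of_max = seas_var.argmax() + 1
```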
#### 5.3. Constructing Figures 2.2 and 2.4

For Figure 2.4 we decompose the seasonal variation of the ionospheric potential into contributions and show them cumulatively. The total value in the figure is composed of the contributions from specific latitude ranges: `["0-9N", "0-9S", "9S-18S", "18S-90S", "9N-18N", "18N-90N"]`.

Figure 2.2 uses a similar approach to averaging the arrays; however, the contributions of individual latitude strips are plotted in separate subplots. Additionally, the patterns of the seasonal variation of the average temperature across different latitude ranges are shown.

> **_Note:_** While the ionospheric potential contributions from individual grid cells within latitude ranges can be summed (they are additive), this is not the case for temperature. For temperature we must calculate a weighted average, where the weighting function takes into account the variability in the grid cell area.
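The cumulative decomposition for a figure like 2.4 might be sketched as follows (plotting details omitted; the `color` attribute of `LatitudeBand` is an assumed name for one of its visualisation attributes):

```python
import numpy as np
import matplotlib.pyplot as plt

band_names = ["0-9N", "0-9S", "9S-18S", "18S-90S", "9N-18N", "18N-90N"]
months = np.arange(1, 13)

cumulative = np.zeros(12)
for name in band_names:
    band = LatitudeBand(name, model="WRF")
    contribution = wrf_LATxMON_ip[band.slice].sum(axis=0)
    # stack each band's contribution on top of the previous ones
    plt.fill_between(months, cumulative, cumulative + contribution,
                     color=band.color, label=band.title)
    cumulative += contribution

plt.legend()
plt.show()
```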
#### 5.4. Constructing Figure 2.6

This figure, which illustrates the simulated diurnal and seasonal variations according to different parameterisations, introduces the data obtained with the new parameterisation that takes into account the surface air temperature.

The data processing procedure for the seasonal variation is similar to that used in Figure 2.4. For the diurnal variation the arrays are created from the hourly time series of the total ionospheric potential (`wrf_hourly_total_ip`), which are used exclusively for this figure.

Note that we again utilise the data from the Vostok station to construct the figure. However, here we load two preprocessed data sets from the previous scripts: one for the diurnal variation (`vostok_diurnal_2006_2020.npy`) and one for the seasonal variation (`vostok_2006_2020_results.npz`).
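The diurnal variation can be extracted from the hourly series roughly as follows (a sketch, assuming the hourly array starts at 00:00 UTC and covers an integer number of days):

```python
import numpy as np

wrf_hourly_total_ip = np.load("./data/WRF/WRF_HOURLY_TOTAL_IP_500.npy")

# reshape to (days, 24) and average over the days to get 24 UTC values
diurnal = wrf_hourly_total_ip.reshape(-1, 24).mean(axis=0)

# one common convention: express the variation relative to the daily mean
diurnal_pct = 100 * diurnal / diurnal.mean()
```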
<!-- ### 6. Calculation of SWIP using the Weimer 2005 model -->
<!-- ... -->

### Description of the data files

#### `./data/Vostok/`

- `vostok_hourly_from_10_s_without_calibration_and_empty.tsv`, `vostok_hourly_from_5_min_without_calibration_and_empty.tsv`: Text files containing two columns, one of which represents the date and time (column `Datetime`) and the other, hourly averaged potential gradient values on the basis of the measurements at the Russian Antarctic station Vostok (column `Field`, the units are V/m).
- `vostok_1998_2004_hourly_80percent_all.tsv`: Hourly potential gradient data for 1998–2004, based on earlier measurements with a different field mill. Contains two columns: `Datetime` (date and time) and `Field` (potential gradient values in V/m).
- `vostok_1998_2020_hourly_SWIP.txt`: Simulated (with the Weimer 2005 model) hourly solar wind imposed ionospheric potentials over the Vostok station for 1998–2020.
- `vostok_daily_pressure_mm_hg.csv`, `vostok_daily_temp.csv`, `vostok_daily_wind.csv`: CSV files containing daily averaged atmospheric pressure, temperature and wind speed at the Vostok station; besides the date (column `UTC`) and the daily averaged parameter, each file contains a column `Count` with the number of measurements per day (entries with fewer than 4 measurements must be filtered out before analysis).

#### `./data/WRF/`

- `WRF_HOURLY_TOTAL_IP_[500/800/1000/1200].npy`: `numpy` arrays containing hourly total ionospheric potential values simulated with the WRF model, assuming the CAPE threshold of 500, 800, 1000 or 1200 J/kg. Note that for the threshold of 500 J/kg a new parameterisation is applied, which also assumes a 25 °C threshold in temperature; the same applies to the next item on the list.
- `WRF_IP_[500/800/1000/1200]_LATxMON.npy`: `numpy` arrays with dimensions `(latitude, 12)`, containing monthly averaged contributions to the ionospheric potential from different latitude ranges simulated with the WRF model, assuming the CAPE threshold of 500, 800, 1000 or 1200 J/kg.
- `WRF_NUMDAYS_MON.npy`: A `numpy` array indicating the number of fair-weather days included in the monthly averages.
- `WRF_T2_LATxMON.npy`: A `numpy` array with the shape `(180, 12)`, containing monthly averaged temperatures at the height of 2 m (`T2m`) for each 1° wide latitude band. `T2m` values are calculated with the WRF.
- `WRF_T2_MONxLAT.npy`: A `numpy` array containing monthly averaged 2-m-level temperatures (`T2m`) for each 1° wide latitude band across the full 360° of longitude. `T2m` values are calculated with the Weather Research and Forecasting model (WRF) version 4.3.

#### `./data/INMCM/`

- `INMCM_HOURLY_TOTAL_IP_[800/1000/1200].npy`: `numpy` arrays containing hourly total ionospheric potential values simulated with the INMCM model, assuming the CAPE threshold of 800, 1000 or 1200 J/kg.
- `INMCM_IP_[800/1000/1200]_LATxMON.npy`: `numpy` arrays with dimensions `(latitude, 12)`, containing monthly averaged contributions to the ionospheric potential from different latitude ranges simulated with the INMCM model, assuming the CAPE threshold of 800, 1000 or 1200 J/kg.
- `INMCM_NUMDAYS_MON.npy`: A `numpy` array indicating the number of fair-weather days included in the monthly averages.
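The described arrays can be quickly inspected as follows (a sketch; paths are relative to the repository root):

```python
import numpy as np

wrf_ip = np.load("./data/WRF/WRF_IP_500_LATxMON.npy")
inm_ip = np.load("./data/INMCM/INMCM_IP_1000_LATxMON.npy")

print(wrf_ip.shape)  # expected: (latitude, 12) on the WRF grid
print(inm_ip.shape)  # expected: (latitude, 12) on the INMCM grid
```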