{ "cells": [ { "cell_type": "markdown", "id": "113058f9-8324-4368-99ed-b088987c683a", "metadata": {}, "source": [ "# Analysis of the Vostok data\n", "The seasonal-diurnal diagram, its hourly cross-sections, possible role of adjustments for various factors\n", "\n", "The source code of Figures 1.2, 1.3, 1.5, 1.S1, 1.S2 and 1.S3" ] }, { "cell_type": "markdown", "id": "5870ae4e-8a7d-4d76-a7e0-a542ee0fbd25", "metadata": {}, "source": [ "### Importing libraries" ] }, { "cell_type": "code", "execution_count": 1, "id": "c4d7641a-fe60-4177-95d8-9f10e7622f87", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import matplotlib.transforms as tf\n", "from matplotlib import cm, colors, colormaps\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import scipy.stats as st" ] }, { "cell_type": "markdown", "id": "eb2d634e-ccc5-4fd2-a9e8-05d314f12c1b", "metadata": {}, "source": [ "### Loading source PG datasets" ] }, { "cell_type": "code", "execution_count": 2, "id": "80dbd8bc-6286-4b35-8de7-900551e1026e", "metadata": {}, "outputs": [], "source": [ "# Potential gradient (PG) measured at Vostok station, 1998–2001, adjusted for\n", "# local meteorological and solar wind influences; units: V·m^(−1).\n", "# Source: Burns et al. (2012), Table 2.\n", "\n", "vostok_old_data = np.array([\n", "    195, 201, 205, 192, 188, 195,\n", "    209, 198, 209, 195, 193, 192,\n", "])" ] }, { "cell_type": "code", "execution_count": 3, "id": "27eaf008-0cf7-45f6-a5ee-2b32c4c6d322", "metadata": {}, "outputs": [], "source": [ "# potential gradient values measured at Vostok station in 1998–2001\n", "# units are V·m^(−1)\n", "# source: Burns et al. 
(2012), Table 2\n", "# each entry: adjusted value (as in `vostok_old_data`) minus the sum of\n", "# four correction terms (presumably the individual adjustments from that table)\n", "\n", "vostok_old_data_unadjusted = (\n", " np.array([195 - (23 - 0.1 - 1.4 + 10),\n", " 201 - (20 - 0.1 - 1.5 + 7),\n", " 205 - (8 - 1.5 - 0.1 + 6),\n", " 192 - (1.0 - 1.6 + 1.5 + 4.9),\n", " 188 - (-0.9 - 1.0 + 0.9 + 2.2),\n", " 195 - (2.4 + 1.2 - 1.6 + 2.7),\n", " 209 - (0.4 + 2.4 + 0.2 + 2.2),\n", " 198 - (0.1 - 1.2 + 1.5 + 1.9),\n", " 209 - (-0.1 - 0.1 + 1.0 + 4.9),\n", " 195 - (8 - 0.2 + 1.7 + 4.4),\n", " 193 - (20 + 0.1 - 0.7 + 7),\n", " 192 - (23 + 0.2 - 0.6 + 8)])\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "id": "c63b60de-82fa-4dcd-af67-a3d4738a0431", "metadata": {}, "outputs": [], "source": [ "# loading hourly data derived from 10-s averages\n", "# \"Field\" column contains PG values (in V/m) without scaling and calibration\n", "df_10s = pd.read_csv(\n", " \"./data/Vostok/vostok_hourly_from_10_s_without_calibration_and_empty.tsv\",\n", " sep=\"\\t\", parse_dates=[\"Datetime\"]\n", ").set_index(\"Datetime\")\n", "\n", "# adding a new column \"Mark\" to label this dataframe’s data\n", "# as originating from 10-s averages\n", "df_10s[\"Mark\"] = \"10s\"" ] }, { "cell_type": "code", "execution_count": 5, "id": "d2d83b75-97ce-42ba-8a99-8597f84d7946", "metadata": {}, "outputs": [], "source": [ "# loading hourly data derived from 5-min averages\n", "# \"Field\" column contains PG values (in V/m) without scaling and calibration\n", "df_5min = pd.read_csv(\n", " \"./data/Vostok/vostok_hourly_from_5_min_without_calibration_and_empty.tsv\",\n", " sep=\"\\t\", parse_dates=[\"Datetime\"]\n", ").set_index(\"Datetime\")\n", "\n", "# adding a new column \"Mark\" to label this dataframe’s data\n", "# as originating from 5-min averages\n", "df_5min[\"Mark\"] = \"5min\"" ] }, { "cell_type": "code", "execution_count": 6, "id": "26da1848-ad4f-4e58-8a75-d02428fb12c7", "metadata": {}, "outputs": [], "source": [ "# loading the earlier data set (1998-2004)\n", "earlier_df_src = pd.read_csv(\n", " 
\"./data/Vostok/vostok_1998_2004_hourly_80percent_all.tsv\",\n", " sep=\"\\t\", parse_dates=[\"Datetime\"]\n", ").set_index(\"Datetime\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "d126b8b1-0b66-4817-a556-8f33860608ae", "metadata": {}, "outputs": [], "source": [ "# note that all the aforementioned files assume an 80%\n", "# data availability per hour, which means that the hourly value\n", "# was calculated only when at least 80% of the records were\n", "# available (at least 288 10-s averages or at least 9 5-min averages)\n", "# NOTE(review): 9 of the 12 5-min averages in an hour is 75%, not 80% — confirm the threshold" ] }, { "cell_type": "code", "execution_count": 8, "id": "7de3455e-ba80-420d-8fec-8f82ee28804a", "metadata": {}, "outputs": [], "source": [ "# combining the two dataframes:\n", "# filling in gaps in the 10-s data with the 5-min data\n", "df_src = df_10s.combine_first(df_5min)" ] }, { "cell_type": "markdown", "id": "1f220bf9-7828-4a87-b84b-ab0b62cddcd1", "metadata": {}, "source": [ "### Taking form factors into account" ] }, { "cell_type": "code", "execution_count": 9, "id": "dbe063a2-5d6d-4e02-b10b-1438b3f75eeb", "metadata": {}, "outputs": [], "source": [ "# adding a new column \"Factor\" to introduce the form factor\n", "# by which the source values should be divided;\n", "# the factor changes linearly (in daily steps, forward-filled to hourly)\n", "# from `f_0` on 1 Jan 2006 to `f_1` on 1 Dec 2024\n", "f_0 = 3.\n", "f_1 = 2.4\n", "date_range = pd.date_range(start=\"2006-01-01\", end=\"2024-12-01\", freq=\"D\")\n", "values = np.linspace(f_0, f_1, len(date_range))\n", "\n", "factor_df = pd.DataFrame({\"Factor\": values}, index=date_range)\n", "\n", "# lowercase \"h\" is the hourly alias (uppercase \"H\" is deprecated since pandas 2.2);\n", "# dropping any existing \"Factor\" column first keeps this cell safe to re-run\n", "df_src = (\n", "    df_src.drop(columns=\"Factor\", errors=\"ignore\")\n", "    .join(factor_df.resample(\"h\").ffill())\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "id": "2c830a98-f5f2-4792-a8eb-4e6ee3170192", "metadata": {}, "outputs": [], "source": [ "# Optional: one can divide the earlier data by 1.35\n", "# to make equal the mean values of fair-weather histograms across data sets:\n", "# earlier_df[\"Factor\"] = 1.35\n", "# NOTE(review): only `earlier_df_src` is defined in the cells above — confirm the intended frame" ] }, { "cell_type": "code", "execution_count": 11, "id": 
"0b0c6844-bcb4-4331-874f-4c1eeeb91a16", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Factor | \n", "
---|---|
2020-12-31 | \n", "2.524273 | \n", "