Data and code with comprehensive comments for articles on The Seasonal Variation of the DC Global Electric Circuit
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

28 KiB

Data preprocessing for further calculations

Importing libraries

In [1]:
import datetime as dt

import numpy as np
import pandas as pd

Helper variables

In [2]:
# also available at https://eee.ipfran.ru/files/seasonal-variation-2024
# attention: the files are very large (~ 300 GB in total)
src_path = "/home/shared_files/eee_public_files/seasonal-variation-2024"
In [3]:
# calculating the number of simulated days used for the WRF analysis
# (every third day is taken, starting with 1 Jan 1980;
# we limit the data to 1980–2020)

wrf_days = pd.date_range(start="1980-01-01", end="2021-01-01", freq="3D")
wrf_N_days = len(wrf_days[wrf_days.year <= 2020])  # limiting index for WRF days
print(wrf_N_days)

# calculating the number of simulated days used for the INMCM analysis
# (every day is taken, starting with 1 Jan 1979;
# we limit the data to 1980–2020)

inm_start_year = 1
inm_end_year = 42
inm_N_days = 365 * (inm_end_year - inm_start_year)
print(inm_N_days)
4992
14965
In [4]:
# dates corresponding to the indices of the data arrays
# (in the case of the WRF the dates exactly correspond to real dates,
# in the case of the INMCM there are 41 365-day years)

wrf_dt_indices = np.array(
    [dt.date(1980, 1, 1) + dt.timedelta(i * 3) for i in range(wrf_N_days)]
)
inm_dt_indices = np.array(
    [dt.date(2022, 1, 1) + dt.timedelta(i % 365) for i in range(inm_N_days)]
)

Preprocessing T2 data from the WRF

In [5]:
# `WRF-T2-DAILY.npy` contains WRF-simulated
# average air temperature values (in °C) at the height of 2 m with the shape
# (number of days, number of latitudes, number of longitudes);
# the file contains temperature values depending on (d, lat, lon)
#   d (axis 0) is the number of a day starting with 0 and ending with 5113
#              every third day is taken
#              d = 0 corresponds to 1 Jan 1980
#              d = 5386 corresponds to 28 Mar 2024
#              d = 4991 corresponds to 29 Dec 2020
#              (we will restrict our attention to 1980–2020)
# lat (axis 3) describes the latitude (an integer in [0, 179]
#              corresponding to 1° wide cells within 90°S–90°N)
# lon (axis 4) describes the longitude (an integer in [0, 359]
#              corresponding to 1° wide cells across each circle of latitudes)

wrf_T2_data = np.load(f"{src_path}/WRF-T2-DAILY.npy")[:wrf_N_days]
wrf_T2_data_DAYxLAT = wrf_T2_data.mean(axis=-1)
In [6]:
# initialising an array to store monthly averaged values
# for different latitudes
wrf_T2_LATxMON = np.zeros((180, 12))

# iterating over month numbers (starting with 0)
for month_idx in range(12):
    # filtering indices by the month number
    wrf_monthly_indices = [
        i for i, date in enumerate(wrf_dt_indices)
        if date.month == month_idx + 1
    ]

    # putting the values for the specific month into the array
    wrf_T2_LATxMON[:, month_idx] = (
        wrf_T2_data_DAYxLAT[wrf_monthly_indices].mean(axis=0)
    )

np.save("./data/WRF/WRF_T2_LATxMON.npy", wrf_T2_LATxMON)
In [7]:
del wrf_T2_data

Preprocessing the values of CAPE and convective precipitation from the INMCM and WRF

In [8]:
# `WRF-CAPE-00-12-TROP.npy` contains WRF-simulated
# average max-theta-e CAPE values (in J/kg) in the tropics with the shape
# (number of simulations per day, number of days, number of hours,
# number of latitudes, number of longitudes);
# the file contains CAPE values for the air parcel at the height of the maximum theta-e
# depending on (s, d, h, lat, lon)
#   s (axis 0) is the number of simulations
#              0 corresponds to the start of the model at 0 UTC the day before
#              1 corresponds to the start of the model at 12 UTC the day before
#   d (axis 1) is the number of a day starting with 0 and ending with 5113
#              every third day is taken
#              d = 0 corresponds to 1 Jan 1980
#              d = 5386 corresponds to 28 Mar 2024
#              d = 4991 corresponds to 29 Dec 2020
#              (we will restrict our attention to 1980–2020)
#   h (axis 2) is the hour since 18 hours after the model initiation
#              (an integer in [0, 24])
# lat (axis 3) describes the latitude (an integer in [0, 59]
#              corresponding to 1° wide cells within 30°S–30°N)
# lon (axis 4) describes the longitude (an integer in [0, 359]
#              corresponding to 1° wide cells across each circle of latitudes)

wrf_cape_data = np.load(
    f"{src_path}/WRF-CAPE-00-12-TROP.npy"
)[:, :wrf_N_days].flatten()
In [9]:
# `INMCM-SCAPE-TROP.npy` contains INMCM-simulated
# average surface CAPE values (in J/kg) in the tropics with the shape
# (number of years, number of days in a year, number of hours,
# number of latitudes, number of longitudes);
# the file contains CAPE values for the air parcel at the surface
# depending on (y, d, h, lat, lon)
#   y (axis 0) is the number of a year starting with 0 and ending with 42
#              y = 0 roughly corresponds to 1979
#              y = 42 roughly corresponds to 2021
#              y values in [1, 41] correspond to 1980–2020
#   d (axis 1) is the number of a day (an integer in [0, 364])
#              each model year consists of 365 days
#   h (axis 2) is the hour of the day (an integer in [0, 23])
# lat (axis 3) describes the latitude (an integer in [0, 39]
#              corresponding to 1.5° wide cells within 30°S–30°N)
# lon (axis 4) describes the longitude (an integer in [0, 179]
#              corresponding to 2° wide cells across each circle of latitude)

inm_scape_data = np.load(
    f"{src_path}/INMCM-SCAPE-TROP.npy"
)[inm_start_year:inm_end_year].flatten()
In [10]:
for a in [wrf_cape_data, inm_scape_data]:
    print(len(a), np.amin(a), np.amax(a))
5391360000 0.0 12336.779
2585952000 0.0 11888.347
In [11]:
# calculating histogram parameters

wrf_cape_hist = np.histogram(wrf_cape_data,
                             bins=np.arange(0, 17001, 10))
inm_scape_hist = np.histogram(inm_scape_data,
                              bins=np.arange(0, 17001, 10))
In [12]:
# saving the data for plotting histograms in other scripts

np.savez(
    "./data/WRF/WRF_CAPE_HIST.npz",
    values=wrf_cape_hist[0], bins=wrf_cape_hist[1]
)
np.savez(
    "./data/INMCM/INMCM_SCAPE_HIST.npz",
    values=inm_scape_hist[0], bins=inm_scape_hist[1]
)
In [13]:
# `WRF-RAIN-00-12-TROP.npy` contains WRF-simulated
# average amounts of convective precipitation in the tropics with the shape
# (number of simulations per day, number of days, number of hours,
# number of latitudes, number of longitudes);
# the file contains hourly values depending on (s, d, h, lat, lon)
#   s (axis 0) is the number of simulations
#              0 corresponds to the start of the model at 0 UTC the day before
#              1 corresponds to the start of the model at 12 UTC the day before
#   d (axis 1) is the number of a day starting with 0 and ending with 5113
#              every third day is taken
#              d = 0 corresponds to 1 Jan 1980
#              d = 5386 corresponds to 28 Mar 2024
#              d = 4991 corresponds to 29 Dec 2020
#              (we will restrict our attention to 1980–2020)
#   h (axis 2) is the hour since 18 hours after the model initiation
#              (an integer in [0, 24])
# lat (axis 3) describes the latitude (an integer in [0, 59]
#              corresponding to 1° wide cells within 30°S–30°N)
# lon (axis 4) describes the longitude (an integer in [0, 359]
#              corresponding to 1° wide cells across each circle of latitudes)

wrf_rain_data = np.load(
    f"{src_path}/WRF-RAIN-00-12-TROP.npy"
)[:, :wrf_N_days].flatten()
In [14]:
# `INMCM-RAIN-TROP.npy` contains INMCM-simulated
# average amounts of convective precipitation in the tropics with the shape
# (number of years, number of days in a year, number of hours,
# number of latitudes, number of longitudes);
# the file contains hourly values depending on (y, d, h, lat, lon)
#   y (axis 0) is the number of a year starting with 0 and ending with 42
#              y = 0 roughly corresponds to 1979
#              y = 42 roughly corresponds to 2021
#              y values in [1, 41] correspond to 1980–2020
#   d (axis 1) is the number of a day (an integer in [0, 364])
#              each model year consists of 365 days
#   h (axis 2) is the hour of the day (an integer in [0, 23])
# lat (axis 3) describes the latitude (an integer in [0, 39]
#              corresponding to 1.5° wide cells within 30°S–30°N)
# lon (axis 4) describes the longitude (an integer in [0, 179]
#              corresponding to 2° wide cells across each circle of latitude)

inm_rain_data = np.load(
    f"{src_path}/INMCM-RAIN-TROP.npy"
)[inm_start_year:inm_end_year].flatten()
In [15]:
# retaining only the cells actually contributing to the IP

wrf_cape_rain_data = wrf_cape_data[wrf_rain_data > 0]
inm_scape_rain_data = inm_scape_data[inm_rain_data > 0]
In [16]:
for a in [wrf_cape_rain_data, inm_scape_rain_data]:
    print(len(a), np.amin(a), np.amax(a))
899773291 0.0 7764.9863
919183611 0.0 11888.347
In [17]:
# calculating histogram parameters
# (only the cells contributing to the IP are taken into account)

wrf_cape_hist = np.histogram(wrf_cape_rain_data,
                             bins=np.arange(0, 17001, 10))
inm_scape_hist = np.histogram(inm_scape_rain_data,
                              bins=np.arange(0, 17001, 10))
In [18]:
# saving the data for plotting histograms in other scripts

np.savez(
    "./data/WRF/WRF_CAPE_RAIN_HIST.npz",
    values=wrf_cape_hist[0], bins=wrf_cape_hist[1]
)
np.savez(
    "./data/INMCM/INMCM_SCAPE_RAIN_HIST.npz",
    values=inm_scape_hist[0], bins=inm_scape_hist[1]
)
In [19]:
del wrf_cape_data, inm_scape_data
del wrf_rain_data, inm_rain_data

Preprocessing IP data from the INMCM and WRF: the usual parameterisation

In [20]:
# dictionaries where the processed data are saved
# dictionary keys represent CAPE threshold values

# dictionaries to store diurnal average IP values summed over longitudes
# the dimensions are (4992, 180) for the WRF and (365×41, 120) for the INMCM
wrf_daily_lat_ip = {}
inm_daily_lat_ip = {}

# dictionaries to store hourly IP values summed over longitudes and latitudes
# the dimensions are (4992, 24) for the WRF and (365×41, 24) for the INMCM
wrf_hourly_total_ip = {}
inm_hourly_total_ip = {}
In [21]:
# iterating over CAPE threshold values (in J/kg) used in modelling 
# for each threshold, there are corresponding model data sets

for cape_thres in [600, 800, 1000, 1200]:
    # `WRF-IP-CAPE-{cape_thres}.npy` contains WRF-simulated
    # grid cell contributions to the IP with CAPE threshold = `cape_thres` J/kg
    # (not normalised) with the shape
    # (number of days, number of hours,
    # number of latitudes, number of longitudes);
    # the file contains hourly values depending on (d, h, lat, lon)
    #   d (axis 0) is the number of a day starting with 0 and ending with 5113
    #              every third day is taken
    #              d = 0 corresponds to 1 Jan 1980
    #              d = 5386 corresponds to 28 Mar 2024
    #              d = 4991 corresponds to 29 Dec 2020
    #              (we will restrict our attention to 1980–2020)
    #   h (axis 1) is the hour of the day (an integer in [0, 24])
    #              the values corresponding to h = 0 and h = 24 are the same
    #              (we delete the 25th value)
    # lat (axis 2) describes the latitude (an integer in [0, 179]
    #              corresponding to 1° wide cells within 90°S–90°N)
    # lon (axis 3) describes the longitude (an integer in [0, 359]
    #              corresponding to 1° wide cells across each circle of latitudes)

    wrf_raw_ip_data = np.load(
        f"{src_path}/WRF-IP-CAPE-{cape_thres}.npy"
    )[:wrf_N_days, :24]

    # normalising contributions to the IP to the global mean of 240 kV
    wrf_raw_ip_data /= (1/240e3) * wrf_raw_ip_data.sum(axis=(-2,-1)).mean()

    # filling the dictionaries with averaged values
    wrf_daily_lat_ip[cape_thres] = wrf_raw_ip_data.mean(axis=1).sum(axis=-1)
    wrf_hourly_total_ip[cape_thres] = wrf_raw_ip_data.sum(axis=(-2, -1))

    np.save(
        f"./data/WRF/WRF_HOURLY_TOTAL_IP_{cape_thres}.npy",
        wrf_hourly_total_ip[cape_thres]
    )
In [22]:
# iterating over CAPE threshold values (in J/kg) used in modelling 
# for each threshold, there are corresponding model data sets

for cape_thres in [600, 800, 1000, 1200]:
    # `INMCM-IP-CAPE-{cape_thres}.npy` contains INMCM-simulated
    # grid cell contributions to the IP with CAPE threshold = `cape_thres` J/kg
    # (not normalised) with the shape
    # (number of years, number of days in a year, number of hours,
    # number of latitudes, number of longitudes);
    # the file contains hourly values depending on (y, d, h, lat, lon)
    #   y (axis 0) is the number of a year starting with 0 and ending with 42
    #              y = 0 roughly corresponds to 1979
    #              y = 42 roughly corresponds to 2021
    #              y values in [1, 41] correspond to 1980–2020
    #   d (axis 1) is the number of a day (an integer in [0, 364])
    #              each model year consists of 365 days
    #   h (axis 2) is the hour of the day (an integer in [0, 23])
    # lat (axis 3) describes the latitude (an integer in [0, 39]
    #              corresponding to 1.5° wide cells within 30°S–30°N)
    # lon (axis 4) describes the longitude (an integer in [0, 179]
    #              corresponding to 2° wide cells across each circle of latitude)

    inm_raw_ip_data = np.load(
        f"{src_path}/INMCM-IP-CAPE-{cape_thres}.npy"
    )[inm_start_year:inm_end_year].reshape(
        (inm_N_days, 24, 120, 180)
    )

    # normalising contributions to the IP to the global mean of 240 kV
    inm_raw_ip_data /= (1/240e3) * inm_raw_ip_data.sum(axis=(-2,-1)).mean()

    # filling the dictionaries with averaged values
    inm_daily_lat_ip[cape_thres] = inm_raw_ip_data.mean(axis=1).sum(axis=-1)
    inm_hourly_total_ip[cape_thres] = inm_raw_ip_data.sum(axis=(-2, -1))

    np.save(
        f"./data/INMCM/INMCM_HOURLY_TOTAL_IP_{cape_thres}.npy",
        inm_hourly_total_ip[cape_thres]
    )
In [23]:
# iterating over CAPE threshold values (in J/kg) used in modelling 
# for each threshold, there are corresponding model data sets

for cape_thres in [600, 800, 1000, 1200]:
    # initialising arrays to store monthly averaged values
    # for different latitudes
    wrf_data_LATxMON = np.zeros((180, 12))
    inm_data_LATxMON = np.zeros((120, 12))

    # iterating over month numbers (starting with 0)
    for month_idx in range(12):
        # filtering indices by the month number
        wrf_monthly_indices = [i for i, date in enumerate(wrf_dt_indices)
                               if date.month == month_idx + 1]
        inm_monthly_indices = [i for i, date in enumerate(inm_dt_indices)
                               if date.month == month_idx + 1]

        # putting the values for the specific month into the array
        wrf_data_LATxMON[:, month_idx] = \
            wrf_daily_lat_ip[cape_thres][wrf_monthly_indices].mean(axis=0)
        inm_data_LATxMON[:, month_idx] = \
            inm_daily_lat_ip[cape_thres][inm_monthly_indices].mean(axis=0)

    np.save(
        f"./data/WRF/WRF_IP_{cape_thres}_LATxMON.npy",
        wrf_data_LATxMON
    )
    np.save(
        f"./data/INMCM/INMCM_IP_{cape_thres}_LATxMON.npy",
        inm_data_LATxMON
    )
In [24]:
del wrf_raw_ip_data, inm_raw_ip_data

Preprocessing IP data from the WRF: the new parameterisation

In [25]:
# `WRF-IP-CAPE-500-T2-LIN-25.npy` contains WRF-simulated
# grid cell contributions to the IP with CAPE threshold = 500 J/kg
# and temperature threshold = 25 °C (not normalised) with the shape
# (number of days, number of hours,
# number of latitudes, number of longitudes);
# the file contains hourly values depending on (d, h, lat, lon)
#   d (axis 0) is the number of a day starting with 0 and ending with 5113
#              every third day is taken
#              d = 0 corresponds to 1 Jan 1980
#              d = 5386 corresponds to 28 Mar 2024
#              d = 4991 corresponds to 29 Dec 2020
#              (we will restrict our attention to 1980–2020)
#   h (axis 1) is the hour of the day (an integer in [0, 24])
#              the values corresponding to h = 0 and h = 24 are the same
#              (we delete the 25th value)
# lat (axis 2) describes the latitude (an integer in [0, 179]
#              corresponding to 1° wide cells within 90°S–90°N)
# lon (axis 3) describes the longitude (an integer in [0, 359]
#              corresponding to 1° wide cells across each circle of latitudes)

wrf_raw_ip_data = np.load(
    f"{src_path}/WRF-IP-CAPE-500-T2-LIN-25.npy"
)[:wrf_N_days, :24]

# normalising contributions to the IP to the global mean of 240 kV
wrf_raw_ip_data /= (1/240e3) * wrf_raw_ip_data.sum(axis=(-2,-1)).mean()

# filling the dictionaries with averaged values
wrf_daily_latitudal_ip = wrf_raw_ip_data.mean(axis=1).sum(axis=-1)
wrf_hourly_total_ip = wrf_raw_ip_data.sum(axis=(-2, -1))

np.save(
    "./data/WRF/WRF_HOURLY_TOTAL_IP_500_T2_25.npy",
    wrf_hourly_total_ip,
)
In [26]:
# initialising an array to store monthly averaged values
# for different latitudes
wrf_data_LATxMON = np.zeros((180, 12))

# iterating over month numbers (starting with 0)
for month_idx in range(12):
    # filtering indices by the month number
    wrf_monthly_indices = [i for i, date in enumerate(wrf_dt_indices)
                           if date.month == month_idx + 1]

    # putting the values for the specific month into the array
    wrf_data_LATxMON[:, month_idx] = \
        wrf_daily_latitudal_ip[wrf_monthly_indices].mean(axis=0)

np.save("./data/WRF/WRF_IP_500_T2_25_LATxMON.npy", wrf_data_LATxMON)
In [27]:
del wrf_raw_ip_data

Saving the number of days for each month (used to compute mean values)

In [28]:
# saving the number of days for each month
# necessary for correct averaging due to 
# different numbers of days in different months

wrf_days = np.array([len([i for i, date in enumerate(wrf_dt_indices) 
                          if date.month == m + 1])
                     for m in range(12)])
inm_days = np.array([len([i for i, date in enumerate(inm_dt_indices) 
                          if date.month == m + 1])
                     for m in range(12)])

np.save("./data/WRF/WRF_NUMDAYS_MON.npy", wrf_days)
np.save("./data/INMCM/INMCM_NUMDAYS_MON.npy", inm_days)

# to calculate the annual mean value, use
# `(wrf_data_LATxMON[:, :].sum(axis=0) * days).sum() / days.sum()`
# rather than
# `wrf_data_LATxMON[:, :].sum(axis=0).mean()`,
# since
# `((a1+a2+a3)/3 + (b1+b2)/2)/2 != (a1+a2+a3+b1+b2)/5`
In [ ]: