Data and code with comprehensive comments for articles on The Seasonal Variation of the DC Global Electric Circuit
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

17 KiB

Data preprocessing for further calculations

Import libraries

In [2]:
import datetime as dt

import numpy as np
import pandas as pd

Helper variables

In [3]:
# also available at https://eee.ipfran.ru/files/seasonal-variation-2024/
# attention: the files are very large (~ 350 GB totally)
src_path = "../shared_files/eee_public_files/seasonal-variation-2024/"
In [4]:
# used numbers of simulated days for analysis
wrf_N_days = 4992
inmcm_N_days = 3650
In [5]:
# dates corresponding to the indices (0 axis) of the data arrays
# note: for WRF dates correspond to real dates

wrf_dt_indicies = np.array(
    [dt.date(1980, 1, 1) + dt.timedelta(i * 3) for i in range(wrf_N_days)]
)
inmcm_dt_indicies = np.array(
    [dt.date(2022, 1, 1) + dt.timedelta(i % 365) for i in range(inmcm_N_days)]
)

Preprocessing WRF T2m data

In [10]:
# air temperature at the height of 2 m with the shape
# (number of days, number of hours, number of latitudes, number of longitudes)
# contains temperature values depending on (d, h, lat, lon)
#   d (axis 0) is the number of a day starting with 0 and ending with 5113
#              every third day is taken
#              d = 0 corresponds to 1 Jan 1980, 
#              d = 5113 corresponds to 30 Dec 2021
#              d = 4991 corresponds to 29 Dec 2020
#              (we will restrict our attention to 1980–2020)
#   h (axis 1) is the hour of the day (an integer in [0, 24])
#              the values corresponding to h = 0 and h = 24 are the same
# lat (axis 2) describes the latitude (an integer in [0, 179]) 
# lon (axis 3) describes the longitude (an integer in [0, 359])

wrf_T2_data = np.load(f"{src_path}/WRF-T2-MAP.npy")[:wrf_N_days]
wrf_T2_data_DAYxLAT = wrf_T2_data.mean(axis=(1, 3))
In [11]:
wrf_T2_data_DAYxLAT.shape
Out[11]:
(4992, 180)
In [14]:
# air temperature averaged over latitudes and months
wrf_mon_T2 = np.zeros((180, 12))

for month_idx in range(12):
    # filter indicies by month number
    monthly_indicies = [
        i for i, date in enumerate(wrf_dt_indicies) if date.month == month_idx + 1
    ]

    # putting values at specific month into averaged array
    wrf_mon_T2[:, month_idx] = wrf_T2_data_DAYxLAT[monthly_indicies].mean(axis=0)-273.15

np.save(f"./data/WRF/WRF_T2_LATxMON.npy",wrf_mon_T2)
In [17]:
wrf_mon_T2.max()
Out[17]:
27.894258059212177

Preprocessing INMCM and WRF IP: classic parametrization

In [ ]:
# dictionaries where processed data is saved
# the dictionary keys represent the threshold value of CAPE

# for storing arrays with averaged by hours and summarized by longitude,
# i.e. with dimensions (4992, 180) for WRF and (3650, 120) for INMCM
wrf_daily_latitudal_ip = {}
inmcm_daily_latitudal_ip = {}

# for storing arrays summarized by longitude and latitude,
# i.e. with dimensions (4992, 24) for WRF and (3650, 24) for INMCM
wrf_hourly_total_ip = {}
inmcm_hourly_total_ip = {}
In [ ]:
# iterating over the CAPE threshold (J/kg) values used in modeling 
# for each threshold, there are corresponding model datasets
for cape_thres in [800, 1000, 1200]:

    # grid cell contributions to the IP (not normalised) with the shape
    # (number of days, number of hours, number of latitudes, number of longitudes)
    wrf_raw_ip_data = np.load(f"{src_path}/WRF-IP-MAP-{cape_thres}.npy")[:wrf_N_days]
    # modelled using WRF model with CAPE threshold = `cape_thres` J/kg
    # contains values of contributions to the IP depending on (d, h, lat, lon)
    #   d (axis 0) is the number of a day starting with 0 and ending with 5113
    #              every third day is taken
    #              d = 0 corresponds to 1 Jan 1980, 
    #              d = 5113 corresponds to 30 Dec 2021
    #              d = 4991 corresponds to 29 Dec 2020
    #              (we will restrict our attention to 1980–2020)
    #   h (axis 1) is the hour of the day (an integer in [0, 24])
    #              the values corresponding to h = 0 and h = 24 are the same
    # lat (axis 2) describes the latitude (an integer in [0, 179]) 
    # lon (axis 3) describes the longitude (an integer in [0, 359])

    # discarding the last hour, which duplicates the first one
    wrf_raw_ip_data = wrf_raw_ip_data[:, :24, :, :]
    
    # normalisation of contributions to the IP to the global mean of 240 kV
    wrf_raw_ip_data /= (1/240e3) * wrf_raw_ip_data.sum(axis=(-2,-1)).mean()

    # filling dictionaries with averaged arrays
    wrf_daily_latitudal_ip[cape_thres] = wrf_raw_ip_data.mean(axis=1).sum(axis=-1)
    wrf_hourly_total_ip[cape_thres] = wrf_raw_ip_data.sum(axis=(-2, -1))

    np.save(f"./data/WRF/WRF_HOURLY_TOTAL_IP_{cape_thres}.npy",
            wrf_hourly_total_ip[cape_thres])

    # grid cell contributions to the IP (not normalised) reshaped to
    # (number of days, number of hours, number of latitudes, number of longitudes)
    inmcm_raw_ip_data = np.load(f"{src_path}/INMCM-IP-MAP-{cape_thres}.npy")\
                          .reshape((inmcm_N_days, 24, 120, 180))
    # modelled using INMCM model with CAPE threshold = `cape_thres` J/kg
    # contains values of contributions to the IP depending on (d, h, lat, lon)
    #   d (axis 0) is the number of a day (not correspond to real days,
    #              10 consecutive 365-day years have been simulated)
    #   h (axis 1) is the hour of the day (an integer in [0, 23])
    # lat (axis 2) describes the latitude (an integer in [0, 179]) 
    # lon (axis 3) describes the longitude (an integer in [0, 359])

    # normalisation of contributions to the IP to the global mean of 240 kV
    inmcm_raw_ip_data /= (1/240e3) * inmcm_raw_ip_data.sum(axis=(-2,-1)).mean()

    # filling dictionaries with averaged arrays
    inmcm_daily_latitudal_ip[cape_thres] = inmcm_raw_ip_data.mean(axis=1).sum(axis=-1)
    inmcm_hourly_total_ip[cape_thres] = inmcm_raw_ip_data.sum(axis=(-2, -1))

    np.save(f"./data/INMCM/INMCM_HOURLY_TOTAL_IP_{cape_thres}.npy",
            inmcm_hourly_total_ip[cape_thres])
In [ ]:
# iterating over the CAPE threshold (J/kg) values used in modeling 
# for each threshold, there are corresponding model datasets
for cape_thres in [800, 1000, 1200]:

    # initialization of an arrays to store time-averaged data over months
    wrf_data_LATxMON = np.zeros((180, 12))
    inmcm_data_LATxMON = np.zeros((120, 12))

    # iteration over month number (starting with 0)
    for month_idx in range(12):

        # filtering day indices belonging to a specific month
        wrf_monthly_indicies = [i for i, date in enumerate(wrf_dt_indicies) 
                                if date.month == month_idx + 1]
        inm_monthly_indicies = [i for i, date in enumerate(inmcm_dt_indicies) 
                                if date.month == month_idx + 1]

        # filling with modeling values with a CAPE threshold 
        # averaged over months of the year
        wrf_data_MONxLAT[:, month_idx] = \
            wrf_daily_latitudal_ip[cape_thres][monthly_indicies].mean(axis=0)
        inmcm_data_LATxMON[:, month_idx] = \
            inmcm_daily_latitudal_ip[cape_thres][monthly_indicies].mean(axis=0)

    np.save(f"./data/WRF/WRF_IP_{cape_thres}_LATxMON.npy",
            wrf_data_MONxLAT)
    np.save(f"./data/INMCM/INMCM_IP_{cape_thres}_LATxMON.npy",
            inmcm_data_LATxMON)

Preprocessing WRF IP: new parametrization

In [ ]:
# grid cell contributions to the IP (not normalised) reshaped to
# (number of days, number of hours, number of latitudes, number of longitudes)
wrf_raw_ip_data = np.load(f"{src_path}/WRF-IP-MAP-500-T2-25.npy")[:wrf_N_days]
# modelled using WRF model using new parametrization based on
# CAPE and T2 with corresponding thresholds 500 J/kg and 25°C.
# contains values of contributions to the IP depending on (d, h, lat, lon)
#   d (axis 0) is the number of a day starting with 0 and ending with 5113
#              every third day is taken
#              d = 0 corresponds to 1 Jan 1980, 
#              d = 5113 corresponds to 30 Dec 2021
#              d = 4991 corresponds to 29 Dec 2020
#              (we will restrict our attention to 1980–2020)
#   h (axis 1) is the hour of the day (an integer in [0, 24])
#              the values corresponding to h = 0 and h = 24 are the same
# lat (axis 2) describes the latitude (an integer in [0, 179]) 
# lon (axis 3) describes the longitude (an integer in [0, 359])

# discarding the last hour, which duplicates the first one
wrf_raw_ip_data = wrf_raw_ip_data[:, :24, :, :]

# normalisation of contributions to the IP to the global mean of 240 kV
wrf_raw_ip_data /= (1/240e3) * wrf_raw_ip_data.sum(axis=(-2,-1)).mean()

# filling dictionaries with averaged arrays
wrf_daily_latitudal_ip = wrf_raw_ip_data.mean(axis=1).sum(axis=-1)
wrf_hourly_total_ip = wrf_raw_ip_data.sum(axis=(-2, -1))

np.save(
    f"./data/WRF/WRF_HOURLY_TOTAL_IP_500_T2_25.npy",
    wrf_hourly_total_ip,
)
In [ ]:
# iterating over the CAPE threshold (J/kg) values used in modeling 
# for each threshold, there are corresponding model datasets
for cape_thres in [800, 1000, 1200]:

    # initialization of an arrays to store time-averaged data over months
    wrf_data_LATxMON = np.zeros((180, 12))
    inmcm_data_LATxMON = np.zeros((120, 12))

    # iteration over month number (starting with 0)
    for month_idx in range(12):

        # filtering day indices belonging to a specific month
        wrf_monthly_indicies = [i for i, date in enumerate(wrf_dt_indicies) 
                                if date.month == month_idx + 1]
        inm_monthly_indicies = [i for i, date in enumerate(inmcm_dt_indicies) 
                                if date.month == month_idx + 1]

        # filling with modeling values with a CAPE threshold 
        # averaged over months of the year
        wrf_data_MONxLAT[:, month_idx] = \
            wrf_daily_latitudal_ip[cape_thres][monthly_indicies].mean(axis=0)
        inmcm_data_LATxMON[:, month_idx] = \
            inmcm_daily_latitudal_ip[cape_thres][monthly_indicies].mean(axis=0)
In [ ]:
# initialization of a array to store time-averaged data over months
wrf_data_LATxMON = np.zeros((180, 12))

# iteration over month number (starting with 0)
for month_idx in range(12):
    # filtering day indices belonging to a specific month
    monthly_indicies = [i for i, date in enumerate(wrf_dt_indicies)
                        if date.month == month_idx + 1]

    # filling with modeling values averaged over months of the year
    wrf_data_LATxMON[:, month_idx] = \
        wrf_daily_latitudal_ip[monthly_indicies].mean(axis=0)

np.save(f"./data/WRF/WRF_IP_500_T2_25_LATxMON.npy", wrf_data_LATxMON)

Saving number of days (used for monthly mean) for each month

In [ ]:
# saving the number of days for each month
# necessary for correct averaging due to 
# different numbers of days in different months

wrf_days = np.array([len([i for i, date in enumerate(wrf_dt_indicies) 
                          if date.month == m + 1]) 
                     for m in range(12)])

inm_days = np.array([len([i for i, date in enumerate(inmcm_dt_indicies) 
                          if date.month == m + 1]) 
                     for m in range(12)])

np.save(f"./data/WRF/WRF_NUMDAYS_MON.npy", wrf_days)
np.save(f"./data/INMCM/INMCM_NUMDAYS_MON.npy", inm_days)

# for average over months use
# `(wrf_data_LATxMON[:, :].sum(axis=0)*days).sum()/days.sum()`
# unstead
# `wrf_data_LATxMON[:, :].sum(axis=0).mean()`
# because
# `((a1+a2+a3)/3 + (b1+b2)/2)/2 != (a1+a2+a3+b1+b2)/5`