You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
15 KiB
15 KiB
Data preprocessing for further calculations¶
Importing libraries¶
In [1]:
import datetime as dt
import numpy as np
import pandas as pd
Helper variables¶
In [2]:
# directory with the raw model output (the source `.npy` files)
# the same files are also available at
# https://eee.ipfran.ru/files/seasonal-variation-2024/
# attention: the files are very large (~ 350 GB in total)
# note: the value already ends with "/", and the loading cells put one more
# "/" between this prefix and the file name
src_path = "../../shared_files/eee_public_files/seasonal-variation-2024/"
In [3]:
# numbers of simulated days taken into the analysis
# INMCM: 10 model years of 365 days each
inm_N_days = 10 * 365
# WRF: every third day starting with 1 Jan 1980 and ending with 29 Dec 2020
wrf_N_days = 4992
In [4]:
# calendar dates matching the day index (axis 0) of the data arrays
# WRF: real dates; every third day is taken, counting from 1 Jan 1980
# INMCM: there are 10 model years of 365 days each, so the same 365 dates
#   (those of 2022, a non-leap year) are repeated for every model year
wrf_dt_indices = np.array([
    dt.date(1980, 1, 1) + dt.timedelta(days=3 * day_idx)
    for day_idx in range(wrf_N_days)
])
inm_dt_indices = np.array([
    dt.date(2022, 1, 1) + dt.timedelta(days=day_idx % 365)
    for day_idx in range(inm_N_days)
])
Preprocessing T2m data from the WRF¶
In [5]:
# WRF air temperature (in K) at the height of 2 m
# the stored array depends on (d, h, lat, lon), i.e. its shape is
# (number of days, number of hours, number of latitudes, number of longitudes)
#   d (axis 0): the number of a day, from 0 to 5113 (every third day is taken)
#     d = 0    -> 1 Jan 1980
#     d = 4991 -> 29 Dec 2020
#     d = 5113 -> 30 Dec 2021
#     only the first `wrf_N_days` days are kept (i.e. 1980–2020)
#   h (axis 1): the hour of the day, an integer in [0, 24]
#     h = 0 and h = 24 hold the same values, so the 25th value is dropped
#   lat (axis 2): the latitude index, an integer in [0, 179]
#   lon (axis 3): the longitude index, an integer in [0, 359]
wrf_T2_data = np.load(f"{src_path}/WRF-T2-MAP.npy")[:wrf_N_days, :24]

# averaging over the hours (axis 1) and the longitudes (axis 3)
# leaves an array depending on (d, lat)
wrf_T2_data_DAYxLAT = np.mean(wrf_T2_data, axis=(1, 3))
In [6]:
# monthly mean temperature for each latitude
# (rows are latitudes, columns are months)
wrf_T2_LATxMON = np.zeros((180, 12))

# month number (1–12) of every analysed WRF day
wrf_day_months = np.array([date.month for date in wrf_dt_indices])

# going through the months (`month_idx` starts with 0)
for month_idx in range(12):
    # selecting the days which belong to the current month
    wrf_monthly_mask = wrf_day_months == month_idx + 1

    # averaging over the selected days, then converting from K to °C
    monthly_mean_K = wrf_T2_data_DAYxLAT[wrf_monthly_mask].mean(axis=0)
    wrf_T2_LATxMON[:, month_idx] = monthly_mean_K - 273.15

np.save("./data/WRF/WRF_T2_LATxMON.npy", wrf_T2_LATxMON)
Preprocessing IP data from the INMCM and WRF: the usual parameterisation¶
In [7]:
# containers for the processed data; in every dictionary
# the keys are the CAPE threshold values

# diurnal mean IP contributions summed over longitudes
# (one array per threshold: (4992, 180) for the WRF, (3650, 120) for the INMCM)
wrf_daily_lat_ip, inm_daily_lat_ip = {}, {}

# hourly IP values summed over both latitudes and longitudes
# (one array per threshold: (4992, 24) for the WRF, (3650, 24) for the INMCM)
wrf_hourly_total_ip, inm_hourly_total_ip = {}, {}
In [8]:
# processing the model output for every CAPE threshold (in J/kg) used
# in modeling; each threshold has its own WRF and INMCM data sets
for cape_thres in (800, 1000, 1200):
    # ---------------------------------------------------------------- WRF
    # grid cell contributions to the IP (not normalised) simulated using
    # the WRF with CAPE threshold = `cape_thres` J/kg
    # the stored array depends on (d, h, lat, lon), i.e. its shape is
    # (number of days, number of hours,
    #  number of latitudes, number of longitudes)
    #   d (axis 0): the number of a day, from 0 to 5113
    #     (every third day is taken)
    #     d = 0    -> 1 Jan 1980
    #     d = 4991 -> 29 Dec 2020
    #     d = 5113 -> 30 Dec 2021
    #     only the first `wrf_N_days` days are kept (i.e. 1980–2020)
    #   h (axis 1): the hour of the day, an integer in [0, 24]
    #     h = 0 and h = 24 hold the same values,
    #     so the 25th value is dropped
    #   lat (axis 2): the latitude index, an integer in [0, 179]
    #   lon (axis 3): the longitude index, an integer in [0, 359]
    wrf_raw_ip_data = np.load(
        f"{src_path}/WRF-IP-MAP-{cape_thres}.npy"
    )[:wrf_N_days, :24]

    # rescaling (in place) so that the IP, i.e. the sum over all grid cells,
    # averaged over all days and hours equals 240 kV
    wrf_raw_ip_data /= (
        (1/240e3) * np.sum(wrf_raw_ip_data, axis=(-2, -1)).mean()
    )

    # hourly IP: the sum over latitudes and longitudes
    wrf_hourly_total_ip[cape_thres] = np.sum(wrf_raw_ip_data, axis=(-2, -1))
    # diurnal mean (over hours) summed over longitudes
    wrf_daily_lat_ip[cape_thres] = np.sum(
        np.mean(wrf_raw_ip_data, axis=1), axis=-1
    )

    np.save(
        f"./data/WRF/WRF_HOURLY_TOTAL_IP_{cape_thres}.npy",
        wrf_hourly_total_ip[cape_thres]
    )

    # -------------------------------------------------------------- INMCM
    # grid cell contributions to the IP (not normalised) simulated using
    # the INMCM with CAPE threshold = `cape_thres` J/kg
    # the loaded array is reshaped to (d, h, lat, lon), i.e. to
    # (number of days, number of hours,
    #  number of latitudes, number of longitudes)
    #   d (axis 0): the number of a day
    #     (10 consecutive 365-day years have been simulated)
    #   h (axis 1): the hour of the day, an integer in [0, 23]
    #   lat (axis 2): the latitude index, an integer in [0, 119]
    #   lon (axis 3): the longitude index, an integer in [0, 179]
    inm_raw_ip_data = np.load(
        f"{src_path}/INMCM-IP-MAP-{cape_thres}.npy"
    ).reshape((inm_N_days, 24, 120, 180))

    # the same normalisation to the global mean of 240 kV
    inm_raw_ip_data /= (
        (1/240e3) * np.sum(inm_raw_ip_data, axis=(-2, -1)).mean()
    )

    # hourly IP: the sum over latitudes and longitudes
    inm_hourly_total_ip[cape_thres] = np.sum(inm_raw_ip_data, axis=(-2, -1))
    # diurnal mean (over hours) summed over longitudes
    inm_daily_lat_ip[cape_thres] = np.sum(
        np.mean(inm_raw_ip_data, axis=1), axis=-1
    )

    np.save(
        f"./data/INMCM/INMCM_HOURLY_TOTAL_IP_{cape_thres}.npy",
        inm_hourly_total_ip[cape_thres]
    )
In [9]:
# month number (1–12) of every analysed day; these do not depend on the
# CAPE threshold, so they are computed once
wrf_day_months = np.array([date.month for date in wrf_dt_indices])
inm_day_months = np.array([date.month for date in inm_dt_indices])

# processing every CAPE threshold (in J/kg) used in modeling;
# each threshold has its own WRF and INMCM data sets
for cape_thres in (800, 1000, 1200):
    # monthly mean values for each latitude
    # (rows are latitudes, columns are months)
    wrf_data_LATxMON = np.zeros((180, 12))
    inm_data_LATxMON = np.zeros((120, 12))

    # going through the months (`month_idx` starts with 0)
    for month_idx in range(12):
        month_number = month_idx + 1

        # selecting the days of the current month and averaging over them
        wrf_month_values = wrf_daily_lat_ip[cape_thres][
            wrf_day_months == month_number
        ]
        wrf_data_LATxMON[:, month_idx] = wrf_month_values.mean(axis=0)

        inm_month_values = inm_daily_lat_ip[cape_thres][
            inm_day_months == month_number
        ]
        inm_data_LATxMON[:, month_idx] = inm_month_values.mean(axis=0)

    np.save(
        f"./data/WRF/WRF_IP_{cape_thres}_LATxMON.npy",
        wrf_data_LATxMON
    )
    np.save(
        f"./data/INMCM/INMCM_IP_{cape_thres}_LATxMON.npy",
        inm_data_LATxMON
    )
Preprocessing IP data from the WRF: the new parameterisation¶
In [10]:
# grid cell contributions to the IP (not normalised) with the shape
# (number of days, number of hours, number of latitudes, number of longitudes)
# simulated using the WRF with CAPE threshold = 500 J/kg
# and temperature threshold = 25 °C
# contains values of contributions to the IP depending on (d, h, lat, lon)
# d (axis 0) is the number of a day starting with 0 and ending with 5113
# every third day is taken
# d = 0 corresponds to 1 Jan 1980
# d = 5113 corresponds to 30 Dec 2021
# d = 4991 corresponds to 29 Dec 2020
# (we will restrict our attention to 1980–2020)
# h (axis 1) is the hour of the day (an integer in [0, 24])
# the values corresponding to h = 0 and h = 24 are the same
# (we delete the 25th value)
# lat (axis 2) describes the latitude (an integer in [0, 179])
# lon (axis 3) describes the longitude (an integer in [0, 359])
wrf_raw_ip_data = np.load(
    f"{src_path}/WRF-IP-MAP-500-T2-25.npy"
)[:wrf_N_days, :24]

# normalising contributions to the IP to the global mean of 240 kV
wrf_raw_ip_data /= (1/240e3) * wrf_raw_ip_data.sum(axis=(-2,-1)).mean()

# diurnal average IP values summed over longitudes
# (this name is read by the monthly-averaging cell for this parameterisation)
wrf_daily_latitudal_ip = wrf_raw_ip_data.mean(axis=1).sum(axis=-1)

# hourly IP values summed over longitudes and latitudes
# note: the result is stored under its own name on purpose; binding it to
# `wrf_hourly_total_ip` would replace the dictionary keyed by CAPE threshold
# with an array, so re-running the cell that fills that dictionary
# (`wrf_hourly_total_ip[cape_thres] = ...`) would fail
wrf_hourly_total_ip_500_T2_25 = wrf_raw_ip_data.sum(axis=(-2, -1))

np.save(
    "./data/WRF/WRF_HOURLY_TOTAL_IP_500_T2_25.npy",
    wrf_hourly_total_ip_500_T2_25,
)
In [11]:
# monthly mean values for each latitude
# (rows are latitudes, columns are months)
wrf_data_LATxMON = np.zeros((180, 12))

# month number (1–12) of every analysed WRF day
wrf_day_months = np.array([date.month for date in wrf_dt_indices])

# going through the months (`month_idx` starts with 0)
for month_idx in range(12):
    # selecting the days which belong to the current month
    wrf_monthly_mask = wrf_day_months == month_idx + 1
    # averaging over the selected days
    wrf_month_values = wrf_daily_latitudal_ip[wrf_monthly_mask]
    wrf_data_LATxMON[:, month_idx] = wrf_month_values.mean(axis=0)

np.save("./data/WRF/WRF_IP_500_T2_25_LATxMON.npy", wrf_data_LATxMON)
Saving the number of days for each month (used to compute mean values)¶
In [12]:
# counting how many analysed days fall into each calendar month
# (the counts serve as weights: months contain different numbers of days,
# so a plain mean over the 12 monthly values would be biased)
wrf_days = np.array([
    sum(date.month == month for date in wrf_dt_indices)
    for month in range(1, 13)
])
inm_days = np.array([
    sum(date.month == month for date in inm_dt_indices)
    for month in range(1, 13)
])

np.save("./data/WRF/WRF_NUMDAYS_MON.npy", wrf_days)
np.save("./data/INMCM/INMCM_NUMDAYS_MON.npy", inm_days)

# to calculate the annual mean value, use the weighted form
# `(wrf_data_LATxMON[:, :].sum(axis=0) * wrf_days).sum() / wrf_days.sum()`
# rather than the unweighted
# `wrf_data_LATxMON[:, :].sum(axis=0).mean()`,
# since in general
# `((a1+a2+a3)/3 + (b1+b2)/2)/2 != (a1+a2+a3+b1+b2)/5`
In [ ]: