-
Notifications
You must be signed in to change notification settings - Fork 12
/
make_hres_input.py
115 lines (92 loc) · 3.56 KB
/
make_hres_input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import numpy as np
import pandas as pd
import xarray as xr
"""
FuXi模型的输入变量:
5个气压层变量: ['Z', 'T', 'U', 'V', 'R'],
每个变量包含13层: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000],
5个地面变量: ['T2M', 'U10', 'V10', 'MSL', 'TP'];
注意事项:
1. 输入是连续的两个历史时刻, 间隔6小时, 分辨率是0.25; eg: [00, 06]做为输入那么起报时刻是06点;
2. Z不是Geopential Height, 是 Geopential;
3. 降水是6小时累积单位mm, 第一个时刻的降水可以置0;
4. 温度是开尔文单位;
5. R表示相对湿度;
6. 纬度方向是90 ~ -90;
7. 气压层的顺序是从高空到地面50 ~ 1000; eg: Z50, Z100, ... , Z1000;
8. 数据中不能有NAN;
"""
def make_hres_input(init_time, data_dir, save_dir, degree=0.25):
lat = np.linspace(-90, 90, int(180 / degree) + 1, dtype=np.float32)
lon = np.arange(0, 360, degree, dtype=np.float32)
pl_names = ["z", "t", "u", "v", "r"]
sfc_names = ["t2m", "u10", "v10", "msl", "tp"]
levels = [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
input = []
level = []
for name in pl_names + sfc_names:
src_name = "{}_{}".format(name, init_time.strftime("%Y%m%d%H.nc"))
src_file = os.path.join(data_dir, src_name)
if not os.path.exists(src_file):
return
try:
v = xr.open_dataset(src_file)
v = v.sel(time=init_time, drop=True).data
except:
print(f"open {src_file} failed")
return
# is there nan in raw data ?
if np.isnan(v).sum() > 0:
print(f"{src_name} has nan value")
return
# interpolate to 0.25 deg
v = v.interp(lat=lat, lon=lon, kwargs={"fill_value": "extrapolate"})
# make sure on nan
if np.isnan(v).sum() > 0:
print(f"{src_name} has nan value")
return
# reverse pressure level
try:
if name in pl_names:
v = xr.concat([v.sel(level=l) for l in levels], "level")
level.extend([f"{name}{l}" for l in levels])
except:
print("missing pressure level")
return
if name in sfc_names:
level.append(name)
# temperature in kelvin
if name == "t":
v = v + 273.15
# FuXi take two step as input
if name == "tp":
v = v.clip(min=0, max=1000)
zero = v * 0
zero = zero.assign_coords(dtime=[0])
v = xr.concat([zero, v], "dtime")
print(f"{src_name}: {v.min().values:.2f} ~ {v.max().values:.2f}")
v.attrs = {}
v = v.rename({"dtime": "time"})
v = v.squeeze("member").drop("member")
input.append(v)
# concat and reshape
input = xr.concat(input, "level")
input = input.transpose("time", "level", "lat", "lon")
valid_time = init_time + pd.Timedelta(hours=6) # utc time
v = v.assign_coords(time=[init_time, valid_time])
# reverse latitude
input = input.reindex(lat=input.lat[::-1])
input = input.assign_coords(level=level)
input.name = "data"
# save to nc
print(input)
save_name = os.path.join(save_dir, init_time.strftime("%Y%m%d-%H.nc"))
input = input.astype(np.float32)
input.to_netcdf(save_name)
def test_make_input():
init_time = pd.to_datetime("20230731-12") # must utc
data_dir = "data/HRES"
save_dir = "data/HRES/input"
os.makedirs(save_dir, exist_ok=True)
make_hres_input(init_time, data_dir, save_dir)