From 0a3299282ecf062674511b379fee1f8a7bc9692d Mon Sep 17 00:00:00 2001 From: blizhan Date: Wed, 22 Nov 2023 15:11:24 +0800 Subject: [PATCH] feat: extend available index --- .gitignore | 3 +- README.md | 12 +- example/climate_index.ipynb | 434 ++++++++++++++++++++ maesters_of_clim/__init__.py | 22 +- maesters_of_clim/config/index_forecast.toml | 5 +- maesters_of_clim/config/index_history.toml | 25 ++ maesters_of_clim/fetcher/jamstec.py | 24 ++ maesters_of_clim/fetcher/psl.py | 2 +- setup.py | 2 +- 9 files changed, 521 insertions(+), 8 deletions(-) create mode 100644 maesters_of_clim/fetcher/jamstec.py diff --git a/.gitignore b/.gitignore index e6d6f9b..ebf80d5 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ __pycache__ *_checkpoints *egg-info *dist -*build \ No newline at end of file +*build +*.ipynb \ No newline at end of file diff --git a/README.md b/README.md index 350f8e0..2243b99 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ The following support |--|--|--|--|--| |IRI|IRI|forecast|ENSO Probability|`Climate_Maester(['enso'], 'iri').forecast(pred_at=date)`| |IRI|CPC|forecast|ENSO Probability|`Climate_Maester(['enso'], 'cpc').forecast(pred_at=date)`| +|JAMSTEC|JAMSTEC|forecast| Dipole Mode Index|`Climate_Maester(['dmi'], 'jamstec').forecast()`| |PSL/NCEI|PSL/NCEI|history|Nina 34 Anomaly|`Climate_Maester(['nina34a'], 'ncei').history()`| |PSL/NCEI|PSL/NCEI|history|Nina 3 Anomaly|`Climate_Maester(['nina3'], 'ncei').history()`| |PSL/NCEI|PSL/NCEI|history|Nina 4 Anomaly|`Climate_Maester(['nina4'], 'ncei').history()`| @@ -27,16 +28,25 @@ The following support |PSL|PSL|history|Bivariate ENSO from nina3.4 & soi|`Climate_Maester(['censo'], 'psl').history()`| |PSL|PSL|history|Western Pacific Index|`Climate_Maester(['wp'], 'psl').history()`| |PSL|PSL|history|AMO smoothed|`Climate_Maester(['amo_sm'], 'psl').history()`| +|PSL|PSL|history|Dipole Mode Index|`Climate_Maester(['dmi'], 'psl').history()`| +|PSL|PSL|history|Dipole Mode Index 
West|`Climate_Maester(['dmiwest'], 'psl').history()`| +|PSL|PSL|history|Dipole Mode Index East|`Climate_Maester(['dmieast'], 'psl').history()`| +|PSL|PSL|history|North Atlantic Oscillation|`Climate_Maester(['nao'], 'psl').history()`| +|PSL|PSL|history|North Pacific Index|`Climate_Maester(['np'], 'psl').history()`| +|PSL|PSL|history|Tripole Index (Interdecadal Pacific Oscillation)|`Climate_Maester(['tpi'], 'psl').history()`| +|PSL|PSL|history|Global Average Temperature Anomaly from Station|`Climate_Maester(['glbts'], 'psl').history()`| +|PSL|PSL|history|Global Average Temperature Anomaly from Station and SST|`Climate_Maester(['glbtssst'], 'psl').history()`| |PSL/NCEI|PSL/NCEI|history|AMO unsmoothed|`Climate_Maester(['amo'], 'ncei').history()`| |PSL/NCEI|PSL/NCEI|history|Pacific Decadal Oscillation|`Climate_Maester(['pdo'], 'ncei').history()`| + ### Install ```shell pip install maesters-clim ``` -### Usage +### [Usage](example/climate_index.ipynb) ```python from maesters_of_clim import Climate_Maester from datetime import datetime diff --git a/example/climate_index.ipynb b/example/climate_index.ipynb index ccf83db..3617c5d 100644 --- a/example/climate_index.ipynb +++ b/example/climate_index.ipynb @@ -54,6 +54,47 @@ "df.set_index('month').plot(figsize=(12,6))" ] }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ncei': ['ninaa', 'nina', 'pdo', 'amo', 'iod'],\n", + " 'psl': ['nina34a',\n", + " 'nina4a',\n", + " 'nina3a',\n", + " 'nina1a',\n", + " 'soi',\n", + " 'oni',\n", + " 'tni',\n", + " 'censo',\n", + " 'ao',\n", + " 'pdo',\n", + " 'wp',\n", + " 'amo_us',\n", + " 'amo_sm',\n", + " 'dmi',\n", + " 'dmiwest',\n", + " 'dmieast',\n", + " 'nao',\n", + " 'np',\n", + " 'tpi',\n", + " 'glbts',\n", + " 'glbtssst']}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Climate_Maester.list_history_indexes()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -686,6 
+727,392 @@ "detaildf" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MeanN2K1N3K2VN2K2hiVN1K1hiN1K2N3K2hiVN3K1VF2-MeanF2-3DVAR-Mean...sfe41dN3K2hiN2K2VN2K1Vsfe31dsfe33dN2K2N1K2VForecast_MonthRelease_Month
00.8648260.8527311.3916491.0278141.2319740.6580671.0293371.0862320.7054221.024229...NaN0.3921481.3606471.096233NaNNaN0.5868741.0423632023-122023-11
10.6931440.6431130.7994560.7758180.9141160.5685640.6560261.0615120.6062170.780071...NaN0.5780270.6372460.903040NaNNaN0.8793290.7871272024-012023-11
20.164003-0.0011320.402743-0.2331810.226465-0.0150450.5960270.5219860.1047570.223248...NaN0.4226580.9236630.324694NaNNaN0.035070-0.2054282024-022023-11
3-0.0547370.2014890.070049-0.3102180.164510-0.2038000.268819-0.476760-0.038085-0.071388...NaN-0.0424730.1281600.346698NaNNaN0.196848-0.3493592024-032023-11
40.0250790.1148160.295845-0.5667310.1010670.300536-0.140333-0.1758710.068239-0.018081...NaN0.200811-0.1061440.220650NaNNaN-0.292427-0.1439252024-042023-11
50.048468-0.321668-0.314554-0.0832530.3556310.520656-0.509384-0.2509820.0648690.032068...NaN0.7299940.162401-0.027692NaNNaN-0.0288200.4521472024-052023-11
6-0.029192-0.665404-0.0839090.1733520.4813250.596224-0.437844-0.582283-0.026888-0.031495...NaN0.7193460.516146-0.783964NaNNaN0.0746620.1365652024-062023-11
7-0.102502-0.609292-0.1908140.1205240.5256761.210806-0.694265-1.018683-0.011541-0.193464...NaN0.8296730.299437-0.732081NaNNaN0.1015680.3163952024-072023-11
8-0.330929-0.469554-0.297916-0.4955500.2554661.575441-1.222670-1.803841-0.058657-0.603202...NaN0.751265-0.334907-1.291626NaNNaN0.0099730.5824752024-082023-11
9-0.623000-1.0345160.005241-0.5929270.1459911.622889-1.574323-2.292726-0.263819-0.982181...NaN0.672980-0.820570-1.742812NaNNaN-0.5903840.2985932024-092023-11
10NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaN2024-102023-11
\n", + "

11 rows × 39 columns

\n", + "
" + ], + "text/plain": [ + " Mean N2K1 N3K2V N2K2hiV N1K1hi N1K2 N3K2hiV \\\n", + "0 0.864826 0.852731 1.391649 1.027814 1.231974 0.658067 1.029337 \n", + "1 0.693144 0.643113 0.799456 0.775818 0.914116 0.568564 0.656026 \n", + "2 0.164003 -0.001132 0.402743 -0.233181 0.226465 -0.015045 0.596027 \n", + "3 -0.054737 0.201489 0.070049 -0.310218 0.164510 -0.203800 0.268819 \n", + "4 0.025079 0.114816 0.295845 -0.566731 0.101067 0.300536 -0.140333 \n", + "5 0.048468 -0.321668 -0.314554 -0.083253 0.355631 0.520656 -0.509384 \n", + "6 -0.029192 -0.665404 -0.083909 0.173352 0.481325 0.596224 -0.437844 \n", + "7 -0.102502 -0.609292 -0.190814 0.120524 0.525676 1.210806 -0.694265 \n", + "8 -0.330929 -0.469554 -0.297916 -0.495550 0.255466 1.575441 -1.222670 \n", + "9 -0.623000 -1.034516 0.005241 -0.592927 0.145991 1.622889 -1.574323 \n", + "10 NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " N3K1V F2-Mean F2-3DVAR-Mean ... sfe41d N3K2hi N2K2V \\\n", + "0 1.086232 0.705422 1.024229 ... NaN 0.392148 1.360647 \n", + "1 1.061512 0.606217 0.780071 ... NaN 0.578027 0.637246 \n", + "2 0.521986 0.104757 0.223248 ... NaN 0.422658 0.923663 \n", + "3 -0.476760 -0.038085 -0.071388 ... NaN -0.042473 0.128160 \n", + "4 -0.175871 0.068239 -0.018081 ... NaN 0.200811 -0.106144 \n", + "5 -0.250982 0.064869 0.032068 ... NaN 0.729994 0.162401 \n", + "6 -0.582283 -0.026888 -0.031495 ... NaN 0.719346 0.516146 \n", + "7 -1.018683 -0.011541 -0.193464 ... NaN 0.829673 0.299437 \n", + "8 -1.803841 -0.058657 -0.603202 ... NaN 0.751265 -0.334907 \n", + "9 -2.292726 -0.263819 -0.982181 ... NaN 0.672980 -0.820570 \n", + "10 NaN NaN NaN ... 
NaN NaN NaN \n", + "\n", + " N2K1V sfe31d sfe33d N2K2 N1K2V Forecast_Month \\\n", + "0 1.096233 NaN NaN 0.586874 1.042363 2023-12 \n", + "1 0.903040 NaN NaN 0.879329 0.787127 2024-01 \n", + "2 0.324694 NaN NaN 0.035070 -0.205428 2024-02 \n", + "3 0.346698 NaN NaN 0.196848 -0.349359 2024-03 \n", + "4 0.220650 NaN NaN -0.292427 -0.143925 2024-04 \n", + "5 -0.027692 NaN NaN -0.028820 0.452147 2024-05 \n", + "6 -0.783964 NaN NaN 0.074662 0.136565 2024-06 \n", + "7 -0.732081 NaN NaN 0.101568 0.316395 2024-07 \n", + "8 -1.291626 NaN NaN 0.009973 0.582475 2024-08 \n", + "9 -1.742812 NaN NaN -0.590384 0.298593 2024-09 \n", + "10 NaN NaN NaN NaN NaN 2024-10 \n", + "\n", + " Release_Month \n", + "0 2023-11 \n", + "1 2023-11 \n", + "2 2023-11 \n", + "3 2023-11 \n", + "4 2023-11 \n", + "5 2023-11 \n", + "6 2023-11 \n", + "7 2023-11 \n", + "8 2023-11 \n", + "9 2023-11 \n", + "10 2023-11 \n", + "\n", + "[11 rows x 39 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = Climate_Maester('dmi', 'jamstec')\n", + "dmidf = c.forecast()\n", + "dmidf" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -849,6 +1276,13 @@ "df[~df['enso_event'].isna()]" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/maesters_of_clim/__init__.py b/maesters_of_clim/__init__.py index 1a3df9e..a51b0a5 100644 --- a/maesters_of_clim/__init__.py +++ b/maesters_of_clim/__init__.py @@ -1,6 +1,8 @@ from maesters_of_clim.fetcher.iri import get_iri_ensoprob_forecast from maesters_of_clim.fetcher.psl import get_psl_index_history from maesters_of_clim.fetcher.ncei import get_ncei_index_history +from maesters_of_clim.fetcher.jamstec import get_jamstec_dmi_forecast +from maesters_of_clim.config import load_config import pandas as pd @@ -37,14 +39,28 @@ def history(self)->pd.DataFrame: return df - def forecast(self, 
pred_at:datetime, source:str=None)->pd.DataFrame: + def forecast(self, pred_at:datetime=None, source:str=None)->pd.DataFrame: source = self.source if source is None else source if source in ['iri', 'cpc'] and 'enso' in self.indexes: df = get_iri_ensoprob_forecast(pred_month=pred_at, source=source) + elif source in ['jamstec'] and 'dmi' in self.indexes: + df = get_jamstec_dmi_forecast() elif self.source is not None: df = get_iri_ensoprob_forecast(pred_month=pred_at, source=source) else: df = get_iri_ensoprob_forecast(pred_month=pred_at) return df - - + + @staticmethod + def list_history_indexes(source:str=None): + if source is None: + source = ['ncei', 'psl'] + elif isinstance(source, str): + source = [source] + config = load_config('index_history') + result = {s: [] for s in source} + for s in source: + idxes = [i for i in config.get(s, {}).keys() if i != 'default'] + result[s].extend(idxes) + return result + \ No newline at end of file diff --git a/maesters_of_clim/config/index_forecast.toml b/maesters_of_clim/config/index_forecast.toml index ec639fe..dcccc63 100644 --- a/maesters_of_clim/config/index_forecast.toml +++ b/maesters_of_clim/config/index_forecast.toml @@ -1,3 +1,6 @@ [iri] [iri.nina34a] # nino3.4 anomaly -index_url='https://iri.columbia.edu/our-expertise/climate/forecasts/enso/%Y-{MONTH_ENG}-quick-look/?enso_tab=enso-{SOURCE}_{type}' \ No newline at end of file +index_url='https://iri.columbia.edu/our-expertise/climate/forecasts/enso/%Y-{MONTH_ENG}-quick-look/?enso_tab=enso-{SOURCE}_{type}' +[jamstec] +[jamstec.dmi] # Dipole Mode Index +index_url='https://www.jamstec.go.jp/virtualearth/data/SINTEX/SINTEX_DMI.csv' \ No newline at end of file diff --git a/maesters_of_clim/config/index_history.toml b/maesters_of_clim/config/index_history.toml index ac1dae0..8020033 100644 --- a/maesters_of_clim/config/index_history.toml +++ b/maesters_of_clim/config/index_history.toml @@ -35,6 +35,31 @@ index_url='https://psl.noaa.gov/data/correlation/amon.us.data' 
[psl.amo_sm] # AMO, smoothed index_url='https://psl.noaa.gov/data/correlation/amon.sm.data' missing=-99.99 +[psl.dmi] +index_url='https://psl.noaa.gov/gcos_wgsp/Timeseries/Data/dmi.had.long.data' +missing=-9999 +[psl.dmiwest] +index_url='https://psl.noaa.gov/gcos_wgsp/Timeseries/Data/dmiwest.had.long.data' +missing=-9999 +[psl.dmieast] +index_url='https://psl.noaa.gov/gcos_wgsp/Timeseries/Data/dmieast.had.long.data' +missing=-9999 +[psl.nao] +index_url='https://psl.noaa.gov/gcos_wgsp/Timeseries/Data/nao.long.data' +missing=-99.99 +[psl.np] +index_url='https://psl.noaa.gov/gcos_wgsp/Timeseries/Data/np.long.data' +missing=-999 +[psl.tpi] # IPO +index_url='https://psl.noaa.gov/data/timeseries/IPOTPI/tpi.timeseries.ersstv5.data' +missing=-99 +[psl.glbts] +index_url='https://psl.noaa.gov/gcos_wgsp/Timeseries/Data/GLBTS.long.data' +missing=9999 +[psl.glbtssst] +index_url='https://psl.noaa.gov/gcos_wgsp/Timeseries/Data/GLBTSSST.long.data' +missing=9999 + [ncei] [ncei.default] missing=99.99 diff --git a/maesters_of_clim/fetcher/jamstec.py b/maesters_of_clim/fetcher/jamstec.py new file mode 100644 index 0000000..b26c94e --- /dev/null +++ b/maesters_of_clim/fetcher/jamstec.py @@ -0,0 +1,24 @@ +from maesters_of_clim.config import load_config + +from bs4 import BeautifulSoup +import pandas as pd +from retrying import retry + +from datetime import datetime, timedelta +import calendar +import requests + +iri_index_forecast = load_config('index_forecast').get('jamstec') + +dmi_url = iri_index_forecast.get('dmi').get('index_url') + + +@retry(stop_max_attempt_number=5) +def get_jamstec_dmi_forecast() -> pd.DataFrame: + df = pd.read_csv(dmi_url, parse_dates=['time']) + df = df.sort_values('time').set_index('time') + release_month = df[df.isna().all(axis=1).values].iloc[0].name + forecast_cols = ['Mean'] + list(set(df.columns) - set(['Obs', 'time', 'Mean'])) + df['Forecast_Month'] = df.index.map(lambda x: x.strftime('%Y-%m')) + df['Release_Month'] = release_month.strftime('%Y-%m') + 
return df[df.index>release_month].reset_index()[forecast_cols+['Forecast_Month', 'Release_Month']] diff --git a/maesters_of_clim/fetcher/psl.py b/maesters_of_clim/fetcher/psl.py index 16d2d07..71eb22d 100644 --- a/maesters_of_clim/fetcher/psl.py +++ b/maesters_of_clim/fetcher/psl.py @@ -40,7 +40,7 @@ def get_psl_index_history(index_name:str)->pd.DataFrame: index_url = index_dict.get('index_url') columns = index_dict.get('columns', psl_index_history.get('default').get('columns')) missing = index_dict.get('missing', psl_index_history.get('default').get('missing')) - df = pd.read_csv(index_url, delimiter='\s+', names=columns) + df = pd.read_csv(index_url, delimiter='\s+', names=columns,on_bad_lines='skip') if index_name in []: pass diff --git a/setup.py b/setup.py index 98c770c..457f03d 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name="maesters-clim", - version="0.0.3b", + version="0.0.4", author="blizhan", author_email="blizhan@icloud.com", description="Maesters-of-Clim tempt to help retriving climate data (climate index, reanalysis) from the main-stream climate insitution (like IRI, PSL, NCEI, RDA).",