From 54768a5c968aca10b317061b3dc40c70b0804b61 Mon Sep 17 00:00:00 2001 From: James Halgren Date: Fri, 23 Sep 2022 15:36:15 -0500 Subject: [PATCH 1/2] move to folder --- .../parquet-demo-short-range-18files.ipynb | 0 .../parquet-demo-short-range-18files_feature_id.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc001.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc002.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc003.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc004.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc009.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc010.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc011.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc012.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc017.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc018.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc019.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc020.ipynb | 0 .../parquet-demo-short-range-18files_feature_id_tc025.ipynb | 0 .../parquet-demo-short-range-18files_multiple_feature_id.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc005.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc006.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc007.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc008.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc013.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc014.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc015.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc016.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc021.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc022.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc023.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc024.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc029.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc030.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc031.ipynb | 0 ...rquet-demo-short-range-18files_multiple_feature_id_tc032.ipynb | 0 .../parquet-demo-short-range-18files_plot_example.ipynb | 0 33 files changed, 0 insertions(+), 0 deletions(-) rename parquet-demo-short-range-18files.ipynb => parquet/parquet-demo-short-range-18files.ipynb (100%) rename parquet-demo-short-range-18files_feature_id.ipynb => parquet/parquet-demo-short-range-18files_feature_id.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc001.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc001.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc002.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc002.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc003.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc003.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc004.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc004.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc009.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc009.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc010.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc010.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc011.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc011.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc012.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc012.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc017.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc017.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc018.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc018.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc019.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc019.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc020.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc020.ipynb (100%) rename parquet-demo-short-range-18files_feature_id_tc025.ipynb => parquet/parquet-demo-short-range-18files_feature_id_tc025.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc005.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc005.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc006.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc006.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc007.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc007.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc008.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc008.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc013.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc013.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc014.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc014.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc015.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc015.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc016.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc016.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc021.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc021.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc022.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc022.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc023.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc023.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc024.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc024.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc029.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc029.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc030.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc030.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc031.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc031.ipynb (100%) rename parquet-demo-short-range-18files_multiple_feature_id_tc032.ipynb => parquet/parquet-demo-short-range-18files_multiple_feature_id_tc032.ipynb (100%) rename parquet-demo-short-range-18files_plot_example.ipynb => parquet/parquet-demo-short-range-18files_plot_example.ipynb (100%) diff --git a/parquet-demo-short-range-18files.ipynb b/parquet/parquet-demo-short-range-18files.ipynb similarity index 100% rename from parquet-demo-short-range-18files.ipynb rename to parquet/parquet-demo-short-range-18files.ipynb diff --git a/parquet-demo-short-range-18files_feature_id.ipynb b/parquet/parquet-demo-short-range-18files_feature_id.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc001.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc001.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc001.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc001.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc002.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc002.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc002.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc002.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc003.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc003.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc003.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc003.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc004.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc004.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc004.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc004.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc009.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc009.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc009.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc009.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc010.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc010.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc010.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc010.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc011.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc011.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc011.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc011.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc012.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc012.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc012.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc012.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc017.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc017.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc017.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc017.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc018.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc018.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc018.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc018.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc019.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc019.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc019.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc019.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc020.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc020.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc020.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc020.ipynb diff --git a/parquet-demo-short-range-18files_feature_id_tc025.ipynb b/parquet/parquet-demo-short-range-18files_feature_id_tc025.ipynb similarity index 100% rename from parquet-demo-short-range-18files_feature_id_tc025.ipynb rename to parquet/parquet-demo-short-range-18files_feature_id_tc025.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc005.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc005.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc005.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc005.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc006.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc006.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc006.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc006.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc007.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc007.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc007.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc007.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc008.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc008.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc008.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc008.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc013.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc013.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc013.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc013.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc014.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc014.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc014.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc014.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc015.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc015.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc015.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc015.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc016.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc016.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc016.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc016.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc021.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc021.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc021.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc021.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc022.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc022.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc022.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc022.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc023.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc023.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc023.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc023.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc024.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc024.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc024.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc024.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc029.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc029.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc029.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc029.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc030.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc030.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc030.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc030.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc031.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc031.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc031.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc031.ipynb diff --git a/parquet-demo-short-range-18files_multiple_feature_id_tc032.ipynb b/parquet/parquet-demo-short-range-18files_multiple_feature_id_tc032.ipynb similarity index 100% rename from parquet-demo-short-range-18files_multiple_feature_id_tc032.ipynb rename to parquet/parquet-demo-short-range-18files_multiple_feature_id_tc032.ipynb diff --git a/parquet-demo-short-range-18files_plot_example.ipynb b/parquet/parquet-demo-short-range-18files_plot_example.ipynb similarity index 100% rename from parquet-demo-short-range-18files_plot_example.ipynb rename to parquet/parquet-demo-short-range-18files_plot_example.ipynb From f3ed9a145a7e41b281558ef399f4ca1eb07dfe53 Mon Sep 17 00:00:00 2001 From: James Halgren Date: Fri, 23 Sep 2022 17:21:11 -0500 Subject: [PATCH 2/2] create example of remote data load --- data/.gitkeep | 0 parquet/example_remote_dl_parquet.ipynb | 328 ++++++++++++++++++++++++ 2 files changed, 328 insertions(+) create mode 100644 data/.gitkeep create mode 100644 parquet/example_remote_dl_parquet.ipynb diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/parquet/example_remote_dl_parquet.ipynb b/parquet/example_remote_dl_parquet.ipynb new file mode 100644 index 0000000..d6b8227 --- /dev/null +++ b/parquet/example_remote_dl_parquet.ipynb @@ -0,0 +1,328 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pyarrow.parquet import ParquetFile\n", + "import dask.dataframe as dd\n", + "import os\n", + "import xarray as xr\n", + "import ujson\n", + "import pprint\n", + "\n", + "#%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# These fs options don't work for http... beware!\n", + "so = dict(mode=\"rb\", anon=True, default_fill_cache=False, default_cache_type=\"first\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def gen_json(u, fs, outf=None):\n", + " with fs.open(u, **so) as infile:\n", + " h5chunks = SingleHdf5ToZarr(infile, u, inline_threshold=300)\n", + " p = u.split(\"/\")\n", + " date = p[3]\n", + " fname = p[5]\n", + " if outf:\n", + " # outf = f'{json_dir}{date}.{fname}.json'\n", + " with open(outf, \"wb\") as f:\n", + " f.write(ujson.dumps(h5chunks.translate()).encode())\n", + " else:\n", + " return h5chunks.translate()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# dir_files = [os.path.join(\"../short_range_18files\", files) for files in os.listdir(\"../short_range_18files\")]\n", + "# dir_files = [os.path.join(\"short_range_2files\", files) for files in os.listdir(\"short_range_2files\")]\n", + "# print(dir_files)\n", + "dir_files = [\n", + " \"nwm.t00z.short_range.channel_rt.f001.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f002.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f003.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f004.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f005.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f006.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f007.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f008.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f009.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f010.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f011.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f012.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f013.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f014.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f015.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f016.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f017.conus.nc\",\n", + " \"nwm.t00z.short_range.channel_rt.f018.conus.nc\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import fsspec\n", + "import xarray as xr\n", + "from kerchunk.hdf import SingleHdf5ToZarr\n", + "\n", + "fs = fsspec.filesystem(\"gcs\", anon=True)\n", + "\n", + "# https://storage.googleapis.com/national-water-model/nwm.20220911/short_range/nwm.t00z.short_range.channel_rt.f001.conus.nc\n", + "# gcs_url = \"gcs://national-water-model/nwm.20220911/short_range/nwm.t00z.short_range.channel_rt.f001.conus.nc\"\n", + "gcs_url = \"gcs://national-water-model/nwm.20220911/short_range/\"\n", + "\n", + "sr_h5 = []\n", + "for f in dir_files:\n", + " print(f)\n", + " sr_h5.append(gen_json(gcs_url + f, fs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%%time\n", + "fds = []\n", + "for xj in sr_h5:\n", + " backend_args = {\n", + " \"consolidated\": False,\n", + " \"storage_options\": {\n", + " \"fo\": xj,\n", + " # Adding these options returns a properly dimensioned but otherwise null dataframe\n", + " # \"remote_protocol\": \"https\",\n", + " # \"remote_options\": {'anon':True}\n", + " },\n", + " }\n", + " fds.append(\n", + " xr.open_dataset(\n", + " \"reference://\",\n", + " engine=\"zarr\",\n", + " mask_and_scale=False,\n", + " backend_kwargs=backend_args,\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%%time\n", + "ds = xr.concat(fds, dim=\"time\")\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "df = ds[\"streamflow\"].to_dataframe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# df = df.streamflow\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%%time\n", + "# df = pd.Series.to_frame(df)\n", + "\n", + "df.to_parquet(\n", + " \"../data/parquet_all_feature_ids.gzip\", engine=\"pyarrow\", compression=\"gzip\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "ParquetFile(\"../data/parquet_all_feature_ids.gzip\").metadata # num_columns: 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%%time\n", + "data = dd.read_parquet(\n", + " \"../data/parquet_all_feature_ids.gzip\", storage_options={\"anon\": True}\n", + ")\n", + "data\n", + "result = data.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# result = result.loc[:, [101]]\n", + "result = result.loc[:, 100:1032]\n", + "# result= result.loc[:, :, 1000:11000]\n", + "# result= result.loc[:, :, 10000:110000]\n", + "result\n", + "r_xa = result.to_xarray()\n", + "r_xa\n", + "r_xa.plot.scatter(\"time\", \"streamflow\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}