diff --git a/flows/resample_to_yearly_monthly.ipynb b/flows/resample_to_yearly_monthly.ipynb new file mode 100644 index 00000000..82aa2750 --- /dev/null +++ b/flows/resample_to_yearly_monthly.ipynb @@ -0,0 +1,203 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "82468291-f425-4fe4-a85e-5f8725d1030a", + "metadata": {}, + "outputs": [], + "source": [ + "import intake\n", + "import xarray as xr\n", + "import warnings\n", + "import fsspec\n", + "import pandas as pd\n", + "import os\n", + "from cmip6_downscaling.methods.common import utils\n", + "from dask.distributed import Client\n", + "\n", + "warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "freq_dict = {'1AS': 'yearly', '1MS': 'monthly'}\n", + "freq_dict_lookup = {'yearly': '1AS', 'monthly': '1MS'}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b051726-fe98-4102-a1a6-7df9cdbbf06b", + "metadata": {}, + "outputs": [], + "source": [ + "client = Client()\n", + "client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f8ae420-0c51-495b-a2ba-9b95f2bff24e", + "metadata": {}, + "outputs": [], + "source": [ + "def get_fsspec_fs():\n", + " if os.environ.get('AZURE_STORAGE_CONNECTION_STRING'):\n", + " del os.environ['AZURE_STORAGE_CONNECTION_STRING']\n", + " sas_token = ''\n", + " account_options = {'account_name': \"cpdataeuwest\", 'sas_token': sas_token}\n", + " return fsspec.filesystem('az', **account_options)\n", + "\n", + "\n", + "def store_exists(fs: fsspec.filesystem, store_path: str) -> bool:\n", + " return fs.exists(store_path.split('windows.net/')[1])\n", + "\n", + "\n", + "def return_intake_cat_df():\n", + " return intake.open_esm_datastore(\n", + " 'https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/catalogs/global-downscaled-cmip6.json'\n", + " ).df\n", + "\n", + "\n", + "def create_time_summary_options(df: pd.DataFrame) -> dict:\n", + " \"\"\"Create dictionary of frequency options and zarr store uris.\"\"\"\n", + " time_summary_dict = {}\n", + " for freq in freq_dict.keys():\n", + " time_summary_dict.update(\n", + " {\n", + " freq_dict[freq]: list(\n", + " df['downscaled_daily_data_uri'].str.replace('day', freq_dict[freq])\n", + " )\n", + " }\n", + " )\n", + " return time_summary_dict\n", + "\n", + "\n", + "def resample_dataset(resampled_path: str, freq: str, output_path: str = None):\n", + " \"\"\"resamples zarr store to frequency (monthly, yearly). Writes output to zarr store\"\"\"\n", + " daily_path = resampled_path.replace(freq, 'day')\n", + " ds = xr.open_zarr(daily_path)\n", + " out_ds = utils.resample_wrapper(ds, freq=freq_dict_lookup[freq])\n", + " if output_path:\n", + " out_ds.to_zarr(output_path, mode='w', consolidated=True)\n", + " else:\n", + " out_ds.to_zarr(resampled_path, mode='w', consolidated=True)\n", + "\n", + "\n", + "def resample_time_summaries(time_summary_dict: dict, fs: fsspec.filesystem):\n", + " \"\"\"Iterates through aval time frequencies (monthly, yearly), checks if store exists, runs resample_dataset function.\"\"\"\n", + " for freq in time_summary_dict.keys():\n", + " for time_summary in time_summary_dict[freq]:\n", + " if not store_exists(fs, time_summary): # if fsspec finds the store does not exist:\n", + " output_path = 's3://carbonplan-scratch' + time_summary.split('.net')[1]\n", + " resample_dataset(time_summary, freq, output_path=output_path)\n", + "\n", + "\n", + "def update_catalog(intake_cat_df: pd.DataFrame, fs: fsspec.filesystem):\n", + " \"\"\"Updates catalog with newly resampled stores\"\"\"\n", + " for freq in freq_dict.keys():\n", + " for index, row in intake_cat_df.iterrows():\n", + " mod_row = row.copy(deep=True)\n", + " downscaled_freq_data_uri = mod_row.downscaled_daily_data_uri.replace(\n", + " 'day', freq_dict[freq]\n", + " )\n", + " if store_exists(fs, downscaled_freq_data_uri): # if the store exists\n", + " mod_row.timescale = freq_dict[freq]\n", + " mod_row.downscaled_daily_data_uri = downscaled_freq_data_uri\n", + " intake_cat_df.append(mod_row, ignore_index=False)\n", + " intake_cat_df.loc[len(intake_cat_df)] = mod_row\n", + "\n", + " return intake_cat_df.drop_duplicates(\n", + " subset=intake_cat_df.columns.difference(['downscaled_daily_data_uri']), keep='first'\n", + " )\n", + "\n", + "\n", + "# check catalog, then save and replace once QA'd." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea5d5936-80e1-42f3-8c93-3db368ec7143", + "metadata": {}, + "outputs": [], + "source": [ + "fs = get_fsspec_fs()\n", + "intake_cat_df = return_intake_cat_df()\n", + "time_summary_dict = create_time_summary_options(intake_cat_df)\n", + "resample_time_summaries(time_summary_dict, fs)\n", + "updated_catalog = update_catalog(intake_cat_df, fs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "815e17de-bf6e-4d60-9aab-28d953906de8", + "metadata": {}, + "outputs": [], + "source": [ + "# updated_catalog.to_csv('s3://carbonplan-scratch/updated_catalog.csv',index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77349920-e0b5-4cc4-b00d-30c63cb3bb41", + "metadata": {}, + "outputs": [], + "source": [ + "###\n", + "ds_month = xr.open_zarr(\n", + " 's3://carbonplan-scratch/cp-cmip/version1/data/DeepSD-BC/CMIP.CCCma.CanESM5.historical.r1i1p1f1.monthly.DeepSD-BC.pr.zarr'\n", + ")\n", + "ds_year = xr.open_zarr(\n", + " 's3://carbonplan-scratch/cp-cmip/version1/data/DeepSD-BC/CMIP.CCCma.CanESM5.historical.r1i1p1f1.yearly.DeepSD-BC.pr.zarr'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdbc96b7-9dab-444e-bc72-a8b16ce742f0", + "metadata": {}, + "outputs": [], + "source": [ + "ds_month" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a55d91c2-63cb-4d17-a016-1c93d588e1d5", + "metadata": {}, + "outputs": [], + "source": [ + "ds_year" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.9 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}