Skip to content

Commit

Permalink
notebook to generate monthly/yearly aggregations for msft bucket (#289)
Browse files Browse the repository at this point in the history
* notebook to generate monthly/yearly aggregations for msft bucket

* Update flows/resample_to_yearly_monthly.ipynb

Co-authored-by: Anderson Banihirwe <axbanihirwe@ualr.edu>

* Update flows/resample_to_yearly_monthly.ipynb

Co-authored-by: Anderson Banihirwe <axbanihirwe@ualr.edu>

Co-authored-by: Anderson Banihirwe <axbanihirwe@ualr.edu>
  • Loading branch information
norlandrhagen and andersy005 authored Oct 6, 2022
1 parent e666b5b commit bf06467
Showing 1 changed file with 203 additions and 0 deletions.
203 changes: 203 additions & 0 deletions flows/resample_to_yearly_monthly.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "82468291-f425-4fe4-a85e-5f8725d1030a",
"metadata": {},
"outputs": [],
"source": [
"import intake\n",
"import xarray as xr\n",
"import warnings\n",
"import fsspec\n",
"import pandas as pd\n",
"import os\n",
"from cmip6_downscaling.methods.common import utils\n",
"from dask.distributed import Client\n",
"\n",
"warnings.simplefilter(action='ignore', category=FutureWarning)\n",
"freq_dict = {'1AS': 'yearly', '1MS': 'monthly'}\n",
"freq_dict_lookup = {'yearly': '1AS', 'monthly': '1MS'}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b051726-fe98-4102-a1a6-7df9cdbbf06b",
"metadata": {},
"outputs": [],
"source": [
"client = Client()\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f8ae420-0c51-495b-a2ba-9b95f2bff24e",
"metadata": {},
"outputs": [],
"source": [
"def get_fsspec_fs():\n",
" if os.environ.get('AZURE_STORAGE_CONNECTION_STRING'):\n",
" del os.environ['AZURE_STORAGE_CONNECTION_STRING']\n",
" sas_token = ''\n",
" account_options = {'account_name': \"cpdataeuwest\", 'sas_token': sas_token}\n",
" return fsspec.filesystem('az', **account_options)\n",
"\n",
"\n",
"def store_exists(fs: fsspec.filesystem, store_path: str) -> bool:\n",
" return fs.exists(store_path.split('windows.net/')[1])\n",
"\n",
"\n",
"def return_intake_cat_df():\n",
" return intake.open_esm_datastore(\n",
" 'https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/catalogs/global-downscaled-cmip6.json'\n",
" ).df\n",
"\n",
"\n",
"def create_time_summary_options(df: pd.DataFrame) -> dict:\n",
" \"\"\"Create dictionary of frequency options and zarr store uris.\"\"\"\n",
" time_summary_dict = {}\n",
" for freq in freq_dict.keys():\n",
" time_summary_dict.update(\n",
" {\n",
" freq_dict[freq]: list(\n",
" df['downscaled_daily_data_uri'].str.replace('day', freq_dict[freq])\n",
" )\n",
" }\n",
" )\n",
" return time_summary_dict\n",
"\n",
"\n",
"def resample_dataset(resampled_path: str, freq: str, output_path: str = None):\n",
" \"\"\"resamples zarr store to frequency (monthly, yearly). Writes output to zarr store\"\"\"\n",
" daily_path = resampled_path.replace(freq, 'day')\n",
" ds = xr.open_zarr(daily_path)\n",
" out_ds = utils.resample_wrapper(ds, freq=freq_dict_lookup[freq])\n",
" if output_path:\n",
" out_ds.to_zarr(output_path, mode='w', consolidated=True)\n",
" else:\n",
" out_ds.to_zarr(resampled_path, mode='w', consolidated=True)\n",
"\n",
"\n",
"def resample_time_summaries(time_summary_dict: dict, fs: fsspec.filesystem):\n",
" \"\"\"Iterates through aval time frequencies (monthly, yearly), checks if store exists, runs resample_dataset function.\"\"\"\n",
" for freq in time_summary_dict.keys():\n",
" for time_summary in time_summary_dict[freq]:\n",
" if not store_exists(fs, time_summary): # if fsspec finds the store does not exist:\n",
" output_path = 's3://carbonplan-scratch' + time_summary.split('.net')[1]\n",
" resample_dataset(time_summary, freq, output_path=output_path)\n",
"\n",
"\n",
"def update_catalog(intake_cat_df: pd.DataFrame, fs: fsspec.filesystem):\n",
" \"\"\"Updates catalog with newly resampled stores\"\"\"\n",
" for freq in freq_dict.keys():\n",
" for index, row in intake_cat_df.iterrows():\n",
" mod_row = row.copy(deep=True)\n",
" downscaled_freq_data_uri = mod_row.downscaled_daily_data_uri.replace(\n",
" 'day', freq_dict[freq]\n",
" )\n",
" if store_exists(fs, downscaled_freq_data_uri): # if the store exists\n",
" mod_row.timescale = freq_dict[freq]\n",
" mod_row.downscaled_daily_data_uri = downscaled_freq_data_uri\n",
" intake_cat_df.append(mod_row, ignore_index=False)\n",
" intake_cat_df.loc[len(intake_cat_df)] = mod_row\n",
"\n",
" return intake_cat_df.drop_duplicates(\n",
" subset=intake_cat_df.columns.difference(['downscaled_daily_data_uri']), keep='first'\n",
" )\n",
"\n",
"\n",
"# check catalog, then save and replace once QA'd."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea5d5936-80e1-42f3-8c93-3db368ec7143",
"metadata": {},
"outputs": [],
"source": [
"fs = get_fsspec_fs()\n",
"intake_cat_df = return_intake_cat_df()\n",
"time_summary_dict = create_time_summary_options(intake_cat_df)\n",
"resample_time_summaries(time_summary_dict, fs)\n",
"updated_catalog = update_catalog(intake_cat_df, fs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "815e17de-bf6e-4d60-9aab-28d953906de8",
"metadata": {},
"outputs": [],
"source": [
"# updated_catalog.to_csv('s3://carbonplan-scratch/updated_catalog.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77349920-e0b5-4cc4-b00d-30c63cb3bb41",
"metadata": {},
"outputs": [],
"source": [
"###\n",
"ds_month = xr.open_zarr(\n",
" 's3://carbonplan-scratch/cp-cmip/version1/data/DeepSD-BC/CMIP.CCCma.CanESM5.historical.r1i1p1f1.monthly.DeepSD-BC.pr.zarr'\n",
")\n",
"ds_year = xr.open_zarr(\n",
" 's3://carbonplan-scratch/cp-cmip/version1/data/DeepSD-BC/CMIP.CCCma.CanESM5.historical.r1i1p1f1.yearly.DeepSD-BC.pr.zarr'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cdbc96b7-9dab-444e-bc72-a8b16ce742f0",
"metadata": {},
"outputs": [],
"source": [
"ds_month"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a55d91c2-63cb-4d17-a016-1c93d588e1d5",
"metadata": {},
"outputs": [],
"source": [
"ds_year"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.9 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.9"
},
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

1 comment on commit bf06467

@vercel
Copy link

@vercel vercel bot commented on bf06467 Oct 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.