-
-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
notebook to generate monthly/yearly aggregations for msft bucket (#289)
* notebook to generate monthly/yearly aggregations for msft bucket * Update flows/resample_to_yearly_monthly.ipynb Co-authored-by: Anderson Banihirwe <axbanihirwe@ualr.edu> * Update flows/resample_to_yearly_monthly.ipynb Co-authored-by: Anderson Banihirwe <axbanihirwe@ualr.edu> Co-authored-by: Anderson Banihirwe <axbanihirwe@ualr.edu>
- Loading branch information
1 parent
e666b5b
commit bf06467
Showing
1 changed file
with
203 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "82468291-f425-4fe4-a85e-5f8725d1030a", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import intake\n", | ||
"import xarray as xr\n", | ||
"import warnings\n", | ||
"import fsspec\n", | ||
"import pandas as pd\n", | ||
"import os\n", | ||
"from cmip6_downscaling.methods.common import utils\n", | ||
"from dask.distributed import Client\n", | ||
"\n", | ||
"warnings.simplefilter(action='ignore', category=FutureWarning)\n", | ||
"freq_dict = {'1AS': 'yearly', '1MS': 'monthly'}\n", | ||
"freq_dict_lookup = {'yearly': '1AS', 'monthly': '1MS'}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "5b051726-fe98-4102-a1a6-7df9cdbbf06b", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"client = Client()\n", | ||
"client" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "1f8ae420-0c51-495b-a2ba-9b95f2bff24e", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def get_fsspec_fs():\n", | ||
" if os.environ.get('AZURE_STORAGE_CONNECTION_STRING'):\n", | ||
" del os.environ['AZURE_STORAGE_CONNECTION_STRING']\n", | ||
" sas_token = ''\n", | ||
" account_options = {'account_name': \"cpdataeuwest\", 'sas_token': sas_token}\n", | ||
" return fsspec.filesystem('az', **account_options)\n", | ||
"\n", | ||
"\n", | ||
"def store_exists(fs: fsspec.filesystem, store_path: str) -> bool:\n", | ||
" return fs.exists(store_path.split('windows.net/')[1])\n", | ||
"\n", | ||
"\n", | ||
"def return_intake_cat_df():\n", | ||
" return intake.open_esm_datastore(\n", | ||
" 'https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/catalogs/global-downscaled-cmip6.json'\n", | ||
" ).df\n", | ||
"\n", | ||
"\n", | ||
"def create_time_summary_options(df: pd.DataFrame) -> dict:\n", | ||
" \"\"\"Create dictionary of frequency options and zarr store uris.\"\"\"\n", | ||
" time_summary_dict = {}\n", | ||
" for freq in freq_dict.keys():\n", | ||
" time_summary_dict.update(\n", | ||
" {\n", | ||
" freq_dict[freq]: list(\n", | ||
" df['downscaled_daily_data_uri'].str.replace('day', freq_dict[freq])\n", | ||
" )\n", | ||
" }\n", | ||
" )\n", | ||
" return time_summary_dict\n", | ||
"\n", | ||
"\n", | ||
"def resample_dataset(resampled_path: str, freq: str, output_path: str = None):\n", | ||
" \"\"\"resamples zarr store to frequency (monthly, yearly). Writes output to zarr store\"\"\"\n", | ||
" daily_path = resampled_path.replace(freq, 'day')\n", | ||
" ds = xr.open_zarr(daily_path)\n", | ||
" out_ds = utils.resample_wrapper(ds, freq=freq_dict_lookup[freq])\n", | ||
" if output_path:\n", | ||
" out_ds.to_zarr(output_path, mode='w', consolidated=True)\n", | ||
" else:\n", | ||
" out_ds.to_zarr(resampled_path, mode='w', consolidated=True)\n", | ||
"\n", | ||
"\n", | ||
"def resample_time_summaries(time_summary_dict: dict, fs: fsspec.filesystem):\n", | ||
" \"\"\"Iterates through aval time frequencies (monthly, yearly), checks if store exists, runs resample_dataset function.\"\"\"\n", | ||
" for freq in time_summary_dict.keys():\n", | ||
" for time_summary in time_summary_dict[freq]:\n", | ||
" if not store_exists(fs, time_summary): # if fsspec finds the store does not exist:\n", | ||
" output_path = 's3://carbonplan-scratch' + time_summary.split('.net')[1]\n", | ||
" resample_dataset(time_summary, freq, output_path=output_path)\n", | ||
"\n", | ||
"\n", | ||
"def update_catalog(intake_cat_df: pd.DataFrame, fs: fsspec.filesystem):\n", | ||
" \"\"\"Updates catalog with newly resampled stores\"\"\"\n", | ||
" for freq in freq_dict.keys():\n", | ||
" for index, row in intake_cat_df.iterrows():\n", | ||
" mod_row = row.copy(deep=True)\n", | ||
" downscaled_freq_data_uri = mod_row.downscaled_daily_data_uri.replace(\n", | ||
" 'day', freq_dict[freq]\n", | ||
" )\n", | ||
" if store_exists(fs, downscaled_freq_data_uri): # if the store exists\n", | ||
" mod_row.timescale = freq_dict[freq]\n", | ||
" mod_row.downscaled_daily_data_uri = downscaled_freq_data_uri\n", | ||
" intake_cat_df.append(mod_row, ignore_index=False)\n", | ||
" intake_cat_df.loc[len(intake_cat_df)] = mod_row\n", | ||
"\n", | ||
" return intake_cat_df.drop_duplicates(\n", | ||
" subset=intake_cat_df.columns.difference(['downscaled_daily_data_uri']), keep='first'\n", | ||
" )\n", | ||
"\n", | ||
"\n", | ||
"# check catalog, then save and replace once QA'd." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "ea5d5936-80e1-42f3-8c93-3db368ec7143", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"fs = get_fsspec_fs()\n", | ||
"intake_cat_df = return_intake_cat_df()\n", | ||
"time_summary_dict = create_time_summary_options(intake_cat_df)\n", | ||
"resample_time_summaries(time_summary_dict, fs)\n", | ||
"updated_catalog = update_catalog(intake_cat_df, fs)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "815e17de-bf6e-4d60-9aab-28d953906de8", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# updated_catalog.to_csv('s3://carbonplan-scratch/updated_catalog.csv',index=False)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "77349920-e0b5-4cc4-b00d-30c63cb3bb41", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"###\n", | ||
"ds_month = xr.open_zarr(\n", | ||
" 's3://carbonplan-scratch/cp-cmip/version1/data/DeepSD-BC/CMIP.CCCma.CanESM5.historical.r1i1p1f1.monthly.DeepSD-BC.pr.zarr'\n", | ||
")\n", | ||
"ds_year = xr.open_zarr(\n", | ||
" 's3://carbonplan-scratch/cp-cmip/version1/data/DeepSD-BC/CMIP.CCCma.CanESM5.historical.r1i1p1f1.yearly.DeepSD-BC.pr.zarr'\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "cdbc96b7-9dab-444e-bc72-a8b16ce742f0", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"ds_month" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "a55d91c2-63cb-4d17-a016-1c93d588e1d5", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"ds_year" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3.8.9 64-bit", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.9" | ||
}, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
bf06467
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Successfully deployed to the following URLs:
cmip6-downscaling – ./
cmip6-downscaling-git-main-carbonplan.vercel.app
cmip6-downscaling.docs.carbonplan.org
cmip6-downscaling-carbonplan.vercel.app
cmip6-downscaling.vercel.app