Commit
fixed tau issues and added downloading coamps track
maartenvanormondt committed Oct 18, 2024
1 parent 01c3974 commit 0a72bb6
Showing 4 changed files with 93 additions and 31 deletions.
2 changes: 1 addition & 1 deletion cht_meteo/cht/meteo/__init__.py
@@ -5,4 +5,4 @@
 @author: ormondt
 """
-from .database import meteo_database
+from .database import MeteoDatabase
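
With this change the package exports the MeteoDatabase class instead of a ready-made meteo_database instance, so callers now construct the database themselves. A minimal usage sketch (the constructor arguments, folder and toml file name are assumptions, not taken from this commit):

    from cht_meteo.cht.meteo import MeteoDatabase

    # Hypothetical paths; read_datasets() appears in database.py below
    db = MeteoDatabase(path="/data/meteo")
    db.read_datasets(filename="/data/meteo/database.toml")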
46 changes: 42 additions & 4 deletions cht_meteo/cht/meteo/coamps_tc_forecast_s3.py
@@ -4,6 +4,7 @@
 import urllib
 import tarfile as trf
 import xarray as xr
+import requests
 
 from cht_utils import fileops as fo
 from .dataset import MeteoDataset
@@ -50,6 +51,11 @@ def download_forecast_cycle(self, **kwargs):
             # Throw error if storm_number is not provided
             print("Error: storm_number not provided")
             return
 
+        if "only_track" in kwargs:
+            only_track = kwargs["only_track"]
+        else:
+            only_track = False
+
         # For COAMPS-TC we always get the entire dataset (lon_range, lat_range and time_range are not used)
 
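A call sketch for the new keyword (storm_number and only_track appear in the hunks above and below; the dataset object and the cycle_time keyword name are assumptions):

    # Hypothetical: fetch only the track file, skipping the gridded data
    dataset.download_forecast_cycle(storm_number="09L",
                                    cycle_time="2024101800",
                                    only_track=True)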
@@ -63,11 +69,19 @@ def download_forecast_cycle(self, **kwargs):
 
         # Make folder for the forecast
         forecast_path = os.path.join(self.path, cycle_time_meteo) # Folder to store the forecast netcdf files
-        tar_file_path = os.path.join(forecast_path, "_TMP") # Temporary folder to store the tar file
 
         # Make folder for the forecast
         os.makedirs(forecast_path, exist_ok=True)
-        os.makedirs(tar_file_path, exist_ok=True)
 
+        # Start with the track file
+        get_storm_track(forecast_path, year, storm_number, cycle_time_coamps)
+
+        if only_track:
+            # No need to download the gridded forecast
+            return
+
+        # Make folder for the temporary files
+        tar_file_path = os.path.join(forecast_path, "_TMP") # Temporary folder to store the tar file
+        os.makedirs(tar_file_path, exist_ok=True)
 
         tar_file_name = storm_number + "_" + cycle_time_coamps + "_netcdf.tar"
 
@@ -92,6 +106,7 @@ def download_forecast_cycle(self, **kwargs):
                     tar.extract(member, path=tar_file_path)
 
         # Convert all three resolutions to meteo format
+        print("Converting COAMPS-TC netcdf files to meteo netcdf files")
         for subset in self.subset:
             res = subset.name
             # Get list of all the files in the _TMP/netcdf folder
@@ -107,6 +122,9 @@ def download_forecast_cycle(self, **kwargs):
         # Remove the temporary folder
         fo.delete_folder(tar_file_path)
 
+        # Also try to download the track file
+        track_file_name = storm_number + "_" + cycle_time_coamps + "_track.txt"
+
 def convert_coamps_nc_to_meteo_nc(inpfile, outfile):
 
     # Open the COAMPS-TC netcdf file
@@ -141,3 +159,23 @@ def convert_coamps_nc_to_meteo_nc(inpfile, outfile):
     # Close the raw netcdf file
     dsin.close()
 
+def get_storm_track(track_path: str, year: int, storm: str, cycle: str):
+    """
+    Downloads the COAMPS-TC storm track file for a given year, storm, and cycle.
+
+    Parameters:
+        track_path (str): Folder in which the downloaded track file is stored.
+        year (int): The year of the storm.
+        storm (str): The storm identifier.
+        cycle (str): The forecast cycle of the storm track data.
+    """
+    try:
+        filename = os.path.join(track_path, f"TRK_COAMPS_CTCX_3_{cycle}_{storm}.trk")
+        url = f"https://coamps-tc-data.s3.us-east-2.amazonaws.com/deterministic/realtime/{year}/{storm}/{cycle}/TRK_COAMPS_CTCX_3_{cycle}_{storm}"
+        urllib.request.urlretrieve(url, filename)
+    except Exception as e:
+        print(f"Error downloading {url}")
+        print(e)
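
To illustrate the URL layout used by get_storm_track, a standalone sketch that fetches one track file (the storm and cycle values are made up, and urllib.request is imported explicitly here, whereas the module above relies on import urllib):

    import urllib.request

    year, storm, cycle = 2024, "09L", "2024101800"  # hypothetical values
    url = (
        "https://coamps-tc-data.s3.us-east-2.amazonaws.com/deterministic/realtime/"
        f"{year}/{storm}/{cycle}/TRK_COAMPS_CTCX_3_{cycle}_{storm}"
    )
    urllib.request.urlretrieve(url, f"TRK_COAMPS_CTCX_3_{cycle}_{storm}.trk")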
13 changes: 8 additions & 5 deletions cht_meteo/cht/meteo/database.py
@@ -37,7 +37,7 @@ def add_dataset(self, dataset_name, source_name, **kwargs):
             md = MeteoDatasetGFSAnalysis0p50(name=dataset_name, path=dataset_path, **kwargs)
         else:
             md = MeteoDataset(name=dataset_name)
-            print(f"Error: source {source_name} not recognized")
+            print(f"Error while reading meteo database: source {source_name} not recognized")
 
     else:
         # Use generic meteo dataset (this does not have download functionality)
@@ -58,7 +58,7 @@ def read_datasets(self, filename=None):
 
         # Check if the file exists
         if not os.path.exists(filename):
-            print(f"Error: file {filename} does not exist")
+            print(f"Error while reading meteo database: file {filename} does not exist")
             return
 
         # Read the toml file
@@ -76,10 +76,13 @@ def read_datasets(self, filename=None):
                 lat_range = meteo_dataset["y_range"]
             else:
                 lat_range = None
+            if "tau" in meteo_dataset:
+                tau = meteo_dataset["tau"]
+            else:
+                tau = 0
 
             self.add_dataset(meteo_dataset["name"],
                              source_name=meteo_dataset["source"],
                              lon_range=lon_range,
-                             lat_range=lat_range)
-
-meteo_database = MeteoDatabase()
+                             lat_range=lat_range,
+                             tau=tau)
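
For reference, a sketch of a dataset entry carrying the new tau field, parsed the way read_datasets would see it (the [[meteo_dataset]] table name, source string and ranges are assumptions; only the keys name, source, x_range, y_range and tau appear in the reader above):

    import tomllib  # Python 3.11+; the reader above may use a different toml package

    toml_text = '''
    [[meteo_dataset]]
    name = "coamps_tc_forecast"
    source = "coamps_tc_forecast_s3"
    x_range = [-80.0, -60.0]
    y_range = [20.0, 40.0]
    tau = 3
    '''
    cfg = tomllib.loads(toml_text)
    print(cfg["meteo_dataset"][0]["tau"])  # -> 3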
63 changes: 42 additions & 21 deletions cht_meteo/cht/meteo/dataset.py
@@ -6,8 +6,10 @@
 from pyproj import CRS
 from pyproj import Transformer
 from scipy.interpolate import RegularGridInterpolator
+import time
 
 from .dataset_to_delft3d import write_to_delft3d_ascii
+from .dataset_to_json_wind import write_wind_to_json
 
 import cht_utils.fileops as fo
 
@@ -24,6 +26,7 @@ def __init__(self, **kwargs):
         self.lat_range = None # Latitude range of the dataset
         self.var_names = ["wind_u", "wind_v", "barometric_pressure", "precipitation"] # Variable names in the dataset
         self.crs = CRS(4326) # Coordinate reference system of the dataset
+        self.tau = 0 # Time interval in hours between cycle time and the first data to use
 
         # Loop through kwargs to set attributes
         reserved_keys = ["x", "y", "lon", "lat", "time"]
@@ -186,13 +189,13 @@ def collect(self, time_range, **kwargs):
         if "tau" in kwargs:
             tau = kwargs["tau"]
         else:
-            tau = 0
+            tau = self.tau
 
-        if "last_cycle" in kwargs:
-            last_cycle_string = kwargs["last_cycle"]
-            last_cycle_time = datetime.datetime.strptime(last_cycle_string, "%Y%m%d_%H")
-        else:
-            last_cycle_time = None
+        last_cycle_time = None
+        if "last_cycle" in kwargs:
+            if kwargs["last_cycle"] is not None:
+                # last_cycle is now expected to be a datetime, not a "%Y%m%d_%H" string
+                last_cycle_time = kwargs["last_cycle"]
 
         # Subsets are only used when there are subsets with different resolutions (e.g. as in COAMPS-TC)
         if len(self.subset) > 0:
@@ -225,6 +228,8 @@ def collect(self, time_range, **kwargs):
         # Make list of all cycles
         all_cycle_paths = fo.list_folders(os.path.join(self.path, "*"))
 
+        icycle = -1
+
         # Loop through all cycle paths
         for cycle_path in all_cycle_paths:
 
@@ -245,6 +250,8 @@ def collect(self, time_range, **kwargs):
             # Find all times available in this cycle as it may contain our data
             files_in_cycle = fo.list_files(os.path.join(cycle_path, "*" + subsetstr + "*.nc"))
 
+            icycle += 1
+
             # Loop through all files in this cycle
             for ifile, file in enumerate(files_in_cycle):
 
@@ -254,10 +261,10 @@ def collect(self, time_range, **kwargs):
                 if ifile == 0:
                     self.last_analysis_time = t_file
 
-                if tau > 0:
+                if tau > 0 and icycle > 0:
                     # Compute time interval in hours between cycle and file
-                    th = (t_file - t_cycle).total_seconds() / 3600
-                    if th > tau:
+                    th = int((t_file - t_cycle).total_seconds() / 3600)
+                    if th < tau:
                         # We can skip this file
                         continue
 
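In words: for every cycle after the first one processed, files that fall less than tau hours after their own cycle time are skipped, so the early (spin-up) hours of a forecast can be covered by the previous cycle instead. A self-contained sketch of the selection rule (times and the tau value are made up):

    import datetime

    tau = 3  # hours
    t_cycle = datetime.datetime(2024, 10, 18, 0)

    for hour in range(6):
        t_file = t_cycle + datetime.timedelta(hours=hour)
        th = int((t_file - t_cycle).total_seconds() / 3600)
        print(t_file, "skipped" if th < tau else "used")
    # +0h, +1h and +2h are skipped; +3h onward are used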
@@ -482,6 +489,8 @@ def cut_out(self,
         t = self.ds["time"].values
 
         if time_range is not None:
+            # Convert values in time_range to np.datetime64 (they may be datetimes or strings)
+            time_range = [np.datetime64(tr) for tr in time_range]
             t = t[(t>=time_range[0]) & (t<=time_range[1])]
 
         # Create new dataset
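
The conversion matters because time_range entries may arrive as Python datetimes or strings while the dataset's time axis holds numpy datetime64 values; a quick standalone check (all values made up):

    import datetime
    import numpy as np

    time_range = [datetime.datetime(2024, 10, 18), "2024-10-19"]
    time_range = [np.datetime64(tr) for tr in time_range]
    t = np.arange("2024-10-17", "2024-10-21", dtype="datetime64[D]")
    print(t[(t >= time_range[0]) & (t <= time_range[1])])
    # -> ['2024-10-18' '2024-10-19']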
@@ -502,24 +511,24 @@ def interpolate_dataset(self, dataset, copy_time=False):
         # Loop through variables
         for var_name in self.var_names:
             da = self.ds[var_name].copy()
-            for it, time in enumerate(self.ds["time"].values):
+            for it, t in enumerate(self.ds["time"].values):
                 # Get data
-                v = dataset.interpolate_variable(var_name, time, xg, yg, crs=self.crs)
+                v = dataset.interpolate_variable(var_name, t, xg, yg, crs=self.crs)
                 # Set points in v equal to points in original data vori where vori already has a value
-                vori = da.loc[dict(time=time)].values[:]
+                vori = da.loc[dict(time=t)].values[:]
                 not_nan = np.where(~np.isnan(vori))
                 v[not_nan] = vori[not_nan]
-                da.loc[dict(time=time)] = v
+                da.loc[dict(time=t)] = v
 
-                self.ds[var_name] = da
+            self.ds[var_name] = da
 
def merge_datasets(self, datasets, **kwargs):
"""Merge datasets. This is useful when we have multiple datasets with different resolutions."""
        # For now we simply interpolate each dataset onto this one
for dataset in datasets:
self.interpolate_dataset(dataset)

def interpolate_variable(self, var_name: str, time: datetime.datetime, x: np.array, y: np.array, crs=None, fill_missing_data=False):
def interpolate_variable(self, var_name: str, t: datetime.datetime, x: np.array, y: np.array, crs=None, fill_missing_data=False):
"""Returns numpy array with interpolated values of quantity at requested_time and lon, lat. If quantity is a vector, the function returns the x-component of the vector. If the quantity is not found, the function returns an array with zeros."""

# Check shape of x and y. They can be either:
@@ -563,25 +572,34 @@

         # Loop in reverse order to get the highest resolution data first
         for isub in range(nsub - 1, -1, -1):
 
             if subsets:
                 ds = self.subset[isub].ds
             else:
                 ds = self.ds
 
+            # Check if t is a numpy.datetime64
+            if not isinstance(t, np.datetime64):
+                t = np.datetime64(t)
+
-            # Get data
-            ds = ds.interp(time=time)
+            if t in ds["time"].values[:]:
+                # Get data at time t
+                da = ds[var_name].sel(time=t)
+            else:
+                # Interpolate data at time t
+                da = ds[var_name].interp(time=t)
 
             # Get horizontal coordinates
             if self.crs.is_geographic:
-                x = ds["lon"].values
-                y = ds["lat"].values
+                x = da["lon"].values
+                y = da["lat"].values
             else:
-                x = ds["x"].values
-                y = ds["y"].values
+                x = da["x"].values
+                y = da["y"].values
 
             # Make interpolator
-            interp = RegularGridInterpolator((y, x), ds[var_name].values[:])
+            interp = RegularGridInterpolator((y, x), da.values[:])
 
             # Find points outside of grid
             iout = np.where((xg<np.min(x)) | (xg>np.max(x)) | (yg<np.min(y)) | (yg>np.max(y)))
@@ -640,6 +658,9 @@
 #        else:
 #            write_to_delft3d_netcdf(self, file_name, version, path, header_comments, refdate, parameters, time_range)
 
+    def wind_to_json(self, file_name, time_range=None, js=True, iref=1):
+        write_wind_to_json(self, file_name, time_range=time_range, iref=iref, js=js)
+
     def get_coordinates(self, *args):
         """Returns the horizontal coordinates of the dataset."""
         if len(self.subset) > 0:
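
The exact-match branch added to interpolate_variable avoids time interpolation when the requested instant already exists in the dataset; otherwise xarray interpolates linearly along the time axis. A toy illustration of that pattern (variable name and values are made up):

    import numpy as np
    import xarray as xr

    times = np.array(["2024-10-18T00", "2024-10-18T01"], dtype="datetime64[ns]")
    ds = xr.Dataset({"wind_u": ("time", [5.0, 7.0])}, coords={"time": times})

    t = np.datetime64("2024-10-18T00:30")
    if t in ds["time"].values[:]:
        da = ds["wind_u"].sel(time=t)     # exact hit: no interpolation needed
    else:
        da = ds["wind_u"].interp(time=t)  # linear interpolation in time
    print(float(da))  # -> 6.0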