Commit
fixed tau issues and added downloading coamps track
maartenvanormondt committed Oct 18, 2024
1 parent 01c3974 commit 0a72bb6
Showing 4 changed files with 93 additions and 31 deletions.
2 changes: 1 addition & 1 deletion cht_meteo/cht/meteo/__init__.py
@@ -5,4 +5,4 @@
 @author: ormondt
 """
-from .database import meteo_database
+from .database import MeteoDatabase
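
With this change the package exports the MeteoDatabase class instead of a ready-made meteo_database instance, so callers now construct the database themselves. A minimal usage sketch (the constructor arguments, folder and toml file name are assumptions, not taken from this commit):

    from cht_meteo.cht.meteo import MeteoDatabase

    # Hypothetical paths; read_datasets() appears in database.py below
    db = MeteoDatabase(path="/data/meteo")
    db.read_datasets(filename="/data/meteo/database.toml")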
46 changes: 42 additions & 4 deletions cht_meteo/cht/meteo/coamps_tc_forecast_s3.py
@@ -4,6 +4,7 @@
 import urllib
 import tarfile as trf
 import xarray as xr
+import requests
 
 from cht_utils import fileops as fo
 from .dataset import MeteoDataset
@@ -50,6 +51,11 @@ def download_forecast_cycle(self, **kwargs):
             # Throw error if storm_number is not provided
             print("Error: storm_number not provided")
             return
 
+        if "only_track" in kwargs:
+            only_track = kwargs["only_track"]
+        else:
+            only_track = False
+
         # For COAMPS-TC we always get the entire dataset (lon_range, lat_range and time_range are not used)
 
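A call sketch for the new keyword (storm_number and only_track appear in the hunks above and below; the dataset object and the cycle_time keyword name are assumptions):

    # Hypothetical: fetch only the track file, skipping the gridded data
    dataset.download_forecast_cycle(storm_number="09L",
                                    cycle_time="2024101800",
                                    only_track=True)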
@@ -63,11 +69,19 @@ def download_forecast_cycle(self, **kwargs):
 
         # Make folder for the forecast
         forecast_path = os.path.join(self.path, cycle_time_meteo) # Folder to store the forecast netcdf files
-        tar_file_path = os.path.join(forecast_path, "_TMP") # Temporary folder to store the tar file
 
         # Make folder for the forecast
         os.makedirs(forecast_path, exist_ok=True)
-        os.makedirs(tar_file_path, exist_ok=True)
 
+        # Start with the track file
+        get_storm_track(forecast_path, year, storm_number, cycle_time_coamps)
+
+        if only_track:
+            # No need to download the gridded forecast
+            return
+
+        # Make folder for the temporary files
+        tar_file_path = os.path.join(forecast_path, "_TMP") # Temporary folder to store the tar file
+        os.makedirs(tar_file_path, exist_ok=True)
 
         tar_file_name = storm_number + "_" + cycle_time_coamps + "_netcdf.tar"
 
@@ -92,6 +106,7 @@ def download_forecast_cycle(self, **kwargs):
                     tar.extract(member, path=tar_file_path)
 
         # Convert all three resolutions to meteo format
+        print("Converting COAMPS-TC netcdf files to meteo netcdf files")
         for subset in self.subset:
             res = subset.name
             # Get list of all the files in the _TMP/netcdf folder
@@ -107,6 +122,9 @@ def download_forecast_cycle(self, **kwargs):
         # Remove the temporary folder
         fo.delete_folder(tar_file_path)
 
+        # Also try to download the track file
+        track_file_name = storm_number + "_" + cycle_time_coamps + "_track.txt"
+
 def convert_coamps_nc_to_meteo_nc(inpfile, outfile):
 
     # Open the COAMPS-TC netcdf file
@@ -141,3 +159,23 @@ def convert_coamps_nc_to_meteo_nc(inpfile, outfile):
     # Close the raw netcdf file
     dsin.close()
 
+def get_storm_track(track_path: str, year: int, storm: str, cycle: str):
+    """
+    Downloads the COAMPS-TC storm track file for a given year, storm, and cycle.
+
+    Parameters:
+        track_path (str): Folder in which the downloaded track file is stored.
+        year (int): The year of the storm.
+        storm (str): The storm identifier.
+        cycle (str): The forecast cycle of the storm track data.
+    """
+    try:
+        filename = os.path.join(track_path, f"TRK_COAMPS_CTCX_3_{cycle}_{storm}.trk")
+        url = f"https://coamps-tc-data.s3.us-east-2.amazonaws.com/deterministic/realtime/{year}/{storm}/{cycle}/TRK_COAMPS_CTCX_3_{cycle}_{storm}"
+        urllib.request.urlretrieve(url, filename)
+    except Exception as e:
+        print(f"Error downloading {url}")
+        print(e)
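
To illustrate the URL layout used by get_storm_track, a standalone sketch that fetches one track file (the storm and cycle values are made up, and urllib.request is imported explicitly here, whereas the module above relies on import urllib):

    import urllib.request

    year, storm, cycle = 2024, "09L", "2024101800"  # hypothetical values
    url = (
        "https://coamps-tc-data.s3.us-east-2.amazonaws.com/deterministic/realtime/"
        f"{year}/{storm}/{cycle}/TRK_COAMPS_CTCX_3_{cycle}_{storm}"
    )
    urllib.request.urlretrieve(url, f"TRK_COAMPS_CTCX_3_{cycle}_{storm}.trk")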
13 changes: 8 additions & 5 deletions cht_meteo/cht/meteo/database.py
@@ -37,7 +37,7 @@ def add_dataset(self, dataset_name, source_name, **kwargs):
             md = MeteoDatasetGFSAnalysis0p50(name=dataset_name, path=dataset_path, **kwargs)
         else:
             md = MeteoDataset(name=dataset_name)
-            print(f"Error: source {source_name} not recognized")
+            print(f"Error while reading meteo database: source {source_name} not recognized")
 
     else:
         # Use generic meteo dataset (this does not have download functionality)
@@ -58,7 +58,7 @@ def read_datasets(self, filename=None):
 
         # Check if the file exists
         if not os.path.exists(filename):
-            print(f"Error: file {filename} does not exist")
+            print(f"Error while reading meteo database: file {filename} does not exist")
             return
 
         # Read the toml file
@@ -76,10 +76,13 @@ def read_datasets(self, filename=None):
                 lat_range = meteo_dataset["y_range"]
             else:
                 lat_range = None
+            if "tau" in meteo_dataset:
+                tau = meteo_dataset["tau"]
+            else:
+                tau = 0
 
             self.add_dataset(meteo_dataset["name"],
                              source_name=meteo_dataset["source"],
                              lon_range=lon_range,
-                             lat_range=lat_range)
-
-meteo_database = MeteoDatabase()
+                             lat_range=lat_range,
+                             tau=tau)
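
For reference, a sketch of a dataset entry carrying the new tau field, parsed the way read_datasets would see it (the [[meteo_dataset]] table name, source string and ranges are assumptions; only the keys name, source, x_range, y_range and tau appear in the reader above):

    import tomllib  # Python 3.11+; the reader above may use a different toml package

    toml_text = '''
    [[meteo_dataset]]
    name = "coamps_tc_forecast"
    source = "coamps_tc_forecast_s3"
    x_range = [-80.0, -60.0]
    y_range = [20.0, 40.0]
    tau = 3
    '''
    cfg = tomllib.loads(toml_text)
    print(cfg["meteo_dataset"][0]["tau"])  # -> 3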
63 changes: 42 additions & 21 deletions cht_meteo/cht/meteo/dataset.py
@@ -6,8 +6,10 @@
 from pyproj import CRS
 from pyproj import Transformer
 from scipy.interpolate import RegularGridInterpolator
+import time
 
 from .dataset_to_delft3d import write_to_delft3d_ascii
+from .dataset_to_json_wind import write_wind_to_json
 
 import cht_utils.fileops as fo
 
@@ -24,6 +26,7 @@ def __init__(self, **kwargs):
         self.lat_range = None # Latitude range of the dataset
         self.var_names = ["wind_u", "wind_v", "barometric_pressure", "precipitation"] # Variable names in the dataset
         self.crs = CRS(4326) # Coordinate reference system of the dataset
+        self.tau = 0 # Time interval in hours between cycle time and the first data to use
 
         # Loop through kwargs to set attributes
         reserved_keys = ["x", "y", "lon", "lat", "time"]
@@ -186,13 +189,13 @@ def collect(self, time_range, **kwargs):
         if "tau" in kwargs:
             tau = kwargs["tau"]
         else:
-            tau = 0
+            tau = self.tau
 
-        if "last_cycle" in kwargs:
-            last_cycle_string = kwargs["last_cycle"]
-            last_cycle_time = datetime.datetime.strptime(last_cycle_string, "%Y%m%d_%H")
-        else:
-            last_cycle_time = None
+        last_cycle_time = None
+        if "last_cycle" in kwargs:
+            if kwargs["last_cycle"] is not None:
+                # last_cycle is now expected to be a datetime, not a "%Y%m%d_%H" string
+                last_cycle_time = kwargs["last_cycle"]
 
         # Subsets are only used when there are subsets with different resolutions (e.g. as in COAMPS-TC)
         if len(self.subset) > 0:
@@ -225,6 +228,8 @@ def collect(self, time_range, **kwargs):
         # Make list of all cycles
         all_cycle_paths = fo.list_folders(os.path.join(self.path, "*"))
 
+        icycle = -1
+
         # Loop through all cycle paths
         for cycle_path in all_cycle_paths:
 
@@ -245,6 +250,8 @@ def collect(self, time_range, **kwargs):
             # Find all times available in this cycle as it may contain our data
             files_in_cycle = fo.list_files(os.path.join(cycle_path, "*" + subsetstr + "*.nc"))
 
+            icycle += 1
+
             # Loop through all files in this cycle
             for ifile, file in enumerate(files_in_cycle):
 
@@ -254,10 +261,10 @@ def collect(self, time_range, **kwargs):
                 if ifile == 0:
                     self.last_analysis_time = t_file
 
-                if tau > 0:
+                if tau > 0 and icycle > 0:
                     # Compute time interval in hours between cycle and file
-                    th = (t_file - t_cycle).total_seconds() / 3600
-                    if th > tau:
+                    th = int((t_file - t_cycle).total_seconds() / 3600)
+                    if th < tau:
                         # We can skip this file
                         continue
 
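In words: for every cycle after the first one processed, files that fall less than tau hours after their own cycle time are skipped, so the early (spin-up) hours of a forecast can be covered by the previous cycle instead. A self-contained sketch of the selection rule (times and the tau value are made up):

    import datetime

    tau = 3  # hours
    t_cycle = datetime.datetime(2024, 10, 18, 0)

    for hour in range(6):
        t_file = t_cycle + datetime.timedelta(hours=hour)
        th = int((t_file - t_cycle).total_seconds() / 3600)
        print(t_file, "skipped" if th < tau else "used")
    # +0h, +1h and +2h are skipped; +3h onward are used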
@@ -482,6 +489,8 @@ def cut_out(self,
         t = self.ds["time"].values
 
         if time_range is not None:
+            # Convert values in time_range to np.datetime64 (they may be datetimes or strings)
+            time_range = [np.datetime64(tr) for tr in time_range]
             t = t[(t>=time_range[0]) & (t<=time_range[1])]
 
         # Create new dataset
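
The conversion matters because time_range entries may arrive as Python datetimes or strings while the dataset's time axis holds numpy datetime64 values; a quick standalone check (all values made up):

    import datetime
    import numpy as np

    time_range = [datetime.datetime(2024, 10, 18), "2024-10-19"]
    time_range = [np.datetime64(tr) for tr in time_range]
    t = np.arange("2024-10-17", "2024-10-21", dtype="datetime64[D]")
    print(t[(t >= time_range[0]) & (t <= time_range[1])])
    # -> ['2024-10-18' '2024-10-19']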
@@ -502,24 +511,24 @@ def interpolate_dataset(self, dataset, copy_time=False):
         # Loop through variables
         for var_name in self.var_names:
             da = self.ds[var_name].copy()
-            for it, time in enumerate(self.ds["time"].values):
+            for it, t in enumerate(self.ds["time"].values):
                 # Get data
-                v = dataset.interpolate_variable(var_name, time, xg, yg, crs=self.crs)
+                v = dataset.interpolate_variable(var_name, t, xg, yg, crs=self.crs)
                 # Set points in v equal to points in original data vori where vori already has a value
-                vori = da.loc[dict(time=time)].values[:]
+                vori = da.loc[dict(time=t)].values[:]
                 not_nan = np.where(~np.isnan(vori))
                 v[not_nan] = vori[not_nan]
-                da.loc[dict(time=time)] = v
+                da.loc[dict(time=t)] = v
 
-                self.ds[var_name] = da
+            self.ds[var_name] = da
 
def merge_datasets(self, datasets, **kwargs):
"""Merge datasets. This is useful when we have multiple datasets with different resolutions."""
        # For now we simply interpolate each dataset onto this one
for dataset in datasets:
self.interpolate_dataset(dataset)

def interpolate_variable(self, var_name: str, time: datetime.datetime, x: np.array, y: np.array, crs=None, fill_missing_data=False):
def interpolate_variable(self, var_name: str, t: datetime.datetime, x: np.array, y: np.array, crs=None, fill_missing_data=False):
"""Returns numpy array with interpolated values of quantity at requested_time and lon, lat. If quantity is a vector, the function returns the x-component of the vector. If the quantity is not found, the function returns an array with zeros."""

# Check shape of x and y. They can be either:
@@ -563,25 +572,34 @@

         # Loop in reverse order to get the highest resolution data first
         for isub in range(nsub - 1, -1, -1):
 
             if subsets:
                 ds = self.subset[isub].ds
             else:
                 ds = self.ds
 
+            # Check if t is a numpy.datetime64
+            if not isinstance(t, np.datetime64):
+                t = np.datetime64(t)
+
-            # Get data
-            ds = ds.interp(time=time)
+            if t in ds["time"].values[:]:
+                # Get data at time t
+                da = ds[var_name].sel(time=t)
+            else:
+                # Interpolate data at time t
+                da = ds[var_name].interp(time=t)
 
             # Get horizontal coordinates
             if self.crs.is_geographic:
-                x = ds["lon"].values
-                y = ds["lat"].values
+                x = da["lon"].values
+                y = da["lat"].values
             else:
-                x = ds["x"].values
-                y = ds["y"].values
+                x = da["x"].values
+                y = da["y"].values
 
             # Make interpolator
-            interp = RegularGridInterpolator((y, x), ds[var_name].values[:])
+            interp = RegularGridInterpolator((y, x), da.values[:])
 
             # Find points outside of grid
             iout = np.where((xg<np.min(x)) | (xg>np.max(x)) | (yg<np.min(y)) | (yg>np.max(y)))
@@ -640,6 +658,9 @@
 #        else:
 #            write_to_delft3d_netcdf(self, file_name, version, path, header_comments, refdate, parameters, time_range)
 
+    def wind_to_json(self, file_name, time_range=None, js=True, iref=1):
+        write_wind_to_json(self, file_name, time_range=time_range, iref=iref, js=js)
+
     def get_coordinates(self, *args):
         """Returns the horizontal coordinates of the dataset."""
         if len(self.subset) > 0:
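
The exact-match branch added to interpolate_variable avoids time interpolation when the requested instant already exists in the dataset; otherwise xarray interpolates linearly along the time axis. A toy illustration of that pattern (variable name and values are made up):

    import numpy as np
    import xarray as xr

    times = np.array(["2024-10-18T00", "2024-10-18T01"], dtype="datetime64[ns]")
    ds = xr.Dataset({"wind_u": ("time", [5.0, 7.0])}, coords={"time": times})

    t = np.datetime64("2024-10-18T00:30")
    if t in ds["time"].values[:]:
        da = ds["wind_u"].sel(time=t)     # exact hit: no interpolation needed
    else:
        da = ds["wind_u"].interp(time=t)  # linear interpolation in time
    print(float(da))  # -> 6.0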