diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 0000000..c654779 --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,59 @@ +name: CI + +on: + push: + branches: + - '*' + pull_request: + branches: + - main + +jobs: + + test: + name: ${{ matrix.python-version }}-build + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.12"] + steps: + - uses: actions/checkout@v4 + + - name: Create conda environment + uses: mamba-org/setup-micromamba@v1 + with: + cache-downloads: true + cache-environment: true + micromamba-version: 'latest' + environment-file: ci/environment.yml + create-args: | + python=${{ matrix.python-version }} + + - name: Environment info + shell: bash + run: | + conda info + printenv + + - name: Install C-Star + shell: micromamba-shell {0} + run: | + python -V + python -m pip install -e cstar_ocean --no-deps --force-reinstall + + - name: Running Tests + shell: bash -l {0} + run: | + python -V + python -u -m coverage run --rcfile=coverage.toml cstar_ocean/tests/test_roms_marbl_example.py + + - name: Get coverage report + shell: bash -l {0} + run: | + coverage report -m ; coverage xml + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ./coverage.xml diff --git a/README.md b/README.md index a0898f9..29e47d0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +[![codecov](https://codecov.io/github/CWorthy-ocean/C-Star/graph/badge.svg?token=EGCZJ22RTC)](https://codecov.io/github/CWorthy-ocean/C-Star) # C-Star Computational Systems for Tracking Ocean Carbon diff --git a/ci/environment.yml b/ci/environment.yml new file mode 100644 index 0000000..8d50543 --- /dev/null +++ b/ci/environment.yml @@ -0,0 +1,15 @@ +name: cstar_env +channels: + - conda-forge +dependencies: + - python>=3.10 + - pooch + - pyyaml + - python-dateutil + - coverage + - numpy + - compilers + - netcdf-fortran + - mpich + - nco + - ncview diff --git a/coverage.toml b/coverage.toml new file mode 100644 index 0000000..037b327 --- /dev/null +++ b/coverage.toml @@ -0,0 +1,7 @@ +[tool.coverage.run] +source = ["cstar_ocean"] +omit = [ + "tests/*", + "**/__init__.py", + "setup.py", +] diff --git a/cstar_ocean/cstar_ocean/additional_code.py b/cstar_ocean/cstar_ocean/additional_code.py index 6d265a2..12b1fd3 100644 --- a/cstar_ocean/cstar_ocean/additional_code.py +++ b/cstar_ocean/cstar_ocean/additional_code.py @@ -1,9 +1,9 @@ import os import shutil import tempfile -import subprocess from typing import Optional, List from cstar_ocean.base_model import BaseModel +from cstar_ocean.utils import _clone_and_checkout class AdditionalCode: @@ -122,14 +122,11 @@ def get(self, local_path: str): The local path (typically `Case.caseroot`) where the additional code will be curated """ with tempfile.TemporaryDirectory() as tmp_dir: - print(f"cloning {self.source_repo} into temporary directory {tmp_dir}") - subprocess.run( - f"git clone {self.source_repo} {tmp_dir}", check=True, shell=True + _clone_and_checkout( + source_repo=self.source_repo, + local_path=tmp_dir, + checkout_target=self.checkout_target, ) - subprocess.run( - f"git checkout {self.checkout_target}", cwd=tmp_dir, shell=True - ) - # TODO if checkout fails, this should fail for file_type in ["source_mods", "namelists"]: file_list = getattr(self, file_type) diff --git a/cstar_ocean/cstar_ocean/base_model.py b/cstar_ocean/cstar_ocean/base_model.py index 25ccdfe..c9fe3e0 100644 ---
a/cstar_ocean/cstar_ocean/base_model.py +++ b/cstar_ocean/cstar_ocean/base_model.py @@ -1,9 +1,11 @@ import os import shutil import subprocess +from typing import Optional from abc import ABC, abstractmethod from cstar_ocean.utils import ( _get_hash_from_checkout_target, + _clone_and_checkout, _get_repo_remote, _get_repo_head_hash, _write_to_config_file, @@ -58,7 +60,7 @@ class BaseModel(ABC): handle_local_config_status() prompts the user to run get() if the model cannot be found. """ - def __init__(self, source_repo=None, checkout_target=None): + def __init__(self, source_repo: Optional[str] = None, checkout_target: Optional[str] = None): """ Initialize a BaseModel object manually from a source repository and checkout target. @@ -349,9 +351,13 @@ def get(self, target: str): the path where ROMS will be cloned and compiled """ - # Get the REPO and checkout the right version - subprocess.run(f"git clone {self.source_repo} {target}", shell=True) - subprocess.run(f"git -C {target} checkout {self.checkout_target}", shell=True) + # TODO: Situation where environment variables like ROMS_ROOT are not set... + # ... but repo already exists at local_path results in an error rather than a prompt + _clone_and_checkout( + source_repo=self.source_repo, + local_path=target, + checkout_target=self.checkout_target, + ) # Set environment variables for this session: os.environ["ROMS_ROOT"] = target @@ -359,7 +365,7 @@ def get(self, target: str): # Set the configuration file to be read by __init__.py for future sessions: config_file_str = ( - f'os.environ["ROMS_ROOT"]="{target}"\nos.environ["PATH"]+=":' + f' os.environ["ROMS_ROOT"]="{target}"\n os.environ["PATH"]+=":' + f'{target}/Tools-Roms"\n' ) @@ -424,17 +430,18 @@ def get(self, target: str): target: str The local path where MARBL will be cloned and compiled """ - - # FIXME: duplicate code from ROMSBaseModel.get() - subprocess.run(f"git clone {self.source_repo} {target}", shell=True) - subprocess.run(f"git -C {target} checkout {self.checkout_target}", shell=True) + _clone_and_checkout( + source_repo=self.source_repo, + local_path=target, + checkout_target=self.checkout_target, + ) # Set environment variables for this session: os.environ["MARBL_ROOT"] = target # Set the configuration file to be read by __init__.py for future sessions: # QUESTION: how better to handle this?
- config_file_str = f'\nos.environ["MARBL_ROOT"]="{target}"\n' + config_file_str = f'\n os.environ["MARBL_ROOT"]="{target}"\n' _write_to_config_file(config_file_str) # Make things diff --git a/cstar_ocean/cstar_ocean/component.py b/cstar_ocean/cstar_ocean/component.py index 61a14ea..1251c3f 100644 --- a/cstar_ocean/cstar_ocean/component.py +++ b/cstar_ocean/cstar_ocean/component.py @@ -1,12 +1,20 @@ import os import glob +import warnings import subprocess from abc import ABC, abstractmethod from typing import List, Optional, Any from cstar_ocean.utils import _calculate_node_distribution, _replace_text_in_file from cstar_ocean.base_model import ROMSBaseModel, BaseModel -from cstar_ocean.input_dataset import InputDataset +from cstar_ocean.input_dataset import ( + InputDataset, + InitialConditions, + ModelGrid, + SurfaceForcing, + BoundaryForcing, + TidalForcing, +) from cstar_ocean.additional_code import AdditionalCode from cstar_ocean.environment import ( @@ -69,7 +77,7 @@ def __init__(self, **kwargs: Any): An initialized Component object """ - # FIXME: do Type checking here + # TODO: do Type checking here if "base_model" not in kwargs or not isinstance( kwargs["base_model"], BaseModel ): @@ -86,10 +94,14 @@ def __str__(self): + # Header name = self.__class__.__name__ base_str = f"{name} object " base_str = "-" * (len(name) + 7) + "\n" + base_str base_str += "\n" + "-" * (len(name) + 7) base_str += "\nBuilt from: " NAC = 0 if self.additional_code is None else 1 @@ -107,26 +119,27 @@ def __str__(self): ) base_str += "\n\nDiscretization info:" - if hasattr(self,'n_procs_x') and self.n_procs_x is not None: + if hasattr(self, "time_step") and self.time_step is not None: + base_str += "\ntime_step: " + str(self.time_step) + if hasattr(self, "n_procs_x") and self.n_procs_x is not None: base_str += ( - "\nn_procs_x:" + "\nn_procs_x: " + str(self.n_procs_x) + " (Number of x-direction processors)" ) - if hasattr(self,'n_procs_y') and self.n_procs_y is not None: + if hasattr(self, "n_procs_y") and self.n_procs_y is not None: base_str += ( "\nn_procs_y:" + str(self.n_procs_y) + " (Number of y-direction processors)" ) - if hasattr(self,'n_levels') and self.n_levels is not None: + if hasattr(self, "n_levels") and self.n_levels is not None: base_str += "\nn_levels:" + str(self.n_levels) - if hasattr(self,'nx') and self.nx is not None: + if hasattr(self, "nx") and self.nx is not None: base_str += "\nnx:" + str(self.nx) - if hasattr(self,'ny') and self.ny is not None: + if hasattr(self, "ny") and self.ny is not None: base_str += "\nny:" + str(self.ny) - - if hasattr(self,'exe_path') and self.exe_path is not None: + if hasattr(self, "exe_path") and self.exe_path is not None: base_str += "\n\nIs compiled: True" base_str += "\n exe_path: " + self.exe_path return base_str @@ -186,6 +199,8 @@ class ROMSComponent(Component): input_datasets: InputDataset or list of InputDatasets Any spatiotemporal data needed to run this instance of ROMS e.g. initial conditions, surface forcing, etc.
+ time_step: int, Optional, default=1 + The time step with which to run ROMS in this configuration nx,ny,n_levels: int The number of x and y points and vertical levels in the domain associated with this object n_procs_x,n_procs_y: int @@ -213,6 +228,7 @@ def __init__( base_model: ROMSBaseModel, additional_code: Optional[AdditionalCode] = None, input_datasets: Optional[InputDataset | List[InputDataset]] = None, + time_step: int = 1, nx: Optional[int] = None, ny: Optional[int] = None, n_levels: Optional[int] = None, @@ -252,6 +268,7 @@ def __init__( input_datasets=input_datasets, ) # QUESTION: should all these attrs be passed in as a single "discretization" arg of type dict? + self.time_step: int = time_step self.nx: Optional[int] = nx self.ny: Optional[int] = ny self.n_levels: Optional[int] = n_levels @@ -308,19 +325,22 @@ def pre_run(self): # Partition input datasets if self.input_datasets is not None: - datasets_to_partition = ( - self.input_datasets - if isinstance(self.input_datasets, list) - else [ + if isinstance(self.input_datasets, InputDataset): + dataset_list = [ self.input_datasets, ] - ) + elif isinstance(self.input_datasets, list): + dataset_list = self.input_datasets + else: + dataset_list = [] + + datasets_to_partition = [d for d in dataset_list if d.exists_locally] for f in datasets_to_partition: dspath = f.local_path fname = os.path.basename(f.source) - os.makedirs(dspath + "/PARTITIONED", exist_ok=True) + os.makedirs(os.path.dirname(dspath) + "/PARTITIONED", exist_ok=True) subprocess.run( "partit " + str(self.n_procs_x) @@ -328,9 +348,38 @@ def pre_run(self): + str(self.n_procs_y) + " ../" + fname, - cwd=dspath + "PARTITIONED", + cwd=os.path.dirname(dspath) + "/PARTITIONED", shell=True, ) + # Edit namelist file to contain dataset paths + forstr = "" + namelist_path = ( + self.additional_code.local_path + + "/" + + self.additional_code.namelists[0] + ) + for f in datasets_to_partition: + partitioned_path = ( + os.path.dirname(f.local_path) + + "/PARTITIONED/" + + os.path.basename(f.local_path) + ) + if isinstance(f, ModelGrid): + gridstr = " " + partitioned_path + "\n" + _replace_text_in_file( + namelist_path, "__GRID_FILE_PLACEHOLDER__", gridstr + ) + elif isinstance(f, InitialConditions): + icstr = " " + partitioned_path + "\n" + _replace_text_in_file( + namelist_path, "__INITIAL_CONDITION_FILE_PLACEHOLDER__", icstr + ) + elif type(f) in [SurfaceForcing, TidalForcing, BoundaryForcing]: + forstr += " " + partitioned_path + "\n" + + _replace_text_in_file( + namelist_path, "__FORCING_FILES_PLACEHOLDER__", forstr + ) ################################################################################ ## NOTE: we assume that roms.in is the ONLY entry in additional_code.namelists, hence [0] @@ -346,10 +395,12 @@ def pre_run(self): "MARBL_NAMELIST_DIR", self.additional_code.local_path + "/namelists/MARBL", ) + ################################################################################ def run( self, + n_time_steps: Optional[int] = None, account_key: Optional[str] = None, walltime: Optional[str] = _CSTAR_SYSTEM_MAX_WALLTIME, job_name: str = "my_roms_run", @@ -383,14 +434,39 @@ def run( + "\nIf you have already run Component.get(), either run it again or " + " add the local path manually using Component.additional_code.local_path='YOUR/PATH'." 
) + return else: run_path = self.additional_code.local_path + "/output/PARTITIONED/" + # Add number of timesteps to namelist + # Check if n_time_steps is None, indicating it was not explicitly set + if n_time_steps is None: + n_time_steps = 1 + warnings.warn( + "n_time_steps not explicitly set, using default value of 1. " + "Please call ROMSComponent.run() with the n_time_steps argument " + "to specify the length of the run.", + UserWarning, + ) + assert isinstance(n_time_steps, int) + + if self.additional_code.namelists is None: + raise ValueError( + "A namelist file (typically roms.in) is needed to run ROMS." + " ROMSComponent.additional_code.namelists should be a non-empty list of filenames." + ) + + _replace_text_in_file( + self.additional_code.local_path + "/" + self.additional_code.namelists[0], + "__NTIMES_PLACEHOLDER__", + str(n_time_steps), + ) + os.makedirs(run_path, exist_ok=True) if self.exe_path is None: - # FIXME this only works if build() is called in the same session - print( - "C-STAR: Unable to find ROMS executable. Run Component.build() first." + raise ValueError( + "C-STAR: ROMSComponent.exe_path is None; unable to find ROMS executable." + + "\nRun Component.build() first. " + "\nIf you have already run Component.build(), either run it again or " + "add the executable path manually using Component.exe_path='YOUR/PATH'." ) @@ -410,9 +486,9 @@ def run( exec_pfx = "mpirun" case "osx_arm64": exec_pfx = "mpirun" + case "linux_x86_64": + exec_pfx = "mpirun" - # FIXME (probably throughout): self.additional_code /could/ be a list - # need to figure out which element to use roms_exec_cmd = ( f"{exec_pfx} -n {self.n_procs_tot} {self.exe_path} " + f"{self.additional_code.local_path}/{self.additional_code.namelists[0]}" diff --git a/cstar_ocean/cstar_ocean/cstar_case.py b/cstar_ocean/cstar_ocean/cstar_case.py index 42dc3b5..d3730c6 100644 --- a/cstar_ocean/cstar_ocean/cstar_case.py +++ b/cstar_ocean/cstar_ocean/cstar_case.py @@ -1,5 +1,8 @@ import os import yaml +import warnings +import datetime as dt +import dateutil.parser from typing import List, Type, Any, Optional from cstar_ocean.component import Component, MARBLComponent, ROMSComponent @@ -28,6 +31,15 @@ class Case: The name of this case caseroot: str The local directory in which this case will be set up + valid_start_date: str or datetime.datetime, Optional, default=None + The earliest start date at which this Case is considered valid + valid_end_date: str or datetime.datetime, Optional, default=None + The latest end date up to which this Case is considered valid + start_date: str or datetime.datetime, Optional, default=valid_start_date + The date from which to begin running this Case. + end_date: str or datetime.datetime, Optional, default=valid_end_date + The date at which to cease running this Case. + is_from_blueprint: bool Whether this Case was instantiated from a blueprint yaml file @@ -51,7 +63,14 @@ class Case: """ def __init__( - self, components: Component | List[Component], name: str, caseroot: str + self, + components: Component | List[Component], + name: str, + caseroot: str, + start_date: Optional[str | dt.datetime] = None, + end_date: Optional[str | dt.datetime] = None, + valid_start_date: Optional[str | dt.datetime] = None, + valid_end_date: Optional[str | dt.datetime] = None, ): """ Initialize a Case object manually from components, name, and caseroot path.
@@ -76,9 +95,99 @@ def __init__( self.name: str = name self.is_from_blueprint: bool = False self.blueprint: Optional[str] = None - self.is_setup: bool = self.check_is_setup() - # self.is_setup=self.check_is_setup() + # Make sure valid dates are datetime objects if present: + if valid_start_date is not None: + self.valid_start_date: Optional[dt.datetime] = ( + valid_start_date + if isinstance(valid_start_date, dt.datetime) + else dateutil.parser.parse(valid_start_date) + ) + else: + self.valid_start_date = None + if valid_end_date is not None: + self.valid_end_date: Optional[dt.datetime] = ( + valid_end_date + if isinstance(valid_end_date, dt.datetime) + else dateutil.parser.parse(valid_end_date) + ) + else: + self.valid_end_date = None + # Warn user if valid dates are not present: + if valid_end_date is None or valid_start_date is None: + warnings.warn( + "Range of valid dates not provided." + + " Unable to check if simulation dates are out of range. " + + "Case objects should be initialized with valid_start_date " + + "and valid_end_date attributes.", + RuntimeWarning, + ) + + # Make sure Case start_date is set and is a datetime object: + if start_date is not None: + # Set if provided + self.start_date: Optional[dt.datetime] = ( + start_date + if isinstance(start_date, dt.datetime) + else dateutil.parser.parse(start_date) + ) + # Set to earliest valid date if not provided and warn + elif valid_start_date is not None: + self.start_date = self.valid_start_date + warnings.warn( + "start_date not provided. " + + f"Defaulting to earliest valid start date: {valid_start_date}." + ) + else: + # Raise error if no way to set + raise ValueError( + "Neither start_date nor valid_start_date provided." + + " Unable to establish a simulation date range" + ) + assert isinstance( + self.start_date, dt.datetime + ), "At this point either the code has failed or start_date is a datetime object" + + # Make sure Case end_date is set and is a datetime object: + if end_date is not None: + # Set if provided + self.end_date: Optional[dt.datetime] = ( + end_date + if isinstance(end_date, dt.datetime) + else dateutil.parser.parse(end_date) + ) + elif valid_end_date is not None: + # Set to latest valid date if not provided and warn + self.end_date = self.valid_end_date + warnings.warn( + "end_date not provided. " + + f"Defaulting to latest valid end date: {valid_end_date}." + ) + + else: + # Raise error if no way to set + raise ValueError( + "Neither end_date nor valid_end_date provided." + + " Unable to establish a simulation date range" + ) + + assert isinstance( + self.end_date, dt.datetime + ), "At this point either the code has failed or end_date is a datetime object" + + # Check provided dates are valid + if (self.valid_start_date is not None) and ( + self.start_date < self.valid_start_date + ): + raise ValueError( + f"start_date {self.start_date} is before the earliest valid start date {self.valid_start_date}." + ) + if (self.valid_end_date is not None) and (self.end_date > self.valid_end_date): + raise ValueError( + f"end_date {self.end_date} is after the latest valid end date {self.valid_end_date}." + ) + if self.start_date > self.end_date: + raise ValueError( + f"start_date {self.start_date} is after end_date {self.end_date}."
+ ) + # Lastly, check if everything is set up + self.is_setup: bool = self.check_is_setup() def __str__(self): base_str = "------------------" @@ -86,8 +195,13 @@ def __str__(self): base_str += "\n------------------" base_str += f"\nName: {self.name}" - base_str += f"\nLocal caseroot: {self.caseroot}" + base_str += f"\ncaseroot: {self.caseroot}" + base_str += f"\nstart_date: {self.start_date}" + base_str += f"\nend_date: {self.end_date}" base_str += f"\nIs setup: {self.is_setup}" + base_str += "\nValid date range:" + base_str += f"\nvalid_start_date: {self.valid_start_date}" + base_str += f"\nvalid_end_date: {self.valid_end_date}" base_str += "\n" if self.is_from_blueprint: @@ -106,7 +220,13 @@ def __repr__(self): return self.__str__() @classmethod - def from_blueprint(cls, blueprint: str, caseroot: str): + def from_blueprint( + cls, + blueprint: str, + caseroot: str, + start_date: Optional[str | dt.datetime] = None, + end_date: Optional[str | dt.datetime] = None, + ): """ Initialize a Case object from a blueprint. @@ -132,6 +252,10 @@ Path to a yaml file containing the blueprint for the case caseroot: str Path to the local directory where the case will be curated and run + start_date: str or datetime.datetime, Optional, default=valid_start_date + The date from which to begin running this Case. + end_date: str or datetime.datetime, Optional, default=valid_end_date + The date at which to cease running this Case. Returns: -------- with open(blueprint, "r") as file: bp_dict = yaml.safe_load(file) - # Primary metadata + # Top-level metadata casename = bp_dict["registry_attrs"]["name"] + + valid_start_date: dt.datetime + valid_end_date: dt.datetime + valid_start_date = bp_dict["registry_attrs"]["valid_date_range"]["start_date"] + valid_end_date = bp_dict["registry_attrs"]["valid_date_range"]["end_date"] + if isinstance(start_date, str): + start_date = dateutil.parser.parse(start_date) + if isinstance(end_date, str): + end_date = dateutil.parser.parse(end_date) + components: Component | List[Component] components = [] @@ -246,6 +380,8 @@ base_model=base_model, source=f["source"], file_hash=f["hash"], + start_date=f["start_date"], + end_date=f["end_date"], ) for f in input_dataset_info["initial_conditions"]["files"] ] @@ -274,6 +410,8 @@ base_model=base_model, source=f["source"], file_hash=f["hash"], + start_date=f["start_date"], + end_date=f["end_date"], ) for f in input_dataset_info["boundary_forcing"]["files"] ] @@ -288,6 +426,8 @@ base_model=base_model, source=f["source"], file_hash=f["hash"], + start_date=f["start_date"], + end_date=f["end_date"], ) for f in input_dataset_info["surface_forcing"]["files"] ] @@ -301,7 +441,16 @@ if len(components) == 1: components = components[0] - caseinstance = cls(components=components, name=casename, caseroot=caseroot) + caseinstance = cls( + components=components, + name=casename, + caseroot=caseroot, + start_date=start_date, + end_date=end_date, + valid_start_date=valid_start_date, + valid_end_date=valid_end_date, + ) + caseinstance.is_from_blueprint = True caseinstance.blueprint = blueprint @@ -325,6 +474,14 @@ def persist(self, filename: str): # Add metadata to dictionary bp_dict["registry_attrs"] = {"name": self.name} + if
self.valid_start_date is not None: + bp_dict["registry_attrs"]["valid_date_range"] = { + "start_date": str(self.valid_start_date) + } + if self.valid_end_date is not None: + bp_dict["registry_attrs"].setdefault("valid_date_range", {})[ + "end_date" + ] = str(self.valid_end_date) bp_dict["components"] = [] @@ -360,6 +517,8 @@ discretization_info["n_procs_x"] = component.n_procs_x if hasattr(component, "n_procs_y"): discretization_info["n_procs_y"] = component.n_procs_y + if hasattr(component, "time_step"): + discretization_info["time_step"] = component.time_step if len(discretization_info) > 0: component_info["discretization"] = discretization_info @@ -444,28 +603,39 @@ def check_is_setup(self) -> bool: for component in component_list: if component.base_model.local_config_status != 0: - # print(f'{component.base_model.name} does not appear to be configured properly.'+\ - #'\nRun Case.setup() or BaseModel.handle_config_status()') return False # Check AdditionalCode - if isinstance(component.additional_code, list): - for ac in component.additional_code: - if not ac.check_exists_locally(self.caseroot): - return False - elif isinstance(component.additional_code, AdditionalCode): - if not component.additional_code.check_exists_locally(self.caseroot): - return False + if (component.additional_code is not None) and ( + not component.additional_code.check_exists_locally(self.caseroot) + ): + return False # Check InputDatasets - if isinstance(component.input_datasets, list): - for ind in component.input_datasets: - if not ind.check_exists_locally(self.caseroot): + if isinstance(component.input_datasets, InputDataset): + dataset_list = [ + component.input_datasets, + ] + elif isinstance(component.input_datasets, list): + dataset_list = component.input_datasets + else: + dataset_list = [] + + for inp in dataset_list: + if not inp.check_exists_locally(self.caseroot): + # If it can't be found locally, check whether it should exist by matching dataset dates with simulation dates: + if (not isinstance(inp.start_date, dt.datetime)) or ( + not isinstance(inp.end_date, dt.datetime) + ): + return False + elif (not isinstance(self.start_date, dt.datetime)) or ( + not isinstance(self.end_date, dt.datetime) + ): + return False + elif (inp.start_date <= self.end_date) and ( + inp.end_date >= self.start_date + ): return False - elif isinstance(component.input_datasets, InputDataset): - if not component.input_datasets.check_exists_locally(self.caseroot): - return False - return True def setup(self): @@ -498,13 +668,25 @@ # Get InputDatasets # tgt_dir=self.caseroot+'/input_datasets/'+component.base_model.name - if isinstance(component.input_datasets, list): - [inp.get(self.caseroot) for inp in component.input_datasets] - elif isinstance(component.input_datasets, InputDataset): - component.input_dataset.get(self.caseroot) + if isinstance(component.input_datasets, InputDataset): + dataset_list = [ + component.input_datasets, + ] + elif isinstance(component.input_datasets, list): + dataset_list = component.input_datasets + else: + dataset_list = [] + + # Verify dates line up before running .get(): + for inp in dataset_list: + # Download input dataset if its date range overlaps Case's date range + if ((inp.start_date is None) or (inp.end_date is None)) or ( + (inp.start_date <= self.end_date) + and (inp.end_date >= self.start_date) + ): + inp.get(self.caseroot) self.is_setup = True - # TODO: Add a marker somewhere to avoid repeating this process, e.g.
self.is_setup=True def build(self): """Compile any necessary additional code associated with this case @@ -531,10 +713,20 @@ def run( # Assuming for now that ROMS presence implies it is the master program # TODO add more advanced logic for this + # 20240807 - TN - set first component as main? for component in self.components: if component.base_model.name == "ROMS": + # Calculate number of time steps: + run_length_seconds = int( + (self.end_date - self.start_date).total_seconds() + ) + component.run( - account_key=account_key, walltime=walltime, job_name=job_name + n_time_steps=(run_length_seconds // component.time_step), + account_key=account_key, + walltime=walltime, + job_name=job_name, ) def post_run(self): diff --git a/cstar_ocean/cstar_ocean/environment.py b/cstar_ocean/cstar_ocean/environment.py index 95f7707..20fb2ce 100644 --- a/cstar_ocean/cstar_ocean/environment.py +++ b/cstar_ocean/cstar_ocean/environment.py @@ -53,11 +53,15 @@ os.environ["MPI_ROOT"] = os.environ["MVAPICH2HOME"] _CSTAR_COMPILER = "intel" _CSTAR_SYSTEM = "sdsc_expanse" - _CSTAR_SCHEDULER = "slurm" + _CSTAR_SCHEDULER = ( + "slurm" # can get this with `scontrol show config` or `sinfo --version` + ) _CSTAR_SYSTEM_DEFAULT_PARTITION = "compute" - _CSTAR_SYSTEM_CORES_PER_NODE = 128 # cpu nodes - _CSTAR_SYSTEM_MEMGB_PER_NODE = 256 # cpu nodes - _CSTAR_SYSTEM_MAX_WALLTIME = "48:00:00" + _CSTAR_SYSTEM_CORES_PER_NODE = ( + 128 # cpu nodes, can get dynamically node-by-node + ) + _CSTAR_SYSTEM_MEMGB_PER_NODE = 256 # with `sinfo -o "%n %c %m %l"` + _CSTAR_SYSTEM_MAX_WALLTIME = "48:00:00" # (hostname/cpus/mem[MB]/walltime) case "derecho": module("load", "intel") @@ -75,11 +79,17 @@ _CSTAR_COMPILER = "intel" _CSTAR_SYSTEM = "ncar_derecho" - _CSTAR_SCHEDULER = "pbs" + _CSTAR_SCHEDULER = ( + "pbs" # can determine dynamically by testing for `qstat --version` + ) _CSTAR_SYSTEM_DEFAULT_PARTITION = "main" - _CSTAR_SYSTEM_CORES_PER_NODE = 128 # cpu nodes - _CSTAR_SYSTEM_MEMGB_PER_NODE = 256 # cpu nodes - _CSTAR_SYSTEM_MAX_WALLTIME = "12:00:00" + _CSTAR_SYSTEM_CORES_PER_NODE = ( + 128 # Harder to dynamically get this info on PBS + ) + _CSTAR_SYSTEM_MEMGB_PER_NODE = ( + 256 # Can combine `qstat -Qf` and `pbsnodes -a` + ) + _CSTAR_SYSTEM_MAX_WALLTIME = "12:00:00" # with grep or awk case "perlmutter": module("load", "cpu") @@ -111,9 +121,12 @@ _CSTAR_SYSTEM = "nersc_perlmutter" _CSTAR_SCHEDULER = "slurm" _CSTAR_SYSTEM_DEFAULT_PARTITION = "regular" - _CSTAR_SYSTEM_CORES_PER_NODE = 128 # cpu nodes - _CSTAR_SYSTEM_MEMGB_PER_NODE = 512 # cpu nodes - _CSTAR_SYSTEM_MAX_WALLTIME = "24:00:00" + _CSTAR_SYSTEM_CORES_PER_NODE = ( + 128 # cpu nodes, can get dynamically node-by-node + ) + _CSTAR_SYSTEM_MEMGB_PER_NODE = 512 # with `sinfo -o "%n %c %m %l"` + _CSTAR_SYSTEM_MAX_WALLTIME = "24:00:00" # (hostname/cpus/mem[MB]/walltime) + elif (platform.system() == "Darwin") and (platform.machine() == "arm64"): # if on MacOS arm64 all dependencies should have been installed by conda @@ -130,9 +143,31 @@ _CSTAR_SYSTEM = "osx_arm64" _CSTAR_SCHEDULER = None _CSTAR_SYSTEM_DEFAULT_PARTITION = None - _CSTAR_SYSTEM_CORES_PER_NODE = 12 # get from sysctl -n hw.ncpu + _CSTAR_SYSTEM_CORES_PER_NODE = os.cpu_count() + _CSTAR_SYSTEM_MEMGB_PER_NODE = None + _CSTAR_SYSTEM_MAX_WALLTIME = None + +elif ( + (platform.system() == "Linux") + and (platform.machine() == "x86_64") + and ("LMOD_DIR" not in list(os.environ)) +): + os.environ["MPIHOME"] = os.environ["CONDA_PREFIX"] + os.environ["NETCDFHOME"]
= os.environ["CONDA_PREFIX"] + os.environ["LD_LIBRARY_PATH"] = ( + os.environ.get("LD_LIBRARY_PATH", default="") + + ":" + + os.environ["NETCDFHOME"] + + "/lib" + ) + _CSTAR_COMPILER = "gnu" + _CSTAR_SYSTEM = "linux_x86_64" + _CSTAR_SCHEDULER = None + _CSTAR_SYSTEM_DEFAULT_PARTITION = None + _CSTAR_SYSTEM_CORES_PER_NODE = os.cpu_count() _CSTAR_SYSTEM_MEMGB_PER_NODE = None _CSTAR_SYSTEM_MAX_WALLTIME = None + # TODO: lots of this is repeat code, can determine a lot of these vars using functions rather than hardcoding # Now read the local/custom initialisation file # This sets variables associated with external codebases that are not installed @@ -140,6 +175,9 @@ _CSTAR_CONFIG_FILE = _CSTAR_ROOT + "/cstar_local_config.py" if os.path.exists(_CSTAR_CONFIG_FILE): - pass + from cstar_ocean.cstar_local_config import set_local_environment + + set_local_environment() + ################################################################################ diff --git a/cstar_ocean/cstar_ocean/input_dataset.py b/cstar_ocean/cstar_ocean/input_dataset.py index ca85562..6977c38 100644 --- a/cstar_ocean/cstar_ocean/input_dataset.py +++ b/cstar_ocean/cstar_ocean/input_dataset.py @@ -1,6 +1,9 @@ import os import pooch import hashlib +import datetime as dt +import dateutil.parser +from typing import Optional from cstar_ocean.base_model import BaseModel @@ -29,7 +32,14 @@ class InputDataset: Verify whether the file containing this input dataset has been fetched to `local_path` """ - def __init__(self, base_model: BaseModel, source: str, file_hash: str): + def __init__( + self, + base_model: BaseModel, + source: str, + file_hash: str, + start_date: Optional[str | dt.datetime] = None, + end_date: Optional[str | dt.datetime] = None, + ): """ Initialize an InputDataset object associated with a base model using a source URL and file hash @@ -47,8 +57,17 @@ def __init__(self, base_model: BaseModel, source: str, file_hash: str): self.base_model: BaseModel = base_model self.source: str = source self.file_hash: str = file_hash - self.exists_locally: bool | None = None - self.local_path: str | None = None + self.exists_locally: Optional[bool] = None + self.local_path: Optional[str] = None + self.start_date = start_date + self.end_date = end_date + if isinstance(start_date, str): + self.start_date = dateutil.parser.parse(start_date) + if isinstance(end_date, str): + self.end_date = dateutil.parser.parse(end_date) + + assert self.start_date is None or isinstance(self.start_date, dt.datetime) + assert self.end_date is None or isinstance(self.end_date, dt.datetime) def __str__(self): name = self.__class__.__name__ @@ -58,6 +77,10 @@ def __str__(self): base_str += f"\nBase model: {self.base_model.name}" base_str += f"\nRemote path URL: {self.source}" + if self.start_date is not None: + base_str += f"\nstart_date: {self.start_date}" + if self.end_date is not None: + base_str += f"\nend_date: {self.end_date}" if self.exists_locally is not None: base_str += f"\n Exists locally: {self.exists_locally}" if self.local_path is not None: @@ -93,7 +116,8 @@ def get(self, local_path: str): ) to_fetch.fetch(os.path.basename(self.source), downloader=downloader) - self.local_path = tgt_dir + self.exists_locally = True + self.local_path = tgt_dir + "/" + os.path.basename(self.source) def check_exists_locally(self, local_path: str) -> bool: """ diff --git a/cstar_ocean/cstar_ocean/utils.py b/cstar_ocean/cstar_ocean/utils.py index 3d2675c..b3fee56 100644 --- a/cstar_ocean/cstar_ocean/utils.py +++ b/cstar_ocean/cstar_ocean/utils.py @@ -14,12 +14,47 
@@ def _write_to_config_file(config_file_str): ) base_conf_str += "\nimport os\n" + base_conf_str += "def set_local_environment():\n" config_file_str = base_conf_str + config_file_str with open(_CSTAR_CONFIG_FILE, "a") as f: f.write(config_file_str) +def _clone_and_checkout( + source_repo: str, local_path: str, checkout_target: str +) -> None: + """Clone `source_repo` to `local_path` and checkout `checkout_target`.""" + clone_result = subprocess.run( + f"git clone {source_repo} {local_path}", + shell=True, + capture_output=True, + text=True, + ) + if clone_result.returncode != 0: + raise RuntimeError( + f"Error {clone_result.returncode} when cloning repository " + + f"{source_repo} to {local_path}. Error messages: " + + f"\n{clone_result.stderr}" + ) + print(f"Cloned repository {source_repo} to {local_path}") + + checkout_result = subprocess.run( + f"git checkout {checkout_target}", + cwd=local_path, + shell=True, + capture_output=True, + text=True, + ) + if checkout_result.returncode != 0: + raise RuntimeError( + f"Error {checkout_result.returncode} when checking out " + + f"{checkout_target} in git repository {local_path}. Error messages: " + + f"\n{checkout_result.stderr}" + ) + print(f"Checked out {checkout_target} in git repository {local_path}") + + def _get_repo_remote(local_root): """Take a local repository path string (local_root) and return as a string the remote URL""" return subprocess.run( diff --git a/cstar_ocean/examples/cstar_blueprint_roms_marbl_example.yaml b/cstar_ocean/examples/cstar_blueprint_roms_marbl_example.yaml index 9d6cfe3..97a065b 100644 --- a/cstar_ocean/examples/cstar_blueprint_roms_marbl_example.yaml +++ b/cstar_ocean/examples/cstar_blueprint_roms_marbl_example.yaml @@ -1,11 +1,8 @@ registry_attrs: name: roms_marbl_example -# valid_date_range: 80s-10s - -default_run_parameters: - start_date: 2012-01-03 - end_date: 2012-12-31 - + valid_date_range: + start_date: 2012-01-03 12:00:00 + end_date: 2012-12-31 23:00:00 components: - component: @@ -15,7 +12,7 @@ components: checkout_target: 'marbl0.45.0' additional_code: source_repo: '/~https://github.com/dafyddstephenson/roms_marbl_example.git' - checkout_target: '9f66e60bea0553868268c75571aba67df94356ce' + checkout_target: 'cstar_latest' namelists: - "namelists/MARBL/marbl_in" - "namelists/MARBL/marbl_tracer_output_list" @@ -25,16 +22,17 @@ components: base_model: name: 'ROMS' source_repo: '/~https://github.com/CESR-lab/ucla-roms.git' - checkout_target: '1ab203d5334a17bf398cd86765b9fa268e0065a8' + checkout_target: '2a4e9107ffc771d2833fbea3e08f1e169759b5ff' discretization: nx: 24 ny: 24 n_levels: 100 n_procs_x: 3 n_procs_y: 3 + time_step: 360 additional_code: source_repo: '/~https://github.com/dafyddstephenson/roms_marbl_example.git' - checkout_target: '9f66e60bea0553868268c75571aba67df94356ce' + checkout_target: 'cstar_latest' source_mods: - "source_mods/ROMS/bgc.opt" - "source_mods/ROMS/bulk_frc.opt" @@ -47,7 +45,7 @@ components: - "source_mods/ROMS/get_makefile" - "source_mods/ROMS/Make.depend" namelists: - - "namelists/ROMS/roms.in_MARBL" + - "namelists/ROMS/roms.in_TEMPLATE" input_datasets: model_grid: files: @@ -57,6 +55,8 @@ components: files: - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/MARBL_rst.20120103120000.nc' hash: 'fc3bbd039256edc89c898efda0eebc5c53773995598d59310bc6d57f454a6ddd' + start_date: 2012-01-03 11:59:24 + end_date: 2012-01-03 12:00:00 tidal_forcing: files: - source:
'/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_tides.nc' @@ -65,33 +65,63 @@ components: files: - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_bry_2012.nc' hash: 'c3b0e14aae6dd5a0d54703fa04cf95960c1970e732c0a230427bf8b0fbbd8bf1' + start_date: 2012-01-01 01:00:00 + end_date: 2012-12-30 00:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_bry_bgc_MARBL.nc' hash: '897a8df8ed45841a98b3906f2dd07750decc5c2b50095ba648a855c869c7d3ee' + start_date: 2012-01-01 01:00:00 + end_date: 2012-12-01 01:00:00 surface_forcing: files: - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc_bgc.nc' hash: '621dd23691d87aa93c5cc582daf6c5f18333ed062ff934777d50b63346c3f84d' + start_date: 2012-01-01 01:00:00 + end_date: 2012-11-30 19:30:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201201.nc' hash: '923049a9c2ab9ce77fa4a0211585e6848a12e87bf237e7aa310f693c3ac6abfa' + start_date: 2012-01-01 01:00:00 + end_date: 2012-01-31 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201202.nc' hash: '5a5d99cdfaacdcda7b531916f6af0f7cef4aea595ea634dac809226ea2a8a4fe' + start_date: 2012-02-01 00:00:00 + end_date: 2012-02-29 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201203.nc' hash: '8251bd08d435444da7c38fe11eba082365ee7b68453b6dc61460ddcb72c07671' + start_date: 2012-03-01 00:00:00 + end_date: 2012-03-31 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201204.nc' hash: '0b62ab974bd718af1d421a715dc2b0968f65ec99856513f2ee988d996ff3d059' + start_date: 2012-04-01 00:00:00 + end_date: 2012-04-30 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201205.nc' hash: 'b82797f91c0741245e58b90f787c9597f342faa49c45ebb27e2df964006d6df5' + start_date: 2012-05-01 00:00:00 + end_date: 2012-05-31 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201206.nc' hash: '8cf6f2413ae45dddc1680a19aea0d40a04def82366d626a7fe33dfe5eef7ea7f' + start_date: 2012-06-01 00:00:00 + end_date: 2012-06-30 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201207.nc' hash: '4ec7284f2bdc222b961483af5f6a01ecd6feea5236bb57d2101171f38ea8653b' + start_date: 2012-07-01 00:00:00 + end_date: 2012-07-31 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201208.nc' hash: '4eec008592337e0da87c2fac8c41a1400cc7067fcdc146a665db5b3a74213828' + start_date: 2012-08-01 00:00:00 + end_date: 2012-08-31 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201209.nc' hash: 'feb5718c45c4d0874919367fbadfca6784dfddaa2b193ef767a37d92a554eed4' + start_date: 2012-09-01 00:00:00 + end_date: 2012-09-30 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201210.nc' hash: '74538789218a2815c5a5532756e1282958d22026da7513ced0131febfce1012b' + start_date: 2012-10-01 00:00:00 + end_date: 2012-10-31 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201211.nc' hash: 
'c79d4b2a9d1c41f9c603454c2b023995a6c3ea78c01d17b7428257c3c66f8750' + start_date: 2012-11-01 00:00:00 + end_date: 2012-11-30 23:00:00 - source: '/~https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201212.nc' hash: '477d1c0f2abcb0d5227594777521ce30d30c2376f5a8b2f08c25e25a77fd1fa5' + start_date: 2012-12-01 00:00:00 + end_date: 2012-12-31 23:00:00 diff --git a/cstar_ocean/examples/cstar_example.py b/cstar_ocean/examples/cstar_example.py index 96d8a0f..4b6668b 100644 --- a/cstar_ocean/examples/cstar_example.py +++ b/cstar_ocean/examples/cstar_example.py @@ -3,13 +3,14 @@ For more details, see ../README.md or cstar_example_notebook.ipynb """ -import os import cstar_ocean as cstar roms_marbl_case = cstar.Case.from_blueprint( blueprint="cstar_blueprint_roms_marbl_example.yaml", caseroot="roms_marbl_example_case/", + start_date="20120103 12:00:00", + end_date="20120104 12:00:00", ) ## In a python session, execute: diff --git a/cstar_ocean/tests/test_roms_marbl_example.py b/cstar_ocean/tests/test_roms_marbl_example.py new file mode 100644 index 0000000..947602b --- /dev/null +++ b/cstar_ocean/tests/test_roms_marbl_example.py @@ -0,0 +1,21 @@ +from unittest.mock import patch +import cstar_ocean as cstar + +roms_marbl_case = cstar.Case.from_blueprint( + blueprint=(cstar.environment._CSTAR_ROOT) + + "/../examples/cstar_blueprint_roms_marbl_example.yaml", + caseroot="roms_marbl_example_case/", + start_date="20120103 12:00:00", + end_date="20120103 12:30:00", +) + +# patch will automatically respond "y" to any call for input +with patch("builtins.input", return_value="y"): + roms_marbl_case.setup() + roms_marbl_case.persist("test_blueprint.yaml") + roms_marbl_case.build() + roms_marbl_case.pre_run() + roms_marbl_case.run() + roms_marbl_case.post_run() + +print("TEST COMPLETE!")
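The scheduling logic this patch threads through cstar_case.py and component.py reduces to two small calculations: Case.setup() fetches an input dataset only when its date range overlaps the Case's (or when its dates are unknown), and Case.run() hands ROMSComponent.run() a step count equal to the run length divided by the component time step. A minimal standalone sketch of both (not part of the patch; `overlaps` is an illustrative helper, and the dates and 360 s time step are taken from the test and example blueprint above):

import datetime as dt

def overlaps(ds_start, ds_end, case_start, case_end):
    # Mirrors the Case.setup() check: fetch when dates are unknown or ranges overlap
    if ds_start is None or ds_end is None:
        return True
    return (ds_start <= case_end) and (ds_end >= case_start)

start = dt.datetime(2012, 1, 3, 12, 0, 0)  # start_date used in the test above
end = dt.datetime(2012, 1, 3, 12, 30, 0)   # end_date used in the test above

# January-only surface forcing overlaps a 30-minute run on 2012-01-03, so it is fetched:
print(overlaps(dt.datetime(2012, 1, 1, 1), dt.datetime(2012, 1, 31, 23), start, end))  # True
# February forcing does not overlap, so Case.setup() skips it:
print(overlaps(dt.datetime(2012, 2, 1), dt.datetime(2012, 2, 29, 23), start, end))     # False

# Case.run() computes the step count passed as ROMSComponent.run(n_time_steps=...):
time_step = 360  # seconds, from the example blueprint's discretization block
n_time_steps = int((end - start).total_seconds()) // time_step
print(n_time_steps)  # 1800 s // 360 s = 5 steps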