Skip to content
This repository has been archived by the owner on Sep 11, 2023. It is now read-only.

change x to x_osgb, similar for y #558

Merged
merged 1 commit into from
Dec 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions nowcasting_dataset/data_sources/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,14 +469,14 @@ def get_example(
x_meters_center=x_meters_center, y_meters_center=y_meters_center
)
selected_data = selected_data.sel(
x=slice(bounding_box.left, bounding_box.right),
y=slice(bounding_box.top, bounding_box.bottom),
x_osgb=slice(bounding_box.left, bounding_box.right),
y_osgb=slice(bounding_box.top, bounding_box.bottom),
)

# selected_data is likely to have 1 too many pixels in x and y
# because sel(x=slice(a, b)) is [a, b], not [a, b). So trim:
selected_data = selected_data.isel(
x=slice(0, self._square.size_pixels), y=slice(0, self._square.size_pixels)
x_osgb=slice(0, self._square.size_pixels), y_osgb=slice(0, self._square.size_pixels)
)

selected_data = self._post_process_example(selected_data, t0_dt)
Expand Down
16 changes: 8 additions & 8 deletions nowcasting_dataset/data_sources/fake/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,10 @@ def topographic_fake(batch_size, image_size_pixels, metadata: Optional[Metadata]
image_size_pixels,
image_size_pixels,
),
dims=["x", "y"],
dims=["x_osgb", "y_osgb"],
coords=dict(
x=x,
y=y,
x_osgb=x,
y_osgb=y,
),
name="data",
)
Expand All @@ -436,7 +436,7 @@ def topographic_fake(batch_size, image_size_pixels, metadata: Optional[Metadata]


def create_image_array(
dims=("time", "x", "y", "channels"),
dims=("time", "x_osgb", "y_osgb", "channels"),
seq_length=19,
history_seq_length=5,
image_size_pixels=64,
Expand All @@ -461,8 +461,8 @@ def create_image_array(

ALL_COORDS = {
"time": time,
"x": x,
"y": y,
"x_osgb": x,
"y_osgb": y,
"channels": np.array(channels),
}
coords = [(dim, ALL_COORDS[dim]) for dim in dims]
Expand Down Expand Up @@ -571,8 +571,8 @@ def create_gsp_pv_dataset(
)

data["capacity_mwp"] = capacity
data["x_coords"] = x_coords
data["y_coords"] = y_coords
data["x_osgb"] = x_coords
data["y_osgb"] = y_coords

# Add 1000 to the id numbers for the row numbers.
# This is a quick way to make sure row number is different from id,
Expand Down
4 changes: 2 additions & 2 deletions nowcasting_dataset/data_sources/gsp/gsp_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ def get_example(
data=gsp_y_coords.values,
dims=["id"],
)
gsp["x_coords"] = gsp_x_coords
gsp["y_coords"] = gsp_y_coords
gsp["x_osgb"] = gsp_x_coords
gsp["y_osgb"] = gsp_y_coords

# pad out so that there are always 32 gsp, fill with 0
pad_n = self.n_gsp_per_example - len(gsp.id)
Expand Down
8 changes: 4 additions & 4 deletions nowcasting_dataset/data_sources/gsp/gsp_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class GSP(DataSourceOutput):
_expected_data_vars = (
"power_mw",
"capacity_mwp",
"x_coords",
"y_coords",
"x_osgb",
"y_osgb",
)

@classmethod
Expand All @@ -28,8 +28,8 @@ def model_validation(cls, v):
v.check_data_var_dim(v.power_mw, ("example", "time_index", "id_index"))
v.check_data_var_dim(v.capacity_mwp, ("example", "time_index", "id_index"))
v.check_data_var_dim(v.time, ("example", "time_index"))
v.check_data_var_dim(v.x_coords, ("example", "id_index"))
v.check_data_var_dim(v.y_coords, ("example", "id_index"))
v.check_data_var_dim(v.x_osgb, ("example", "id_index"))
v.check_data_var_dim(v.y_osgb, ("example", "id_index"))

return v

Expand Down
1 change: 1 addition & 0 deletions nowcasting_dataset/data_sources/nwp/nwp_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def open(self) -> None:
"""
self._data = self._open_data()
np.testing.assert_array_equal(self._data["variable"].values, self.channels)
self._data = self._data.rename({"x": "x_osgb", "y": "y_osgb"})

def _open_data(self) -> xr.DataArray:
data = open_nwp(self.zarr_path, consolidated=self.consolidated)
Expand Down
4 changes: 2 additions & 2 deletions nowcasting_dataset/data_sources/nwp/nwp_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class NWP(DataSourceOutput):
# Use to store xr.Dataset data

__slots__ = ()
_expected_dimensions = ("time", "x", "y", "channels")
_expected_dimensions = ("time", "x_osgb", "y_osgb", "channels")
_expected_data_vars = ("data",)

@classmethod
Expand All @@ -24,7 +24,7 @@ def model_validation(cls, v):
v.check_nan_and_inf(data=v.data)

v.check_data_var_dim(
v.data, ("example", "time_index", "x_index", "y_index", "channels_index")
v.data, ("example", "time_index", "x_osgb_index", "y_osgb_index", "channels_index")
)

return v
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ def _put_predictions_into_data_array(
data=predictions,
coords=(
("time", datetime_index_of_predictions),
("x", satellite_data_cropped.coords["x"].values),
("y", satellite_data_cropped.coords["y"].values),
("x_osgb", satellite_data_cropped.coords["x_osgb"].values),
("y_osgb", satellite_data_cropped.coords["y_osgb"].values),
("channels", satellite_data.coords["channels"].values),
),
name="data",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class OpticalFlow(DataSourceOutput):
"""Class to store optical flow data as a xr.Dataset with some validation"""

__slots__ = ()
_expected_dimensions = ("time", "x", "y", "channels")
_expected_dimensions = ("time", "x_osgb", "y_osgb", "channels")
_expected_data_vars = ("data",)

@classmethod
Expand Down
4 changes: 2 additions & 2 deletions nowcasting_dataset/data_sources/pv/pv_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ def get_example(
data=pv_system_row_number,
dims=["id"],
)
pv["x_coords"] = x_coords
pv["y_coords"] = y_coords
pv["x_osgb"] = x_coords
pv["y_osgb"] = y_coords
pv["pv_system_row_number"] = pv_system_row_number

# pad out so that there are always n_pv_systems_per_example, pad with zeros
Expand Down
8 changes: 4 additions & 4 deletions nowcasting_dataset/data_sources/pv/pv_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ class PV(DataSourceOutput):
"power_mw",
"capacity_mwp",
"pv_system_row_number",
"x_coords",
"y_coords",
"x_osgb",
"y_osgb",
)

@classmethod
Expand All @@ -29,8 +29,8 @@ def model_validation(cls, v):
v.check_data_var_dim(v.power_mw, ("example", "time_index", "id_index"))
v.check_data_var_dim(v.capacity_mwp, ("example", "id_index"))
v.check_data_var_dim(v.time, ("example", "time_index"))
v.check_data_var_dim(v.x_coords, ("example", "id_index"))
v.check_data_var_dim(v.y_coords, ("example", "id_index"))
v.check_data_var_dim(v.x_osgb, ("example", "id_index"))
v.check_data_var_dim(v.y_osgb, ("example", "id_index"))
v.check_data_var_dim(v.pv_system_row_number, ("example", "id_index"))

return v
Expand Down
23 changes: 13 additions & 10 deletions nowcasting_dataset/data_sources/satellite/satellite_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def open(self) -> None:
self._data = self._open_data()
if "variable" in self._data.dims:
self._data = self._data.rename({"variable": "channels"})
self._data = self._data.rename({"x": "x_osgb", "y": "y_osgb"})
if not set(self.channels).issubset(self._data.channels.values):
raise RuntimeError(
f"One or more requested channels are not available in {self.zarr_path}!"
Expand Down Expand Up @@ -101,11 +102,13 @@ def get_spatial_region_of_interest(
The selected data around the center
"""
# Get the index into x and y nearest to x_center_osgb and y_center_osgb:
x_index_at_center = np.searchsorted(data_array.x.values, x_center_osgb) - 1
y_index_at_center = np.searchsorted(data_array.y.values, y_center_osgb) - 1
x_index_at_center = np.searchsorted(data_array.x_osgb.values, x_center_osgb) - 1
y_index_at_center = np.searchsorted(data_array.y_osgb.values, y_center_osgb) - 1
# Put x_index_at_center and y_index_at_center into a pd.Series so we can operate
# on them both in a single line of code.
x_and_y_index_at_center = pd.Series({"x": x_index_at_center, "y": y_index_at_center})
x_and_y_index_at_center = pd.Series(
{"x_osgb": x_index_at_center, "y_osgb": y_index_at_center}
)
half_image_size_pixels = self._square.size_pixels // 2
min_x_and_y_index = x_and_y_index_at_center - half_image_size_pixels
max_x_and_y_index = x_and_y_index_at_center + half_image_size_pixels
Expand All @@ -114,8 +117,8 @@ def get_spatial_region_of_interest(
suggested_reduction_of_image_size_pixels = (
max(
(-min_x_and_y_index.min() if (min_x_and_y_index < 0).any() else 0),
(max_x_and_y_index.x - len(data_array.x)),
(max_x_and_y_index.y - len(data_array.y)),
(max_x_and_y_index.x_osgb - len(data_array.x_osgb)),
(max_x_and_y_index.y_osgb - len(data_array.y_osgb)),
)
* 2
)
Expand All @@ -129,9 +132,9 @@ def get_spatial_region_of_interest(
"Requested region of interest of satellite data steps outside of the available"
" geographical extent of the Zarr data. The requested region of interest extends"
f" from pixel indicies"
f" x={min_x_and_y_index.x} to x={max_x_and_y_index.x},"
f" y={min_x_and_y_index.y} to y={max_x_and_y_index.y}. In the Zarr data,"
f" len(x)={len(data_array.x)}, len(y)={len(data_array.y)}. Try reducing"
f" x={min_x_and_y_index.x_osgb} to x={max_x_and_y_index.x_osgb},"
f" y={min_x_and_y_index.y_osgb} to y={max_x_and_y_index.y_osgb}. In the Zarr data,"
f" len(x)={len(data_array.x_osgb)}, len(y)={len(data_array.y_osgb)}. Try reducing"
f" image_size_pixels from {self._square.size_pixels} to"
f" {new_suggested_image_size_pixels} pixels."
)
Expand All @@ -140,8 +143,8 @@ def get_spatial_region_of_interest(
# Note that isel is *exclusive* of the end of the slice.
# e.g. isel(x=slice(0, 3)) will return the first, second, and third values.
data_array = data_array.isel(
x=slice(min_x_and_y_index.x, max_x_and_y_index.x),
y=slice(min_x_and_y_index.y, max_x_and_y_index.y),
x_osgb=slice(min_x_and_y_index.x_osgb, max_x_and_y_index.x_osgb),
y_osgb=slice(min_x_and_y_index.y_osgb, max_x_and_y_index.y_osgb),
)
return data_array

Expand Down
6 changes: 3 additions & 3 deletions nowcasting_dataset/data_sources/satellite/satellite_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class Satellite(DataSourceOutput):
"""Class to store satellite data as a xr.Dataset with some validation"""

__slots__ = ()
_expected_dimensions = ("time", "x", "y", "channels")
_expected_dimensions = ("time", "x_osgb", "y_osgb", "channels")
_expected_data_vars = ("data",)

@classmethod
Expand All @@ -22,7 +22,7 @@ def model_validation(cls, v):
# previously nans were filled with -1s, so let's make sure there are none
v.check_dataset_not_equal(data=v.data, value=-1)
v.check_data_var_dim(
v.data, ("example", "time_index", "x_index", "y_index", "channels_index")
v.data, ("example", "time_index", "x_osgb_index", "y_osgb_index", "channels_index")
)

return v
Expand All @@ -32,5 +32,5 @@ class HRVSatellite(Satellite):
"""Class to store HRV satellite data as a xr.Dataset with some validation"""

__slots__ = ()
_expected_dimensions = ("time", "x", "y", "channels")
_expected_dimensions = ("time", "x_osgb", "y_osgb", "channels")
_expected_data_vars = ("data",)
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,13 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int):
)

self._data = self._data.fillna(0) # Set nodata values to 0 (mostly should be ocean)
self._data = self._data.rename({"x": "x_osgb", "y": "y_osgb"})
# Add CRS for later, topo maps are assumed to be in OSGB
self._data.attrs["crs"] = OSGB
# Distance between pixels, giving their spatial extent, in meters
self._stored_pixel_size_meters = abs(self._data.coords["x"][1] - self._data.coords["x"][0])
self._stored_pixel_size_meters = abs(
self._data.coords["x_osgb"][1] - self._data.coords["x_osgb"][0]
)
self._meters_per_pixel = meters_per_pixel

@staticmethod
Expand Down Expand Up @@ -72,8 +75,8 @@ def get_example(
x_meters_center=x_meters_center, y_meters_center=y_meters_center
)
selected_data = self._data.sel(
x=slice(bounding_box.left, bounding_box.right),
y=slice(bounding_box.top, bounding_box.bottom),
x_osgb=slice(bounding_box.left, bounding_box.right),
y_osgb=slice(bounding_box.top, bounding_box.bottom),
)
if self._stored_pixel_size_meters != self._meters_per_pixel:
# Rescale here to the exact size, assumes that the above is good slice
Expand All @@ -87,7 +90,7 @@ def get_example(
# selected_data is likely to have 1 too many pixels in x and y
# because sel(x=slice(a, b)) is [a, b], not [a, b). So trim:
selected_data = selected_data.isel(
x=slice(0, self._square.size_pixels), y=slice(0, self._square.size_pixels)
x_osgb=slice(0, self._square.size_pixels), y_osgb=slice(0, self._square.size_pixels)
)

selected_data = self._post_process_example(selected_data, t0_dt)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Topographic(DataSourceOutput):
"""Class to store topographic data as a xr.Dataset with some validation"""

__slots__ = ()
_expected_dimensions = ("x", "y")
_expected_dimensions = ("x_osgb", "y_osgb")
_expected_data_vars = ("data",)

@classmethod
Expand All @@ -16,6 +16,6 @@ def model_validation(cls, v):

v.check_nan_and_inf(data=v.data)

v.check_data_var_dim(v.data, ("example", "x_index", "y_index"))
v.check_data_var_dim(v.data, ("example", "x_osgb_index", "y_osgb_index"))

return v
8 changes: 5 additions & 3 deletions nowcasting_dataset/filesystem/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,11 @@ def download_to_local(remote_filename: str, local_filename: str):
filesystem = get_filesystem(remote_filename)
try:
filesystem.get(remote_filename, local_filename)
except FileNotFoundError:
_LOG.error(f"Could not find {remote_filename}")
raise FileNotFoundError(f"Could not find {remote_filename}")
except FileNotFoundError as e:
_LOG.error(e)
message = f"Could not copy {remote_filename} to {local_filename}"
_LOG.error(message)
raise FileNotFoundError(message)


def upload_one_file(remote_filename: str, local_filename: str, overwrite: bool = True):
Expand Down
12 changes: 6 additions & 6 deletions tests/data_sources/fake/test_fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ def test_model(configuration): # noqa: D103

for i in range(configuration.process.batch_size):
for data_source_name in ["satellite", "hrvsatellite", "opticalflow", "topographic", "nwp"]:
assert x_center_osgb[i] <= batch.__getattribute__(data_source_name).x.max()
assert x_center_osgb[i] >= batch.__getattribute__(data_source_name).x.min()
assert y_center_osgb[i] <= batch.__getattribute__(data_source_name).y.max()
assert y_center_osgb[i] >= batch.__getattribute__(data_source_name).y.min()
assert x_center_osgb[i] <= batch.__getattribute__(data_source_name).x_osgb.max()
assert x_center_osgb[i] >= batch.__getattribute__(data_source_name).x_osgb.min()
assert y_center_osgb[i] <= batch.__getattribute__(data_source_name).y_osgb.max()
assert y_center_osgb[i] >= batch.__getattribute__(data_source_name).y_osgb.min()
# check first system is the center coordinates
for data_source_name in ["gsp", "pv"]:
assert x_center_osgb[i] == batch.__getattribute__(data_source_name).x_coords[i, 0]
assert y_center_osgb[i] == batch.__getattribute__(data_source_name).y_coords[i, 0]
assert x_center_osgb[i] == batch.__getattribute__(data_source_name).x_osgb[i, 0]
assert y_center_osgb[i] == batch.__getattribute__(data_source_name).y_osgb[i, 0]
2 changes: 1 addition & 1 deletion tests/data_sources/fake/test_fake_datasource_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_satellite(configuration):

s = satellite_fake(configuration=configuration)

assert s.x is not None
assert s.x_osgb is not None


def test_sun():
Expand Down
10 changes: 5 additions & 5 deletions tests/data_sources/gsp/test_gsp_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ def test_gsp_pv_data_source_get_example():
)

assert len(example.id) == len(example.power_mw[0])
assert len(example.x_coords) == len(example.y_coords)
assert len(example.x_coords) > 0
assert len(example.x_osgb) == len(example.y_osgb)
assert len(example.x_osgb) > 0
assert pd.Timestamp(example.time[0].values) <= end_dt
assert pd.Timestamp(example.time[0].values) >= start_dt

Expand Down Expand Up @@ -152,9 +152,9 @@ def test_gsp_pv_data_source_get_batch():
)

assert len(batch.power_mw[0]) == 4
assert len(batch.id[0]) == len(batch.x_coords[0])
assert len(batch.x_coords[1]) == len(batch.y_coords[1])
assert len(batch.x_coords[2]) > 0
assert len(batch.id[0]) == len(batch.x_osgb[0])
assert len(batch.x_osgb[1]) == len(batch.y_osgb[1])
assert len(batch.x_osgb[2]) > 0
# assert T0_DT in batch[3].keys()


Expand Down
Loading