Skip to content

Commit

Permalink
Merge pull request #2886 from djhoese/bugfix-pyhdf-tokenize
Browse files: browse the repository at this point in the history
Update pyhdf-based arrs to be manually tokenized
  • Loading branch information
djhoese authored Aug 23, 2024
2 parents 278d00b + 5e27be4 commit 4a75b65
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 9 deletions.
22 changes: 17 additions & 5 deletions satpy/readers/hdf4_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
"""Helpers for reading hdf4-based files."""

import logging
import os

import dask.array as da
import numpy as np
import xarray as xr
from dask.base import tokenize
from pyhdf.SD import SD, SDC, SDS

from satpy.readers.file_handlers import BaseFileHandler
Expand All @@ -45,12 +47,22 @@
}


def from_sds(var, src_path, **kwargs):
    """Create a dask array from a SD dataset.

    The ``dtype`` and ``shape`` entries are written directly into
    ``var.__dict__`` before ``var`` is handed to ``dask.array.from_array``.

    Unless the caller supplies a ``name``, a deterministic dask name is
    built from the source file path, the variable name and any remaining
    keyword arguments, so the same variable from the same file always maps
    to the same dask name.

    Args:
        var: SD dataset object providing ``info()``.
        src_path: Path of the file ``var`` was selected from. It is only
            used as an input to the dask name.
        **kwargs: Passed on to ``dask.array.from_array``. A ``name`` entry
            that is not None is used as-is instead of the generated name.

    Returns:
        The dask array wrapping ``var``.
    """
    var_info = var.info()
    var.__dict__["dtype"] = np.dtype(HTYPE_TO_DTYPE[var_info[3]])
    shape = var_info[2]
    # NOTE(review): pyhdf appears to report the shape of a rank-1 dataset as
    # a bare integer rather than a list - confirm against the pyhdf docs.
    # ``tuple()`` of an integer raises TypeError, so wrap it instead.
    var.__dict__["shape"] = tuple(shape) if isinstance(shape, (tuple, list)) else (shape,)

    name = kwargs.pop("name", None)
    if name is None:
        var_name = var_info[0]
        tokenize_args = (os.fspath(src_path), var_name)
        if kwargs:
            # different from_array options (e.g. chunks) get a different name
            tokenize_args += (kwargs,)
        # put variable name in the front for easier dask debugging
        name = var_name + "-" + tokenize(*tokenize_args)
    return da.from_array(var, name=name, **kwargs)


class HDF4FileHandler(BaseFileHandler):
Expand Down Expand Up @@ -92,7 +104,7 @@ def collect_metadata(self, name, obj):

def _open_xarray_dataset(self, val, chunks=CHUNK_SIZE):
    """Read the band in blocks.

    Args:
        val: SD dataset object; it is passed to ``from_sds`` and its
            ``attributes()`` become the attrs of the result.
        chunks: Chunking forwarded to ``from_sds``.

    Returns:
        An ``xarray.DataArray`` with dims ``("y", "x")`` backed by the dask
        array created from ``val``.
    """
    # from_sds requires the source path; it is used to build the dask name
    dask_arr = from_sds(val, self.filename, chunks=chunks)
    return xr.DataArray(dask_arr, dims=("y", "x"), attrs=val.attributes())
Expand Down
2 changes: 1 addition & 1 deletion satpy/readers/hdfeos_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def load_dataset(self, dataset_name, is_category=False):

dataset = self._read_dataset_in_file(dataset_name)
chunks = self._chunks_for_variable(dataset)
dask_arr = from_sds(dataset, chunks=chunks)
dask_arr = from_sds(dataset, self.filename, chunks=chunks)
dims = ("y", "x") if dask_arr.ndim == 2 else None
data = xr.DataArray(dask_arr, dims=dims,
attrs=dataset.attributes())
Expand Down
4 changes: 2 additions & 2 deletions satpy/readers/modis_l1b.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def get_dataset(self, key, info):
var_attrs = subdata.attributes()
uncertainty = self.sd.select(var_name + "_Uncert_Indexes")
chunks = self._chunks_for_variable(subdata)
array = xr.DataArray(from_sds(subdata, chunks=chunks)[band_index, :, :],
array = xr.DataArray(from_sds(subdata, self.filename, chunks=chunks)[band_index, :, :],
dims=["y", "x"]).astype(np.float32)
valid_range = var_attrs["valid_range"]
valid_min = np.float32(valid_range[0])
Expand Down Expand Up @@ -214,7 +214,7 @@ def _mask_uncertain_pixels(self, array, uncertainty, band_index):
if not self._mask_saturated:
return array
uncertainty_chunks = self._chunks_for_variable(uncertainty)
band_uncertainty = from_sds(uncertainty, chunks=uncertainty_chunks)[band_index, :, :]
band_uncertainty = from_sds(uncertainty, self.filename, chunks=uncertainty_chunks)[band_index, :, :]
array = array.where(band_uncertainty < 15)
return array

Expand Down
2 changes: 1 addition & 1 deletion satpy/readers/modis_l2.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def read_geo_resolution(metadata):
def _select_hdf_dataset(self, hdf_dataset_name, byte_dimension):
"""Load a dataset from HDF-EOS level 2 file."""
dataset = self.sd.select(hdf_dataset_name)
dask_arr = from_sds(dataset, chunks=CHUNK_SIZE)
dask_arr = from_sds(dataset, self.filename, chunks=CHUNK_SIZE)
attrs = dataset.attributes()
dims = ["y", "x"]
if byte_dimension == 0:
Expand Down

0 comments on commit 4a75b65

Please sign in to comment.