Skip to content

Commit

Permalink
support track_order on dataset attributes (#247)
Browse files Browse the repository at this point in the history
* support track_order on dataset attributes

* fix testcase

* version bump
  • Loading branch information
jreadey authored Jan 3, 2025
1 parent 880721b commit 8734376
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 58 deletions.
8 changes: 4 additions & 4 deletions h5pyd/_hl/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ def __len__(self):
def __iter__(self):
""" Iterate over the names of attributes. """
if self._objdb_attributes is not None:
if self._parent._track_order:
if self._parent.track_order:
attrs = sorted(self._objdb_attributes.items(), key=lambda x: x[1]['created'])
else:
attrs = sorted(self._objdb_attributes.items())
Expand All @@ -469,7 +469,7 @@ def __iter__(self):
req = self._req_prefix
# backup over the trailing slash in req
req = req[:-1]
rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent._track_order else "0"})
rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent.track_order else "0"})
attributes = rsp['attributes']

attrlist = []
Expand Down Expand Up @@ -505,7 +505,7 @@ def __repr__(self):
def __reversed__(self):
""" Iterate over the names of attributes in reverse order. """
if self._objdb_attributes is not None:
if self._parent._track_order:
if self._parent.track_order:
attrs = sorted(self._objdb_attributes.items(), key=lambda x: x[1]['created'])
else:
attrs = sorted(self._objdb_attributes.items())
Expand All @@ -522,7 +522,7 @@ def __reversed__(self):
req = self._req_prefix
# backup over the trailing slash in req
req = req[:-1]
rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent._track_order else "0"})
rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent.track_order else "0"})
attributes = rsp['attributes']

attrlist = []
Expand Down
24 changes: 23 additions & 1 deletion h5pyd/_hl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,10 @@ def modified(self):
"""Last modified time as a datetime object"""
return self.id._modified

@property
def track_order(self):
return self._track_order

def verifyCert(self):
# default to validate CERT for https requests, unless
# the H5PYD_VERIFY_CERT environment variable is set and True
Expand Down Expand Up @@ -1072,7 +1076,7 @@ def DELETE(self, req, params=None):
if rsp.status_code != 200:
raise IOError(rsp.reason)

def __init__(self, oid, file=None):
def __init__(self, oid, file=None, track_order=None):
""" Setup this object, given its low-level identifier """
self._id = oid
self.log = self._id.http_conn.logging
Expand All @@ -1092,6 +1096,24 @@ def __init__(self, oid, file=None):
else:
pass

if track_order is None:
# set order based on group creation props
obj_json = self.id.obj_json
if "creationProperties" in obj_json:
cpl = obj_json["creationProperties"]
else:
cpl = {}
if "CreateOrder" in cpl:
createOrder = cpl["CreateOrder"]
if not createOrder or createOrder == "0":
self._track_order = False
else:
self._track_order = True
else:
self._track_order = False
else:
self._track_order = track_order

def __hash__(self):
return hash(self.id.id)

Expand Down
27 changes: 17 additions & 10 deletions h5pyd/_hl/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from . import selections as sel
from .datatype import Datatype
from .h5type import getTypeItem, createDataType, check_dtype, special_dtype, getItemSize
from .. import config

_LEGACY_GZIP_COMPRESSION_VALS = frozenset(range(10))
VERBOSE_REFRESH_TIME = 1.0 # 1 second
Expand Down Expand Up @@ -77,6 +78,7 @@ def make_new_dset(
compression_opts=None,
fillvalue=None,
scaleoffset=None,
track_order=None,
track_times=None,
initializer=None,
initializer_opts=None
Expand All @@ -88,6 +90,7 @@ def make_new_dset(

# fill in fields for the body of the POST request as we got
body = {}
cfg = config.get_config()

# Convert data to a C-contiguous ndarray
if data is not None and not isinstance(data, Empty):
Expand Down Expand Up @@ -249,17 +252,14 @@ def make_new_dset(

dcpl["fillValue"] = fillvalue

if track_order or cfg.track_order:
dcpl["CreateOrder"] = 1

if chunks and isinstance(chunks, dict):
dcpl["layout"] = chunks

body["creationProperties"] = dcpl

"""
if track_times in (True, False):
dcpl.set_obj_track_times(track_times)
elif track_times is not None:
raise TypeError("track_times must be either True or False")
"""
if maxshape is not None and len(maxshape) > 0:
if shape is not None:
maxshape = tuple(m if m is not None else 0 for m in maxshape)
Expand Down Expand Up @@ -291,7 +291,7 @@ def make_new_dset(

if data is not None:
# init data
dset = Dataset(dset_id)
dset = Dataset(dset_id, track_order=(track_order or cfg.track_order))
dset[...] = data

return dset_id
Expand Down Expand Up @@ -763,12 +763,12 @@ def allocated_size(self):
self._getVerboseInfo()
return self._allocated_size

def __init__(self, bind, track_order=False):
def __init__(self, bind, track_order=None):
"""Create a new Dataset object by binding to a low-level DatasetID."""

if not isinstance(bind, DatasetID):
raise ValueError(f"{bind} is not a DatasetID")
HLObject.__init__(self, bind)
HLObject.__init__(self, bind, track_order=track_order)

self._dcpl = self.id.dcpl_json
self._filters = filters.get_filters(self._dcpl)
Expand All @@ -778,7 +778,14 @@ def __init__(self, bind, track_order=False):
# make a numpy dtype out of the type json
self._dtype = createDataType(self.id.type_json)
self._item_size = getItemSize(self.id.type_json)
self._track_order = track_order
if track_order is None:
if "CreateOrder" in self._dcpl:
if not self._dcpl["CreateOrder"] or self._dcpl["CreateOrder"] == "0":
self._track_order = False
else:
self._track_order = True
else:
self._track_order = track_order

self._shape = self.get_shape()

Expand Down
65 changes: 39 additions & 26 deletions h5pyd/_hl/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def __init__(self, bind, track_order=None, **kwargs):

if not isinstance(bind, GroupID):
raise ValueError(f"{bind} is not a GroupID")
HLObject.__init__(self, bind, **kwargs)

HLObject.__init__(self, bind, track_order=track_order, **kwargs)
"""
if track_order is None:
# set order based on group creation props
gcpl = self.id.gcpl_json
Expand All @@ -73,6 +73,7 @@ def __init__(self, bind, track_order=None, **kwargs):
self._track_order = False
else:
self._track_order = track_order
"""
self._req_prefix = "/groups/" + self.id.uuid
self._link_db = {} # cache for links

Expand Down Expand Up @@ -164,7 +165,7 @@ def _get_link_json(self, h5path):
req = "/groups/" + parent_uuid + "/links/" + name

try:
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
rsp_json = self.GET(req, params={"CreateOrder": "1" if self.track_order else "0"})
except IOError:
raise KeyError("Unable to open object (Component not found)")

Expand Down Expand Up @@ -219,9 +220,9 @@ def _make_group(self, parent_id=None, parent_name=None, link=None, track_order=N

group_json = rsp
groupId = GroupID(self, group_json)
sub_group = Group(groupId)
if track_order or cfg.track_order:
sub_group._track_order = True

sub_group = Group(groupId, track_order=(track_order or cfg.track_order))

if parent_name:
if parent_name[-1] == '/':
parent_name = parent_name + link
Expand Down Expand Up @@ -355,6 +356,8 @@ def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds):
conjunction with the scale/offset filter.
fillvalue
(Scalar) Use this value for uninitialized parts of the dataset.
track_order
(T/F) List attributes by creation_time if set
track_times
(T/F) Enable dataset creation timestamps.
initializer
Expand Down Expand Up @@ -426,12 +429,17 @@ def create_dataset_like(self, name, other, **kwupdate):
'fillvalue'):
kwupdate.setdefault(k, getattr(other, k))
# TODO: more elegant way to pass these (dcpl to create_dataset?)
# TBD: track times and creation order not yet supported
"""
dcpl = other.id.get_create_plist()
kwupdate.setdefault('track_times', dcpl.get_obj_track_times())
kwupdate.setdefault('track_order', dcpl.get_attr_creation_order() > 0)
"""

dcpl_json = other.id.dcpl_json
track_order = None
if "CreateOrder" in dcpl_json:
createOrder = dcpl_json["CreateOrder"]
if not createOrder or createOrder == "0":
track_order = False
else:
track_order = True

kwupdate.setdefault('track_order', track_order)

# Special case: the maxshape property always exists, but if we pass it
# to create_dataset, the new dataset will automatically get chunked
Expand Down Expand Up @@ -566,9 +574,10 @@ def getObjByUuid(self, uuid, collection_type=None, track_order=None):
else:
raise IOError(f"Unexpected uuid: {uuid}")
objdb = self.id.http_conn.getObjDb()
if objdb and uuid in objdb:
if objdb and uuid in objdb and False:
# we should be able to construct an object from objdb json
obj_json = objdb[uuid]
print('fetch from db')
else:
# will need to get JSON from server
req = f"/{collection_type}/{uuid}"
Expand All @@ -583,11 +592,11 @@ def getObjByUuid(self, uuid, collection_type=None, track_order=None):
elif collection_type == 'datatypes':
tgt = Datatype(TypeID(self, obj_json))
elif collection_type == 'datasets':
# create a Table if the daset is one dimensional and compound
# create a Table if the dataset is one dimensional and compound
shape_json = obj_json["shape"]
dtype_json = obj_json["type"]
if "dims" in shape_json and len(shape_json["dims"]) == 1 and dtype_json["class"] == 'H5T_COMPOUND':
tgt = Table(DatasetID(self, obj_json))
tgt = Table(DatasetID(self, obj_json), track_order=track_order)
else:
tgt = Dataset(DatasetID(self, obj_json), track_order=track_order)
else:
Expand Down Expand Up @@ -700,6 +709,10 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Non
Return HardLink, SoftLink and ExternalLink classes. Return
"default" if nothing with that name exists.
"track_order" is (T/F):
List links and attributes by creation order if True, alphanumerically if False.
If None, the track_order used when creating the group will be used.
"limit" is an integer:
If "name" is None, this will return the first "limit" links in the group.
Expand Down Expand Up @@ -876,8 +889,8 @@ def __setitem__(self, name, obj):
parent_uuid = link_json["id"]
req = "/groups/" + parent_uuid
params = {}
if self._track_order is not None:
params["CreateOrder"] = "1" if self._track_order else "0"
if self.track_order is not None:
params["CreateOrder"] = "1" if self.track_order else "0"
group_json = self.GET(req, params=params)
tgt = Group(GroupID(self, group_json))
tgt[basename] = obj
Expand Down Expand Up @@ -977,8 +990,8 @@ def __len__(self):

req = "/groups/" + self.id.uuid
params = {}
if self._track_order is not None:
params["CreateOrder"] = "1" if self._track_order else "0"
if self.track_order is not None:
params["CreateOrder"] = "1" if self.track_order else "0"
rsp_json = self.GET(req, params=params)
return rsp_json['linkCount']

Expand All @@ -989,8 +1002,8 @@ def __iter__(self):
if links is None:
req = "/groups/" + self.id.uuid + "/links"
params = {}
if self._track_order is not None:
params["CreateOrder"] = "1" if self._track_order else "0"
if self.track_order is not None:
params["CreateOrder"] = "1" if self.track_order else "0"
rsp_json = self.GET(req, params=params)
links = rsp_json['links']

Expand All @@ -1003,7 +1016,7 @@ def __iter__(self):
for x in links:
yield x['title']
else:
if self._track_order:
if self.track_order:
links = sorted(links.items(), key=lambda x: x[1]['created'])
else:
links = sorted(links.items())
Expand Down Expand Up @@ -1217,8 +1230,8 @@ def visititems(self, func):
# request from server
req = "/groups/" + parent.id.uuid + "/links"
params = {}
if self._track_order is not None:
params["CreateOrder"] = "1" if self._track_order else "0"
if self.track_order is not None:
params["CreateOrder"] = "1" if self.track_order else "0"
rsp_json = self.GET(req, params=params)
links = rsp_json['links']
for link in links:
Expand Down Expand Up @@ -1270,7 +1283,7 @@ def __reversed__(self):

if links is None:
req = "/groups/" + self.id.uuid + "/links"
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
rsp_json = self.GET(req, params={"CreateOrder": "1" if self.track_order else "0"})
links = rsp_json['links']

# reset the link cache
Expand All @@ -1282,7 +1295,7 @@ def __reversed__(self):
for x in reversed(links):
yield x['title']
else:
if self._track_order:
if self.track_order:
links = sorted(links.items(), key=lambda x: x[1]['created'])
else:
links = sorted(links.items())
Expand Down
4 changes: 2 additions & 2 deletions h5pyd/_hl/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,13 @@ class Table(Dataset):
"""
Represents an HDF5 dataset
"""
def __init__(self, bind):
def __init__(self, bind, track_order=None):
""" Create a new Table object by binding to a low-level DatasetID.
"""

if not isinstance(bind, DatasetID):
raise ValueError(f"{bind} is not a DatasetID")
Dataset.__init__(self, bind)
Dataset.__init__(self, bind, track_order=track_order)

if len(self._dtype) < 1:
raise ValueError("Table type must be compound")
Expand Down
6 changes: 3 additions & 3 deletions h5pyd/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import sys
import numpy

version = "0.20.0"
version = "0.21.0"

hdf5_version = "REST"

Expand All @@ -28,8 +28,8 @@
else ("",)
)

api_version_tuple = (0, 20, 0)
api_version = "0.20.0"
api_version_tuple = (0, 21, 0)
api_version = "0.21.0"

__doc__ = f"""\
This is h5pyd **{version}**
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
]
requires-python = ">=3.8"
version = "0.20.0"
version = "0.21.0"

dependencies = [
"numpy >=2.0.0rc1; python_version>='3.9'",
Expand Down
Loading

0 comments on commit 8734376

Please sign in to comment.