Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct duplicate ping_times during EK60 conversion (fixes #235) #433

Merged
merged 15 commits into from
Sep 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions echopype/convert/set_groups_ek60.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,95 @@
import warnings
from collections import defaultdict
from datetime import datetime as dt

import numpy as np
import xarray as xr
from _echopype_version import version as ECHOPYPE_VERSION

from .set_groups_base import DEFAULT_CHUNK_SIZE, SetGroupsBase, set_encodings


class SetGroupsEK60(SetGroupsBase):
"""Class for saving groups to netcdf or zarr from EK60 data files."""

def __init__(self, *args, **kwargs):
    """Initialize the group setter and resolve duplicate ping times per channel.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer. Afterwards every transceiver channel listed in the
    parser's config datagram is checked for repeated ``ping_time`` values:

    * If the ``power`` data of the first duplicated pair is identical, the
      repeated pings are dropped from every entry of
      ``parser_obj.ping_data_dict`` for that channel and from
      ``parser_obj.ping_time``.
    * Otherwise each repeated time stamp is shifted by 1 nanosecond and
      kept.

    The first time a duplicate is found, the unmodified ping times are
    stored in ``self.old_ping_time`` (it stays ``None`` when no channel has
    duplicates).
    """
    super().__init__(*args, **kwargs)

    self.old_ping_time = None
    ping_time_by_ch = self.parser_obj.ping_time
    # correct duplicate ping_time
    for ch in self.parser_obj.config_datagram["transceivers"]:
        ping_time = ping_time_by_ch[ch]
        _, unique_idx = np.unique(ping_time, return_index=True)
        # True at every position that repeats an earlier-seen time stamp
        duplicates = np.invert(np.isin(np.arange(len(ping_time)), unique_idx))
        if not duplicates.any():
            continue

        if self.old_ping_time is None:
            # Materialize the dict view: np.stack requires a real sequence
            # (non-sequence iterables are deprecated since NumPy 1.16 and
            # rejected by newer releases).
            all_ping_times = list(ping_time_by_ch.values())
            same_shape = len({arr.shape for arr in all_ping_times}) == 1
            if (
                same_shape
                and np.unique(np.stack(all_ping_times), axis=0).shape[0] == 1
            ):
                # Every channel shares the same time axis; keep one copy
                self.old_ping_time = ping_time_by_ch[ch]
            else:
                self.old_ping_time = xr.concat(
                    [xr.DataArray(arr, dims="ping_time") for arr in all_ping_times],
                    dim="ping_time",
                )

        backscatter_r = self.parser_obj.ping_data_dict["power"][ch]
        # indexes of duplicates including the originals
        # (if there are 2 times that are the same, both will be included)
        (all_duplicates_idx,) = np.where(
            np.isin(ping_time, ping_time[duplicates][0])
        )
        # NOTE(review): only the first duplicated time stamp's first two
        # pings are compared; the outcome is applied to all duplicates in
        # this channel - confirm this is intended.
        if np.array_equal(
            backscatter_r[all_duplicates_idx[0]],
            backscatter_r[all_duplicates_idx[1]],
        ):
            warnings.warn(
                "duplicate pings with identical values detected; the duplicate pings will be removed"  # noqa
            )
            for v in self.parser_obj.ping_data_dict.values():
                if v[ch] is None or len(v[ch]) == 0:
                    continue
                if isinstance(v[ch], np.ndarray):
                    v[ch] = v[ch][unique_idx]
                else:
                    v[ch] = [v[ch][i] for i in unique_idx]
            ping_time_by_ch[ch] = ping_time_by_ch[ch][unique_idx]
        else:
            warnings.warn(
                "duplicate ping times detected; the duplicate times will be incremented by 1 nanosecond and remain in the ping_time coordinate. The original ping times will be preserved in the Provenance group"  # noqa
            )

            # NOTE(review): three or more identical time stamps would still
            # collide after a uniform +1 ns shift - TODO confirm whether
            # that can occur in practice.
            deltas = duplicates * np.timedelta64(1, "ns")
            ping_time_by_ch[ch] = ping_time + deltas

def set_provenance(self) -> xr.Dataset:
    """Set the Provenance group.

    Returns
    -------
    xr.Dataset
        Dataset whose attributes record the conversion software name and
        version, the UTC conversion time, the source filename(s) and a
        ``duplicate_ping_times`` flag (1 or 0). When ``self.old_ping_time``
        is set, it is included as the ``old_ping_time`` data variable;
        otherwise the dataset has no data variables.
    """
    # Function-scope import keeps this block self-contained; the module
    # only imports ``datetime`` (as ``dt``) at the top level.
    from datetime import timezone

    has_duplicates = self.old_ping_time is not None

    # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware
    # UTC "now" and drop tzinfo so isoformat() yields the same
    # "YYYY-MM-DDTHH:MM:SS" text as before, then append the "Z" suffix.
    utc_now = dt.now(timezone.utc).replace(tzinfo=None)

    # Collect variables
    prov_dict = {
        "conversion_software_name": "echopype",
        "conversion_software_version": ECHOPYPE_VERSION,
        "conversion_time": utc_now.isoformat(timespec="seconds") + "Z",  # use UTC time
        "src_filenames": self.input_file,
        "duplicate_ping_times": 1 if has_duplicates else 0,
    }
    # Save
    if has_duplicates:
        ds = xr.Dataset(data_vars={"old_ping_time": self.old_ping_time})
    else:
        ds = xr.Dataset()
    return ds.assign_attrs(prov_dict)

def set_env(self) -> xr.Dataset:
"""Set the Environment group."""
ch_ids = list(self.parser_obj.config_datagram["transceivers"].keys())
Expand Down
3 changes: 3 additions & 0 deletions echopype/echodata/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,12 +251,15 @@ def combine_echodata(echodatas: List[EchoData], combine_attrs="override") -> Ech
# save ping time before reversal correction
if old_ping_time is not None:
result.provenance["old_ping_time"] = old_ping_time
result.provenance.attrs["reversed_ping_times"] = 1
# save location time before reversal correction
if old_location_time is not None:
result.provenance["old_location_time"] = old_location_time
result.provenance.attrs["reversed_ping_times"] = 1
# save mru time before reversal correction
if old_mru_time is not None:
result.provenance["old_mru_time"] = old_mru_time
result.provenance.attrs["reversed_ping_times"] = 1
# TODO: possible parameter to disable original attributes and original ping_time storage
# in provenance group?
# save attrs from before combination
Expand Down
9 changes: 9 additions & 0 deletions echopype/tests/convert/test_convert_ek60.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,12 @@ def test_convert_ek60_echoview_raw():
echodata.beam.backscatter_r.isel(frequency=fidx, ping_time=slice(None, 10), range_bin=slice(1, None)),
atol=9e-6, rtol=atol
)

def test_convert_ek60_duplicate_ping_times():
    """Convert a file with duplicate ping times and check Provenance flags them."""

    raw_path = ek60_path / "ooi" / "CE02SHBP-MJ01C-07-ZPLSCB101_OOI-D20191201-T000000.raw"
    ed = open_raw(raw_path, "EK60")

    # The attribute is written on every EK60 conversion (as 0 or 1), so a
    # bare membership check would pass even if no duplicates were detected;
    # assert the flag's value instead.
    assert ed.provenance.attrs["duplicate_ping_times"] == 1
    assert "old_ping_time" in ed.provenance
2 changes: 1 addition & 1 deletion echopype/tests/echodata/test_echodata_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def test_attr_storage():
group_attrs = combined.provenance[f"{group}_attrs"]
for i, ed in enumerate(eds):
for attr, value in getattr(ed, group).attrs.items():
assert group_attrs.isel(echodata_filename=i).sel({f"{group}_attr_key": attr}).data[()] == value
assert str(group_attrs.isel(echodata_filename=i).sel({f"{group}_attr_key": attr}).data[()]) == str(value)

# check selection by echodata_filename
for file in ek60_test_data:
Expand Down