Allow ES70 files and handle datagram anomaly (#409)
* Allow data files of type ES70

The ES70 file format is identical to the ER60 and ES60 file formats, so allow such files. Also remove a deprecation warning about the use of np.complex.

* Decode from latin_1, not the default utf-8

* Cope with not finding the next datagram

Sometimes files don't end properly (sometimes due to the software crashing while writing data). When that happens, just take the data we have and be done with that file.

* Prevent a deprecation warning about np.complex

* Partial fix to support EK/ES80 files that contain data from GPT transceivers

* This ES/EK 60/70 text field uses latin_1 encoding, not the decode() default of UTF-8
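An illustration of the failure mode, using made-up channel-id bytes: 0xB0 (the latin_1 degree sign) is not a valid UTF-8 sequence on its own, so the default `decode()` raises while `decode("latin_1")` always succeeds:

```python
raw = b"GPT 38 kHz 1-1 ES38B 7\xb0"  # illustrative bytes; 0xB0 = degree sign in latin_1

try:
    raw.decode()  # bytes.decode() defaults to utf-8
    failed = False
except UnicodeDecodeError:
    failed = True  # 0xB0 is a bare continuation byte in utf-8

text = raw.decode("latin_1")  # latin_1 maps all 256 byte values, so it never raises
```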

* Bug fix - files with no echo data in the first ping caused a crash

If the first ping had no data in it, the data_list[0][0] reference would fail. Changed the code to more directly do what was desired (check whether the data type is a complex data type). Also note that complex (or the deprecated np.complex) is the same data type as np.complex128, so removed 'complex' from the list.
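A sketch of the two checks on an empty first ping — the element-based test crashes, the dtype-based test does not (illustrative arrays, not echopype's internals):

```python
import numpy as np

ping = np.empty((0,), dtype=np.complex64)  # first ping holds no samples

# Element-based check (the old code): IndexError on an empty array.
try:
    isinstance(ping[0], (np.complex64, np.complex128))
    crashed = False
except IndexError:
    crashed = True

# dtype-based check (the fix): works no matter how many samples there are.
is_complex = ping.dtype in {np.dtype("complex64"), np.dtype("complex128")}

# The builtin complex and np.complex128 are the same dtype, so listing
# both would be redundant.
assert np.dtype(complex) == np.dtype("complex128")
```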

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Undo the previous commit (should not have gone on this branch or PR)

* sigh... and fix the fix...

* Bug fix - files with no echo data in the first ping caused a crash

If the first ping had no data in it, the data_list[0][0] reference would fail. Changed the code to more directly do what was intended (check whether the data type is a complex data type). Also note that complex (or the deprecated np.complex) is the same data type as np.complex128, so removed 'complex' from the list.

* Fix error when Simrad file ends abruptly

Sometimes Simrad files don't end properly (e.g., due to the software crashing while writing data). When that happens, just take the data we have and be done with that file.

* Avoid 'not UTF-8' encoding error

* Don't fail from NMEA parsing errors

Catch pynmea2.ChecksumError exceptions instead of letting the code crash, and substitute None for that NMEA message.

* Catch all pynmea2 errors

We don't really care about NMEA errors - just ignore them as there are always more coming...
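The shape of the fix is a generic "parse or substitute None" loop; `toy_parse` and the exception tuple here stand in for `pynmea2.parse` and pynmea2's error classes:

```python
def parse_all(parse, sentences, errors=(ValueError, AttributeError)):
    """Parse each NMEA sentence, substituting None when the parser fails."""
    parsed = []
    for sentence in sentences:
        try:
            parsed.append(parse(sentence))
        except errors:
            parsed.append(None)
    return parsed


def toy_parse(sentence):
    """Stand-in parser: accept only sentences that start with '$'."""
    if not sentence.startswith("$"):
        raise ValueError("no NMEA header")
    return sentence[1:]
```

Downstream code then guards with `hasattr(x, "latitude")`-style checks, so a None entry simply yields NaN for that message.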

* oops, AttributeError is only raised by pynmea2, not defined by pynmea2

* Fix crash when there is no data in the first ping in a file

This is a bit of a hack (it assumes that a file has at least 2 pings). It could possibly be solved closer to the actual file reading.

* Fix crash when a file has no angle data

Sometimes the angle data is a list whose first element is a numpy array of size 0 and whose remaining elements are None. Cope with this.

* Better fix for commit 1c446f7

When there is no angle data for a particular ping, use a 2D empty array instead of a 1D array. This matches the dimensions of the angle data for the other pings; otherwise a crash occurs when padding all pings in the file to the same number of samples.
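Why the 2-D empty array matters, sketched with illustrative shapes: angle data is (n_samples, 2) per ping, and the padding step assigns each ping into a common (n_pings, max_len, 2) array, which only broadcasts if the empty ping is also 2-D:

```python
import numpy as np

pings = [
    np.zeros((3, 2), dtype="int8"),  # 3 samples x (alongship, athwartship)
    np.empty((0, 2), dtype="int8"),  # ping with no angle data: 2-D, zero rows
]

max_len = max(p.shape[0] for p in pings)
padded = np.full((len(pings), max_len, 2), np.nan)
for i, p in enumerate(pings):
    padded[i, : p.shape[0], :] = p  # a (0, 2) array assigns cleanly here

# A 1-D np.empty((0,)) in the list would instead raise a broadcast error,
# since shape (0,) cannot broadcast into the (0, 2) target slice.
```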

* [pre-commit.ci] pre-commit autoupdate (#455)

updates:
- [github.com/PyCQA/flake8: 3.9.2 → 4.0.1](PyCQA/flake8@3.9.2...4.0.1)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* add es70 to core and convert/api

* factor out COMMON_KEYS for ER60/ES60/ES70

* add ES70 to sonar_models calling set_nmea

* add corresponding changes for ES80 in core and convert/api

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: leewujung <leewujung@gmail.com>
3 people authored Oct 23, 2021
1 parent 4127af3 commit 4c77300
Showing 7 changed files with 99 additions and 77 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -26,7 +26,7 @@ repos:
- id: black

- repo: /~https://github.com/PyCQA/flake8
rev: 3.9.2
rev: 4.0.1
hooks:
- id: flake8

4 changes: 2 additions & 2 deletions echopype/convert/api.py
@@ -423,7 +423,7 @@ def open_raw(
# Set up echodata object
echodata = EchoData(source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model)
# Top-level date_created varies depending on sonar model
if sonar_model in ["EK60", "EK80"]:
if sonar_model in ["EK60", "ES70", "EK80", "ES80"]:
echodata.top = setgrouper.set_toplevel(
sonar_model=sonar_model, date_created=parser.config_datagram["timestamp"]
)
@@ -433,7 +433,7 @@
)
echodata.environment = setgrouper.set_env()
echodata.platform = setgrouper.set_platform()
if sonar_model in ["EK60", "EK80"]:
if sonar_model in ["EK60", "ES70", "EK80", "ES80"]:
echodata.nmea = setgrouper.set_nmea()
echodata.provenance = setgrouper.set_provenance()
echodata.sonar = setgrouper.set_sonar()
7 changes: 5 additions & 2 deletions echopype/convert/parse_base.py
@@ -125,7 +125,9 @@ def parse_raw(self):
# Rectangularize all data and convert to numpy array indexed by channel
for data_type in ["power", "angle", "complex"]:
for k, v in self.ping_data_dict[data_type].items():
if all(x is None for x in v): # if no data in a particular channel
if all(
(x is None) or (x.size == 0) for x in v
): # if no data in a particular channel
self.ping_data_dict[data_type][k] = None
else:
# Sort complex and power/angle channels
@@ -347,13 +349,14 @@ def pad_shorter_ping(data_list) -> np.ndarray:
if (
np.unique(lens).size != 1
): # if some pings have different lengths along range

if data_list[0].ndim == 2:
# Angle data have an extra dimension for alongship and athwartship samples
mask = lens[:, None, None] > np.array([np.arange(lens.max())] * 2).T
else:
mask = lens[:, None] > np.arange(lens.max())
# Take care of problem of np.nan being implicitly "real"
if isinstance(data_list[0][0], (np.complex, np.complex64, np.complex128)):
if data_list[0].dtype in {np.dtype("complex64"), np.dtype("complex128")}:
out_array = np.full(mask.shape, np.nan + 0j)
else:
out_array = np.full(mask.shape, np.nan)
14 changes: 11 additions & 3 deletions echopype/convert/set_groups_base.py
@@ -208,9 +208,17 @@ def _parse_NMEA(self):
).squeeze()
if idx_loc.size == 1: # in case of only 1 matching message
idx_loc = np.expand_dims(idx_loc, axis=0)
nmea_msg = [
pynmea2.parse(self.parser_obj.nmea["nmea_string"][x]) for x in idx_loc
]
nmea_msg = []
for x in idx_loc:
try:
nmea_msg.append(pynmea2.parse(self.parser_obj.nmea["nmea_string"][x]))
except (
pynmea2.ChecksumError,
pynmea2.SentenceTypeError,
AttributeError,
pynmea2.ParseError,
):
nmea_msg.append(None)
lat = (
np.array(
[x.latitude if hasattr(x, "latitude") else np.nan for x in nmea_msg]
16 changes: 10 additions & 6 deletions echopype/convert/utils/ek_raw_io.py
@@ -209,7 +209,7 @@ def _read_dgram_header(self):
)
else:
dgram_type = buf
dgram_type = dgram_type.decode()
dgram_type = dgram_type.decode("latin_1")

# 11/26/19 - RHT
# As part of the rewrite of read to remove the reverse seeking,
@@ -477,11 +477,15 @@ def _find_next_datagram(self):
old_file_pos = self._tell_bytes()
log.warning("Attempting to find next valid datagram...")

while self.peek()["type"][:3] not in list(self.DGRAM_TYPE_KEY.keys()):
self._seek_bytes(1, 1)

log.warning("Found next datagram: %s", self.peek())
log.warning("Skipped ahead %d bytes", self._tell_bytes() - old_file_pos)
try:
while self.peek()["type"][:3] not in list(self.DGRAM_TYPE_KEY.keys()):
self._seek_bytes(1, 1)
except DatagramReadError:
log.warning("No next datagram found. Ending reading of file.")
raise SimradEOF()
else:
log.warning("Found next datagram: %s", self.peek())
log.warning("Skipped ahead %d bytes", self._tell_bytes() - old_file_pos)

def tell(self):
"""
101 changes: 39 additions & 62 deletions echopype/convert/utils/ek_raw_parsers.py
@@ -18,7 +18,7 @@

from .ek_date_conversion import nt_to_unix

TCVR_CH_NUM_MATCHER = re.compile(r"\d{6}-\w{1,2}")
TCVR_CH_NUM_MATCHER = re.compile(r"\d{6}-\w{1,2}|\w{12}-\w{1,2}")

__all__ = [
"SimradNMEAParser",
@@ -416,7 +416,7 @@ class SimradNMEAParser(_SimradDatagramParser):
ready for writing to disk
"""

nmea_head_re = re.compile("\$[A-Za-z]{5},") # noqa
nmea_head_re = re.compile(r"\$[A-Za-z]{5},") # noqa

def __init__(self):
headers = {
@@ -1151,7 +1151,7 @@ def _pack_contents(self, data, version):

class SimradConfigParser(_SimradDatagramParser):
"""
Simrad Configuration Datagram parser operates on dictonaries with the following keys:
Simrad Configuration Datagram parser operates on dictionaries with the following keys:
type: string == 'CON0'
low_date: long uint representing LSBytes of 64bit NT date
@@ -1176,7 +1176,7 @@ class SimradConfigParser(_SimradDatagramParser):
beam_config [str] Raw XML string containing beam config. info
Transducer Config Keys (ER60/ES60 sounders):
Transducer Config Keys (ER60/ES60/ES70 sounders):
channel_id [str] channel ident string
beam_type [long] Type of channel (0 = Single, 1 = Split)
frequency [float] channel frequency
@@ -1238,6 +1238,34 @@ class SimradConfigParser(_SimradDatagramParser):
ready for writing to disk
"""

COMMON_KEYS = [
("channel_id", "128s"),
("beam_type", "l"),
("frequency", "f"),
("gain", "f"),
("equivalent_beam_angle", "f"),
("beamwidth_alongship", "f"),
("beamwidth_athwartship", "f"),
("angle_sensitivity_alongship", "f"),
("angle_sensitivity_athwartship", "f"),
("angle_offset_alongship", "f"),
("angle_offset_athwartship", "f"),
("pos_x", "f"),
("pos_y", "f"),
("pos_z", "f"),
("dir_x", "f"),
("dir_y", "f"),
("dir_z", "f"),
("pulse_length_table", "5f"),
("spare1", "8s"),
("gain_table", "5f"),
("spare2", "8s"),
("sa_correction_table", "5f"),
("spare3", "8s"),
("gpt_software_version", "16s"),
("spare4", "28s"),
]

def __init__(self):
headers = {
0: [
@@ -1257,60 +1285,9 @@ def __init__(self):
_SimradDatagramParser.__init__(self, "CON", headers)

self._transducer_headers = {
"ER60": [
("channel_id", "128s"),
("beam_type", "l"),
("frequency", "f"),
("gain", "f"),
("equivalent_beam_angle", "f"),
("beamwidth_alongship", "f"),
("beamwidth_athwartship", "f"),
("angle_sensitivity_alongship", "f"),
("angle_sensitivity_athwartship", "f"),
("angle_offset_alongship", "f"),
("angle_offset_athwartship", "f"),
("pos_x", "f"),
("pos_y", "f"),
("pos_z", "f"),
("dir_x", "f"),
("dir_y", "f"),
("dir_z", "f"),
("pulse_length_table", "5f"),
("spare1", "8s"),
("gain_table", "5f"),
("spare2", "8s"),
("sa_correction_table", "5f"),
("spare3", "8s"),
("gpt_software_version", "16s"),
("spare4", "28s"),
],
"ES60": [
("channel_id", "128s"),
("beam_type", "l"),
("frequency", "f"),
("gain", "f"),
("equivalent_beam_angle", "f"),
("beamwidth_alongship", "f"),
("beamwidth_athwartship", "f"),
("angle_sensitivity_alongship", "f"),
("angle_sensitivity_athwartship", "f"),
("angle_offset_alongship", "f"),
("angle_offset_athwartship", "f"),
("pos_x", "f"),
("pos_y", "f"),
("pos_z", "f"),
("dir_x", "f"),
("dir_y", "f"),
("dir_z", "f"),
("pulse_length_table", "5f"),
("spare1", "8s"),
("gain_table", "5f"),
("spare2", "8s"),
("sa_correction_table", "5f"),
("spare3", "8s"),
("gpt_software_version", "16s"),
("spare4", "28s"),
],
"ER60": self.COMMON_KEYS,
"ES60": self.COMMON_KEYS,
"ES70": self.COMMON_KEYS,
"MBES": [
("channel_id", "128s"),
("beam_type", "l"),
@@ -1408,11 +1385,11 @@ def _unpack_contents(self, raw_string, bytes_read, version):
txcvr_header_values = list(txcvr_header_values_encoded)
for tx_idx, tx_val in enumerate(txcvr_header_values_encoded):
if isinstance(tx_val, bytes):
txcvr_header_values[tx_idx] = tx_val.decode()
txcvr_header_values[tx_idx] = tx_val.decode("latin_1")

txcvr = data["transceivers"].setdefault(txcvr_indx, {})

if _sounder_name_used in ["ER60", "ES60"]:
if _sounder_name_used in ["ER60", "ES60", "ES70"]:
for txcvr_field_indx, field in enumerate(txcvr_header_fields[:17]):
txcvr[field] = txcvr_header_values[txcvr_field_indx]

@@ -1505,7 +1482,7 @@ def _pack_contents(self, data, version):
for txcvr_indx, txcvr in list(data["transceivers"].items()):
txcvr_contents = []

if _sounder_name_used in ["ER60", "ES60"]:
if _sounder_name_used in ["ER60", "ES60", "ES70"]:
for field in txcvr_header_fields[:17]:
txcvr_contents.append(txcvr[field])

@@ -1666,7 +1643,7 @@ def _unpack_contents(self, raw_string, bytes_read, version):

else:
data["power"] = np.empty((0,), dtype="int16")
data["angle"] = np.empty((0,), dtype="int8")
data["angle"] = np.empty((0, 2), dtype="int8")

elif version == 3:

32 changes: 31 additions & 1 deletion echopype/core.py
@@ -16,7 +16,7 @@

if TYPE_CHECKING:
# Please keep SonarModelsHint updated with the keys of the SONAR_MODELS dict
SonarModelsHint = Literal["AZFP", "EK60", "EK80", "EA640", "AD2CP"]
SonarModelsHint = Literal["AZFP", "EK60", "ES70", "EK80", "ES80", "EA640", "AD2CP"]
PathHint = Union[str, os.PathLike, FSMap]
FileFormatHint = Literal[".nc", ".zarr"]
EngineHint = Literal["netcdf4", "zarr"]
@@ -70,6 +70,21 @@ def inner(test_ext: str):
"default": "minimal",
},
},
"ES70": {
"validate_ext": validate_ext(".raw"),
"xml": False,
"parser": ParseEK60,
"set_groups": SetGroupsEK60,
"concat_dims": {
"platform": ["location_time", "ping_time"],
"nmea": "location_time",
"vendor": None,
"default": "ping_time",
},
"concat_data_vars": {
"default": "minimal",
},
},
"EK80": {
"validate_ext": validate_ext(".raw"),
"xml": False,
@@ -85,6 +100,21 @@
"default": "minimal",
},
},
"ES80": {
"validate_ext": validate_ext(".raw"),
"xml": False,
"parser": ParseEK80,
"set_groups": SetGroupsEK80,
"concat_dims": {
"platform": ["location_time", "mru_time"],
"nmea": "location_time",
"vendor": None,
"default": "ping_time",
},
"concat_data_vars": {
"default": "minimal",
},
},
"EA640": {
"validate_ext": validate_ext(".raw"),
"xml": False,
