diff --git a/dmci/api/worker.py b/dmci/api/worker.py index ef71ea9..13271c4 100644 --- a/dmci/api/worker.py +++ b/dmci/api/worker.py @@ -78,39 +78,71 @@ def validate(self, data): True if xsd and information content checks are passing msg : str Validation message + data : bytes + bytes representation of the xml data """ # Takes in bytes-object data # Gives msg when both validating and not validating if not isinstance(data, bytes): - return False, "Input must be bytes type" + return False, "Input must be bytes type", data # Check xml file against XML schema definition valid = self._xsd_obj.validate(etree.fromstring(data)) msg = repr(self._xsd_obj.error_log) - data_mod = data + if valid: # Check information content valid, msg = self._check_information_content(data) - # Append env string to namespace in data + if not valid: + return valid, msg, data + if self._conf.env_string: + + # Append env string to namespace in metadata_identifier logger.debug("Identifier namespace: %s" % self._namespace) logger.debug("Environment customization %s" % self._conf.env_string) ns_re_pattern = re.compile(r"\w.\w."+self._conf.env_string) logger.debug(re.search(ns_re_pattern, self._namespace)) + if re.search(ns_re_pattern, self._namespace) is None: full_namespace = f"{self._namespace}.{self._conf.env_string}" - data_mod = re.sub( + data = re.sub( str.encode(f"{self._namespace}"), str.encode(f"{full_namespace}"), data, ) self._namespace = full_namespace + + # Append env string to the namespace in the parent block, if present + if bool(re.search(b'', data)): + match_parent_block = re.search( + b'(.+?)', + data + ) + found_parent_block_content = match_parent_block.group(1) + found_parent_block_content = found_parent_block_content.split(b":") + if len(found_parent_block_content) != 2: + err = f"Malformed parent dataset identifier {found_parent_block_content}" + logger.error(err) + return False, err, data + old_parent_namespace = found_parent_block_content[0].decode() + logger.debug("Parent 
dataset namespace: %s" % old_parent_namespace) + if re.search(ns_re_pattern, old_parent_namespace) is None: + new_parent_namespace = f"{old_parent_namespace}.{self._conf.env_string}" + data = re.sub( + str.encode(f'{old_parent_namespace}'), + str.encode(f'{new_parent_namespace}'), + data, + ) + # Add landing page info - data_mod = self._add_landing_page( - data_mod, self._conf.catalog_url, self._file_metadata_id + data = self._add_landing_page( + data, self._conf.catalog_url, self._file_metadata_id ) - return valid, msg, data_mod + return valid, msg, data def distribute(self): """Loop through all distributors listed in the config and call diff --git a/dmci/distributors/pycsw_dist.py b/dmci/distributors/pycsw_dist.py index dcd35e2..a906537 100644 --- a/dmci/distributors/pycsw_dist.py +++ b/dmci/distributors/pycsw_dist.py @@ -236,6 +236,7 @@ def _read_response_text(self, key, text): n_ins = "0" n_upd = "0" n_del = "0" + if root.tag == "{%s}ExceptionReport" % ns_ows: node = root.find("{%s}Exception" % ns_ows, root.nsmap) msg = "Unknown Error" diff --git a/tests/files/api/malformedparentid.xml b/tests/files/api/malformedparentid.xml new file mode 100644 index 0000000..bf95a39 --- /dev/null +++ b/tests/files/api/malformedparentid.xml @@ -0,0 +1,156 @@ + + test.no:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b + Direct Broadcast data processed in satellite swath to L1C + Direktesendte satellittdata prosessert i satellittsveip til L1C. + Direct Broadcast data received at MET NORWAY Oslo. Processed by standard processing software to geolocated and calibrated values in satellite swath in received instrument resolution. + Direktesendte satellittdata mottatt ved Meteorologisk Institutt Oslo. Prosessert med standard prosesseringssoftware til geolokaliserte og kalibrerte verdier i satellitsveip i mottatt instrument oppløsning. 
+ Active + Complete + METNCS + + + 2021-04-29T00:46:05Z + Created + + + + 2021-04-29T00:28:44.977627Z + 2021-04-29T00:39:55.000000Z + + climatologyMeteorologyAtmosphere + environment + oceans + + Earth Science > Atmosphere > Atmospheric radiation + https://gcmdservices.gsfc.nasa.gov/static/kms/ + + + + Meteorological geographical features + Atmospheric conditions + Oceanographic geographical features + http://inspire.ec.europa.eu/theme + + + + Weather and climate + https://register.geonorge.no/subregister/metadata-kodelister/kartverket/nasjonal-temainndeling + + + + + 80.49233 + 36.540688 + 79.40124 + 1.5549301 + + + en + Operational + + CC-BY-4.0 + http://spdx.org/licenses/CC-BY-4.0 + + + Technical contact + DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING + post@met.no + MET NORWAY + + + Metadata author + DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING + post@met.no + unknown + + + + MET NORWAY + MET NORWAY + + met.no + + + OPeNDAP + Open-source Project for a Network Data Access Protocol + https://thredds.met.no/thredds/dodsC/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc + + + OGC WMS + OGC Web Mapping Service, URI to GetCapabilities Document. 
+ https://thredds.met.no/thredds/wms/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc?service=WMS&version=1.3.0&request=GetCapabilities + + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_bidirectional_reflectance + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_bidirectional_reflectance + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_brightness_temperature + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + toa_bidirectional_reflectance + + + + HTTP + Direct download of file + https://thredds.met.no/thredds/fileServer/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc + + 64db6102-14ce-41e9-b93b-61dbb2cb8b4e + + aqua-modis-1km-20210429002844-20210429003955.nc + /lustre/storeB/immutable/archive/projects/remotesensing/satellite-thredds/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc + NetCDF-CF + 1862.00 + 4e1833610272ee63228f575d1c875fbe + + + Govermental core service + Govermental core service + + + Aqua + Aqua + https://www.wmo-sat.info/oscar/satellites/view/aqua + + MODIS + MODIS + 
https://www.wmo-sat.info/oscar/instruments/view/modis + + + Space Borne Instrument + + DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING + 2021-04-29 + Direct Broadcast data processed in satellite swath to L1C + + diff --git a/tests/test_api/test_app.py b/tests/test_api/test_app.py index d255ae6..efc79f0 100644 --- a/tests/test_api/test_app.py +++ b/tests/test_api/test_app.py @@ -33,6 +33,7 @@ from dmci.api import App MOCK_XML = b"" +MOCK_XML_MOD = b"" @pytest.fixture(scope="function") @@ -156,8 +157,24 @@ def testApiApp_InsertUpdateRequests(client, monkeypatch): assert client.post("/v1/insert", data=MOCK_XML).status_code == 200 assert client.post("/v1/update", data=MOCK_XML).status_code == 200 - mp.setattr("dmci.api.app.App._persist_file", lambda *a: ("Failed to persist", 666)) + # Data is valid and gets modified by validate + mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML_MOD)) + assert client.post("/v1/insert", data=MOCK_XML).status_code == 200 + assert client.post("/v1/update", data=MOCK_XML).status_code == 200 + + # first _persist_file fails + with monkeypatch.context() as mp: + mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML)) + mp.setattr("dmci.api.app.App._persist_file", lambda *a: ("Failed to write the file", 666)) assert client.post("/v1/insert", data=MOCK_XML).status_code == 666 + assert client.post("/v1/update", data=MOCK_XML).status_code == 666 + + # first _persist_file works + with monkeypatch.context() as mp: + mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML)) + mp.setattr("dmci.api.app.App._persist_file", lambda *a: ("Everything is OK", 200)) + assert client.post("/v1/insert", data=MOCK_XML).status_code == 200 + assert client.post("/v1/update", data=MOCK_XML).status_code == 200 # Data is not valid with monkeypatch.context() as mp: @@ -194,6 +211,26 @@ def testApiApp_InsertUpdateRequests(client, monkeypatch): # END Test testApiApp_InsertRequests +@pytest.mark.api +def 
testApiApp_PersistAgainAfterModification(client, monkeypatch): + + outputs = iter([("Everything is OK", 200), ("Failure in persisting", 666), + ("Everything is OK", 200), ("Failure in persisting", 666)]) + + @staticmethod + def fake_output(data, full_path): + return next(outputs) + + with monkeypatch.context() as mp: + # Data is valid but failure to persist again after modifications + mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML_MOD)) + mp.setattr("dmci.api.app.App._persist_file", fake_output) + assert client.post("/v1/insert", data=MOCK_XML).status_code == 666 + assert client.post("/v1/update", data=MOCK_XML).status_code == 666 + +# END Test testApiApp_PersistAgainAfterModification + + @pytest.mark.api def testApiApp_DeleteRequests(client, monkeypatch): """Test api delete request.""" diff --git a/tests/test_api/test_worker.py b/tests/test_api/test_worker.py index 027547d..79ddc62 100644 --- a/tests/test_api/test_worker.py +++ b/tests/test_api/test_worker.py @@ -104,6 +104,7 @@ def testApiWorker_Distributor(tmpConf, mockXml, monkeypatch): @pytest.mark.api def testApiWorker_Validator(monkeypatch, filesDir): """Test the Worker class validator.""" + xsdFile = os.path.join(filesDir, "mmd", "mmd.xsd") passFile = os.path.join(filesDir, "api", "passing.xml") passFilewLP = os.path.join(filesDir, "api", "passing_wlandingpage.xml") @@ -121,7 +122,7 @@ def testApiWorker_Validator(monkeypatch, filesDir): # Invalid data format passData = readFile(passFile) - assert passWorker.validate(passData) == (False, "Input must be bytes type") + assert passWorker.validate(passData) == (False, "Input must be bytes type", passData) # Valid data format with monkeypatch.context() as mp: @@ -159,13 +160,25 @@ def testApiWorker_Validator(monkeypatch, filesDir): assert isinstance(msg, str) assert msg + # _check_information_content fails + with monkeypatch.context() as mp: + mp.setattr(Worker, "_check_information_content", + lambda *a: (False, 
"_check_information_content failed")) + + passData = bytes(readFile(passFile), "utf-8") + valid, msg, passData = passWorker.validate(passData) + assert valid is False + assert isinstance(msg, str) + assert msg == "_check_information_content failed" + # END Test testApiWorker_Validator @pytest.mark.api -def testApiWorker_NamespaceReplacement(monkeypatch, filesDir): - """Test the replacement of the namespace with the one read from the config.""" +def testApiWorker_NamespaceReplacement(filesDir): + """Test the replacement of the namespace with the one customized for the environment.""" + xsdFile = os.path.join(filesDir, "mmd", "mmd.xsd") passFile = os.path.join(filesDir, "api", "passing.xml") @@ -189,6 +202,41 @@ def testApiWorker_NamespaceReplacement(monkeypatch, filesDir): # END Test testApiWorker_NamespaceReplacement +@pytest.mark.api +def testApiWorker_ParentNamespaceReplacement(filesDir): + """Test the replacement of the namespace in the parent dataset + with the one customized for the environment.""" + + xsdFile = os.path.join(filesDir, "mmd", "mmd.xsd") + passFile = os.path.join(filesDir, "api", "passing.xml") + + xsdObj = lxml.etree.XMLSchema(lxml.etree.parse(xsdFile)) + passWorker = Worker("none", passFile, xsdObj) + + passWorker._conf.env_string = "yolo" + + # Valid XML + passData = bytes(readFile(passFile), "utf-8") + valid, msg, passData = passWorker.validate(passData) + assert valid is True + + match_parent_id = re.search( + b'(.+?)', passData + ) + parent_id = match_parent_id.group(1) + namespace = parent_id.split(b":")[0].decode() + assert namespace == "test.no.yolo" + + # Malformed parent dataset id + badparentidFile = os.path.join(filesDir, "api", "malformedparentid.xml") + badparentWorker = Worker("none", badparentidFile, xsdObj) + badparentData = bytes(readFile(badparentidFile), "utf-8") + valid, msg, data = badparentWorker.validate(badparentData) + assert valid is False + assert msg == "Malformed parent dataset identifier 
[b'64db6102-14ce-41e9-b93b-61dbb2cb8b4e']" + +# END Test testApiWorker_ParentNamespaceReplacement + @pytest.mark.api def testApiWorker_CheckInfoContent(monkeypatch, filesDir):