Skip to content

Commit

Permalink
Merge pull request #155 from metno/fix-issue-152
Browse files Browse the repository at this point in the history
Fix issue 152
  • Loading branch information
johtoblan authored Apr 27, 2023
2 parents 54e188e + 6c34476 commit 4784d46
Show file tree
Hide file tree
Showing 5 changed files with 285 additions and 11 deletions.
46 changes: 39 additions & 7 deletions dmci/api/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,39 +78,71 @@ def validate(self, data):
True if xsd and information content checks are passing
msg : str
Validation message
data : bytes
bytes representation of the xml data
"""
# Takes in bytes-object data
# Gives msg when both validating and not validating
if not isinstance(data, bytes):
return False, "Input must be bytes type"
return False, "Input must be bytes type", data

# Check xml file against XML schema definition
valid = self._xsd_obj.validate(etree.fromstring(data))
msg = repr(self._xsd_obj.error_log)
data_mod = data

if valid:
# Check information content
valid, msg = self._check_information_content(data)
# Append env string to namespace in data
if not valid:
return valid, msg, data

if self._conf.env_string:

# Append env string to namespace in metadata_identifier
logger.debug("Identifier namespace: %s" % self._namespace)
logger.debug("Environment customization %s" % self._conf.env_string)
ns_re_pattern = re.compile(r"\w.\w."+self._conf.env_string)
logger.debug(re.search(ns_re_pattern, self._namespace))

if re.search(ns_re_pattern, self._namespace) is None:
full_namespace = f"{self._namespace}.{self._conf.env_string}"
data_mod = re.sub(
data = re.sub(
str.encode(f"<mmd:metadata_identifier>{self._namespace}"),
str.encode(f"<mmd:metadata_identifier>{full_namespace}"),
data,
)
self._namespace = full_namespace

# Append env string to the namespace in the parent block, if present
if bool(re.search(b'<mmd:related_dataset relation_type="parent">', data)):
match_parent_block = re.search(
b'<mmd:related_dataset relation_type="parent">(.+?)</mmd:related_dataset>',
data
)
found_parent_block_content = match_parent_block.group(1)
found_parent_block_content = found_parent_block_content.split(b":")
if len(found_parent_block_content) != 2:
err = f"Malformed parent dataset identifier {found_parent_block_content}"
logger.error(err)
return False, err, data
old_parent_namespace = found_parent_block_content[0].decode()
logger.debug("Parent dataset namespace: %s" % old_parent_namespace)
if re.search(ns_re_pattern, old_parent_namespace) is None:
new_parent_namespace = f"{old_parent_namespace}.{self._conf.env_string}"
data = re.sub(
str.encode(f'<mmd:related_dataset '
f'relation_type="parent">{old_parent_namespace}'),
str.encode(f'<mmd:related_dataset '
f'relation_type="parent">{new_parent_namespace}'),
data,
)

# Add landing page info
data_mod = self._add_landing_page(
data_mod, self._conf.catalog_url, self._file_metadata_id
data = self._add_landing_page(
data, self._conf.catalog_url, self._file_metadata_id
)

return valid, msg, data_mod
return valid, msg, data

def distribute(self):
"""Loop through all distributors listed in the config and call
Expand Down
1 change: 1 addition & 0 deletions dmci/distributors/pycsw_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ def _read_response_text(self, key, text):
n_ins = "0"
n_upd = "0"
n_del = "0"

if root.tag == "{%s}ExceptionReport" % ns_ows:
node = root.find("{%s}Exception" % ns_ows, root.nsmap)
msg = "Unknown Error"
Expand Down
156 changes: 156 additions & 0 deletions tests/files/api/malformedparentid.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<mmd:mmd xmlns:mmd="http://www.met.no/schema/mmd" xmlns:gml="http://www.opengis.net/gml">
<mmd:metadata_identifier>test.no:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</mmd:metadata_identifier>
<mmd:title xml:lang="en">Direct Broadcast data processed in satellite swath to L1C</mmd:title>
<mmd:title xml:lang="nor">Direktesendte satellittdata prosessert i satellittsveip til L1C.</mmd:title>
<mmd:abstract xml:lang="en">Direct Broadcast data received at MET NORWAY Oslo. Processed by standard processing software to geolocated and calibrated values in satellite swath in received instrument resolution.</mmd:abstract>
<mmd:abstract xml:lang="nor">Direktesendte satellittdata mottatt ved Meteorologisk Institutt Oslo. Prosessert med standard prosesseringssoftware til geolokaliserte og kalibrerte verdier i satellitsveip i mottatt instrument oppløsning.</mmd:abstract>
<mmd:metadata_status>Active</mmd:metadata_status>
<mmd:dataset_production_status>Complete</mmd:dataset_production_status>
<mmd:collection>METNCS</mmd:collection>
<mmd:last_metadata_update>
<mmd:update>
<mmd:datetime>2021-04-29T00:46:05Z</mmd:datetime>
<mmd:type>Created</mmd:type>
</mmd:update>
</mmd:last_metadata_update>
<mmd:temporal_extent>
<mmd:start_date>2021-04-29T00:28:44.977627Z</mmd:start_date>
<mmd:end_date>2021-04-29T00:39:55.000000Z</mmd:end_date>
</mmd:temporal_extent>
<mmd:iso_topic_category>climatologyMeteorologyAtmosphere</mmd:iso_topic_category>
<mmd:iso_topic_category>environment</mmd:iso_topic_category>
<mmd:iso_topic_category>oceans</mmd:iso_topic_category>
<mmd:keywords vocabulary="GCMD">
<mmd:keyword>Earth Science &gt; Atmosphere &gt; Atmospheric radiation</mmd:keyword>
<mmd:resource>https://gcmdservices.gsfc.nasa.gov/static/kms/</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:keywords vocabulary="GEMET">
<mmd:keyword>Meteorological geographical features</mmd:keyword>
<mmd:keyword>Atmospheric conditions</mmd:keyword>
<mmd:keyword>Oceanographic geographical features</mmd:keyword>
<mmd:resource>http://inspire.ec.europa.eu/theme</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:keywords vocabulary="Norwegian thematic categories">
<mmd:keyword>Weather and climate</mmd:keyword>
<mmd:resource>https://register.geonorge.no/subregister/metadata-kodelister/kartverket/nasjonal-temainndeling</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:geographic_extent>
<mmd:rectangle srsName="EPSG:4326">
<mmd:north>80.49233</mmd:north>
<mmd:south>36.540688</mmd:south>
<mmd:east>79.40124</mmd:east>
<mmd:west>1.5549301</mmd:west>
</mmd:rectangle>
</mmd:geographic_extent>
<mmd:dataset_language>en</mmd:dataset_language>
<mmd:operational_status>Operational</mmd:operational_status>
<mmd:use_constraint>
<mmd:identifier>CC-BY-4.0</mmd:identifier>
<mmd:resource>http://spdx.org/licenses/CC-BY-4.0</mmd:resource>
</mmd:use_constraint>
<mmd:personnel>
<mmd:role>Technical contact</mmd:role>
<mmd:name>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:name>
<mmd:email>post@met.no</mmd:email>
<mmd:organisation>MET NORWAY</mmd:organisation>
</mmd:personnel>
<mmd:personnel>
<mmd:role>Metadata author</mmd:role>
<mmd:name>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:name>
<mmd:email>post@met.no</mmd:email>
<mmd:organisation>unknown</mmd:organisation>
</mmd:personnel>
<mmd:data_center>
<mmd:data_center_name>
<mmd:short_name>MET NORWAY</mmd:short_name>
<mmd:long_name>MET NORWAY</mmd:long_name>
</mmd:data_center_name>
<mmd:data_center_url>met.no</mmd:data_center_url>
</mmd:data_center>
<mmd:data_access>
<mmd:type>OPeNDAP</mmd:type>
<mmd:description>Open-source Project for a Network Data Access Protocol</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/dodsC/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:resource>
</mmd:data_access>
<mmd:data_access>
<mmd:type>OGC WMS</mmd:type>
<mmd:description>OGC Web Mapping Service, URI to GetCapabilities Document.</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/wms/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc?service=WMS&amp;version=1.3.0&amp;request=GetCapabilities</mmd:resource>
<mmd:wms_layers>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
</mmd:wms_layers>
</mmd:data_access>
<mmd:data_access>
<mmd:type>HTTP</mmd:type>
<mmd:description>Direct download of file</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/fileServer/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:resource>
</mmd:data_access>
<mmd:related_dataset relation_type="parent">64db6102-14ce-41e9-b93b-61dbb2cb8b4e</mmd:related_dataset>
<mmd:storage_information>
<mmd:file_name>aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_name>
<mmd:file_location>/lustre/storeB/immutable/archive/projects/remotesensing/satellite-thredds/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_location>
<mmd:file_format>NetCDF-CF</mmd:file_format>
<mmd:file_size unit="MB">1862.00</mmd:file_size>
<mmd:checksum type="md5sum">4e1833610272ee63228f575d1c875fbe</mmd:checksum>
</mmd:storage_information>
<mmd:project>
<mmd:short_name>Govermental core service</mmd:short_name>
<mmd:long_name>Govermental core service</mmd:long_name>
</mmd:project>
<mmd:platform>
<mmd:short_name>Aqua</mmd:short_name>
<mmd:long_name>Aqua</mmd:long_name>
<mmd:resource>https://www.wmo-sat.info/oscar/satellites/view/aqua</mmd:resource>
<mmd:instrument>
<mmd:short_name>MODIS</mmd:short_name>
<mmd:long_name>MODIS</mmd:long_name>
<mmd:resource>https://www.wmo-sat.info/oscar/instruments/view/modis</mmd:resource>
</mmd:instrument>
</mmd:platform>
<mmd:activity_type>Space Borne Instrument</mmd:activity_type>
<mmd:dataset_citation>
<mmd:author>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:author>
<mmd:publication_date>2021-04-29</mmd:publication_date>
<mmd:title>Direct Broadcast data processed in satellite swath to L1C</mmd:title>
</mmd:dataset_citation>
</mmd:mmd>
39 changes: 38 additions & 1 deletion tests/test_api/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from dmci.api import App

MOCK_XML = b"<xml />"
MOCK_XML_MOD = b"<xml mod />"


@pytest.fixture(scope="function")
Expand Down Expand Up @@ -156,8 +157,24 @@ def testApiApp_InsertUpdateRequests(client, monkeypatch):
assert client.post("/v1/insert", data=MOCK_XML).status_code == 200
assert client.post("/v1/update", data=MOCK_XML).status_code == 200

mp.setattr("dmci.api.app.App._persist_file", lambda *a: ("Failed to persist", 666))
# Data is valid and gets modified by validate
mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML_MOD))
assert client.post("/v1/insert", data=MOCK_XML).status_code == 200
assert client.post("/v1/update", data=MOCK_XML).status_code == 200

# first _persist_file fails
with monkeypatch.context() as mp:
mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML))
mp.setattr("dmci.api.app.App._persist_file", lambda *a: ("Failed to write the file", 666))
assert client.post("/v1/insert", data=MOCK_XML).status_code == 666
assert client.post("/v1/update", data=MOCK_XML).status_code == 666

# first _persist_file works
with monkeypatch.context() as mp:
mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML))
mp.setattr("dmci.api.app.App._persist_file", lambda *a: ("Everything is OK", 200))
assert client.post("/v1/insert", data=MOCK_XML).status_code == 200
assert client.post("/v1/update", data=MOCK_XML).status_code == 200

# Data is not valid
with monkeypatch.context() as mp:
Expand Down Expand Up @@ -194,6 +211,26 @@ def testApiApp_InsertUpdateRequests(client, monkeypatch):
# END Test testApiApp_InsertUpdateRequests


@pytest.mark.api
def testApiApp_PersistAgainAfterModification(client, monkeypatch):
    """Test insert/update when persisting fails after validate modifies the data.

    The stubbed ``_persist_file`` alternates between a success and a failure
    result, so each request below sees one OK followed by one failure and is
    expected to answer with the failure status code.
    """
    ok_result = ("Everything is OK", 200)
    failed_result = ("Failure in persisting", 666)
    persist_results = iter([ok_result, failed_result, ok_result, failed_result])

    @staticmethod
    def fake_output(data, full_path):
        # Hand out the next canned (message, status) pair on every call
        return next(persist_results)

    with monkeypatch.context() as mp:
        # Data is valid but failure to persist again after modifications
        mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML_MOD))
        mp.setattr("dmci.api.app.App._persist_file", fake_output)
        for endpoint in ("/v1/insert", "/v1/update"):
            assert client.post(endpoint, data=MOCK_XML).status_code == 666

# END Test testApiApp_PersistAgainAfterModification


@pytest.mark.api
def testApiApp_DeleteRequests(client, monkeypatch):
"""Test api delete request."""
Expand Down
54 changes: 51 additions & 3 deletions tests/test_api/test_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def testApiWorker_Distributor(tmpConf, mockXml, monkeypatch):
@pytest.mark.api
def testApiWorker_Validator(monkeypatch, filesDir):
"""Test the Worker class validator."""

xsdFile = os.path.join(filesDir, "mmd", "mmd.xsd")
passFile = os.path.join(filesDir, "api", "passing.xml")
passFilewLP = os.path.join(filesDir, "api", "passing_wlandingpage.xml")
Expand All @@ -121,7 +122,7 @@ def testApiWorker_Validator(monkeypatch, filesDir):

# Invalid data format
passData = readFile(passFile)
assert passWorker.validate(passData) == (False, "Input must be bytes type")
assert passWorker.validate(passData) == (False, "Input must be bytes type", passData)

# Valid data format
with monkeypatch.context() as mp:
Expand Down Expand Up @@ -159,13 +160,25 @@ def testApiWorker_Validator(monkeypatch, filesDir):
assert isinstance(msg, str)
assert msg

# _check_information_content fails
with monkeypatch.context() as mp:
mp.setattr(Worker, "_check_information_content",
lambda *a: (False, "_check_information_content failed"))

passData = bytes(readFile(passFile), "utf-8")
valid, msg, passData = passWorker.validate(passData)
assert valid is False
assert isinstance(msg, str)
assert msg == "_check_information_content failed"


# END Test testApiWorker_Validator


@pytest.mark.api
def testApiWorker_NamespaceReplacement(monkeypatch, filesDir):
"""Test the replacement of the namespace with the one read from the config."""
def testApiWorker_NamespaceReplacement(filesDir):
"""Test the replacement of the namespace with the one customized for the environment."""

xsdFile = os.path.join(filesDir, "mmd", "mmd.xsd")
passFile = os.path.join(filesDir, "api", "passing.xml")

Expand All @@ -189,6 +202,41 @@ def testApiWorker_NamespaceReplacement(monkeypatch, filesDir):

# END Test testApiWorker_NamespaceReplacement

@pytest.mark.api
def testApiWorker_ParentNamespaceReplacement(filesDir):
    """Test the replacement of the namespace in the parent dataset
    with the one customized for the environment.

    Covers two cases: a passing file whose parent identifier namespace gets
    the environment string appended, and a file whose parent identifier has
    no ``namespace:uuid`` form and must be rejected by ``validate``.
    """
    xsdFile = os.path.join(filesDir, "mmd", "mmd.xsd")
    passFile = os.path.join(filesDir, "api", "passing.xml")

    xsdObj = lxml.etree.XMLSchema(lxml.etree.parse(xsdFile))
    passWorker = Worker("none", passFile, xsdObj)

    # NOTE(review): the second Worker below never sets env_string itself;
    # presumably `_conf` is shared between Worker instances - confirm.
    passWorker._conf.env_string = "yolo"

    # Valid XML
    passData = bytes(readFile(passFile), "utf-8")
    valid, msg, passData = passWorker.validate(passData)
    assert valid is True

    match_parent_id = re.search(
        b'<mmd:related_dataset relation_type="parent">(.+?)</mmd:related_dataset>', passData
    )
    # re.search returns None when nothing matches; fail with a clear assertion
    # instead of an AttributeError on the .group() call below
    assert match_parent_id is not None
    parent_id = match_parent_id.group(1)
    namespace = parent_id.split(b":")[0].decode()
    assert namespace == "test.no.yolo"

    # Malformed parent dataset id
    badparentidFile = os.path.join(filesDir, "api", "malformedparentid.xml")
    badparentWorker = Worker("none", badparentidFile, xsdObj)
    badparentData = bytes(readFile(badparentidFile), "utf-8")
    valid, msg, data = badparentWorker.validate(badparentData)
    assert valid is False
    assert msg == "Malformed parent dataset identifier [b'64db6102-14ce-41e9-b93b-61dbb2cb8b4e']"

# END Test testApiWorker_ParentNamespaceReplacement


@pytest.mark.api
def testApiWorker_CheckInfoContent(monkeypatch, filesDir):
Expand Down

0 comments on commit 4784d46

Please sign in to comment.