Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue 152 #155

Merged
merged 17 commits into from
Apr 27, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 36 additions & 6 deletions dmci/api/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,30 +87,60 @@ def validate(self, data):
# Check xml file against XML schema definition
valid = self._xsd_obj.validate(etree.fromstring(data))
msg = repr(self._xsd_obj.error_log)
data_mod = data

if valid:
# Check information content
valid, msg = self._check_information_content(data)
# Append env string to namespace in data
if not valid:
return valid, msg, data

if self._conf.env_string:

# Append env string to namespace in metadata_identifier
logger.debug("Identifier namespace: %s" % self._namespace)
logger.debug("Environment customization %s" % self._conf.env_string)
ns_re_pattern = re.compile(r"\w.\w."+self._conf.env_string)
logger.debug(re.search(ns_re_pattern, self._namespace))

if re.search(ns_re_pattern, self._namespace) is None:
full_namespace = f"{self._namespace}.{self._conf.env_string}"
data_mod = re.sub(
data = re.sub(
str.encode(f"<mmd:metadata_identifier>{self._namespace}"),
str.encode(f"<mmd:metadata_identifier>{full_namespace}"),
data,
)
self._namespace = full_namespace

# Append env string to the namespace in the parent block, if present
if bool(re.search(b'<mmd:related_dataset relation_type="parent">', data)):
match_parent_block = re.search(
b'<mmd:related_dataset relation_type="parent">(.+?)</mmd:related_dataset>',
data
)
found_parent_block_content = match_parent_block.group(1)
found_parent_block_content = found_parent_block_content.split(b":")
if len(found_parent_block_content) != 2:
err = f"Malformed parent dataset identifier {found_parent_block_content}"
logger.error(err)
return False, err
old_parent_namespace = found_parent_block_content[0].decode()
logger.debug("Parent dataset namespace: %s" % old_parent_namespace)
if re.search(ns_re_pattern, old_parent_namespace) is None:
new_parent_namespace = f"{old_parent_namespace}.{self._conf.env_string}"
data = re.sub(
str.encode(f'<mmd:related_dataset '
f'relation_type="parent">{old_parent_namespace}'),
str.encode(f'<mmd:related_dataset '
f'relation_type="parent">{new_parent_namespace}'),
data,
)

# Add landing page info
data_mod = self._add_landing_page(
data_mod, self._conf.catalog_url, self._file_metadata_id
data = self._add_landing_page(
data, self._conf.catalog_url, self._file_metadata_id
)

return valid, msg, data_mod
return valid, msg, data

def distribute(self):
"""Loop through all distributors listed in the config and call
Expand Down
1 change: 1 addition & 0 deletions dmci/distributors/pycsw_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ def _read_response_text(self, key, text):
n_ins = "0"
n_upd = "0"
n_del = "0"

if root.tag == "{%s}ExceptionReport" % ns_ows:
node = root.find("{%s}Exception" % ns_ows, root.nsmap)
msg = "Unknown Error"
Expand Down
156 changes: 156 additions & 0 deletions tests/files/api/malformedparentid.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<mmd:mmd xmlns:mmd="http://www.met.no/schema/mmd" xmlns:gml="http://www.opengis.net/gml">
<mmd:metadata_identifier>test.no:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</mmd:metadata_identifier>
<mmd:title xml:lang="en">Direct Broadcast data processed in satellite swath to L1C</mmd:title>
<mmd:title xml:lang="nor">Direktesendte satellittdata prosessert i satellittsveip til L1C.</mmd:title>
<mmd:abstract xml:lang="en">Direct Broadcast data received at MET NORWAY Oslo. Processed by standard processing software to geolocated and calibrated values in satellite swath in received instrument resolution.</mmd:abstract>
<mmd:abstract xml:lang="nor">Direktesendte satellittdata mottatt ved Meteorologisk Institutt Oslo. Prosessert med standard prosesseringssoftware til geolokaliserte og kalibrerte verdier i satellitsveip i mottatt instrument oppløsning.</mmd:abstract>
<mmd:metadata_status>Active</mmd:metadata_status>
<mmd:dataset_production_status>Complete</mmd:dataset_production_status>
<mmd:collection>METNCS</mmd:collection>
<mmd:last_metadata_update>
<mmd:update>
<mmd:datetime>2021-04-29T00:46:05Z</mmd:datetime>
<mmd:type>Created</mmd:type>
</mmd:update>
</mmd:last_metadata_update>
<mmd:temporal_extent>
<mmd:start_date>2021-04-29T00:28:44.977627Z</mmd:start_date>
<mmd:end_date>2021-04-29T00:39:55.000000Z</mmd:end_date>
</mmd:temporal_extent>
<mmd:iso_topic_category>climatologyMeteorologyAtmosphere</mmd:iso_topic_category>
<mmd:iso_topic_category>environment</mmd:iso_topic_category>
<mmd:iso_topic_category>oceans</mmd:iso_topic_category>
<mmd:keywords vocabulary="GCMD">
<mmd:keyword>Earth Science &gt; Atmosphere &gt; Atmospheric radiation</mmd:keyword>
<mmd:resource>https://gcmdservices.gsfc.nasa.gov/static/kms/</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:keywords vocabulary="GEMET">
<mmd:keyword>Meteorological geographical features</mmd:keyword>
<mmd:keyword>Atmospheric conditions</mmd:keyword>
<mmd:keyword>Oceanographic geographical features</mmd:keyword>
<mmd:resource>http://inspire.ec.europa.eu/theme</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:keywords vocabulary="Norwegian thematic categories">
<mmd:keyword>Weather and climate</mmd:keyword>
<mmd:resource>https://register.geonorge.no/subregister/metadata-kodelister/kartverket/nasjonal-temainndeling</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:geographic_extent>
<mmd:rectangle srsName="EPSG:4326">
<mmd:north>80.49233</mmd:north>
<mmd:south>36.540688</mmd:south>
<mmd:east>79.40124</mmd:east>
<mmd:west>1.5549301</mmd:west>
</mmd:rectangle>
</mmd:geographic_extent>
<mmd:dataset_language>en</mmd:dataset_language>
<mmd:operational_status>Operational</mmd:operational_status>
<mmd:use_constraint>
<mmd:identifier>CC-BY-4.0</mmd:identifier>
<mmd:resource>http://spdx.org/licenses/CC-BY-4.0</mmd:resource>
</mmd:use_constraint>
<mmd:personnel>
<mmd:role>Technical contact</mmd:role>
<mmd:name>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:name>
<mmd:email>post@met.no</mmd:email>
<mmd:organisation>MET NORWAY</mmd:organisation>
</mmd:personnel>
<mmd:personnel>
<mmd:role>Metadata author</mmd:role>
<mmd:name>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:name>
<mmd:email>post@met.no</mmd:email>
<mmd:organisation>unknown</mmd:organisation>
</mmd:personnel>
<mmd:data_center>
<mmd:data_center_name>
<mmd:short_name>MET NORWAY</mmd:short_name>
<mmd:long_name>MET NORWAY</mmd:long_name>
</mmd:data_center_name>
<mmd:data_center_url>met.no</mmd:data_center_url>
</mmd:data_center>
<mmd:data_access>
<mmd:type>OPeNDAP</mmd:type>
<mmd:description>Open-source Project for a Network Data Access Protocol</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/dodsC/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:resource>
</mmd:data_access>
<mmd:data_access>
<mmd:type>OGC WMS</mmd:type>
<mmd:description>OGC Web Mapping Service, URI to GetCapabilities Document.</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/wms/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc?service=WMS&amp;version=1.3.0&amp;request=GetCapabilities</mmd:resource>
<mmd:wms_layers>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
</mmd:wms_layers>
</mmd:data_access>
<mmd:data_access>
<mmd:type>HTTP</mmd:type>
<mmd:description>Direct download of file</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/fileServer/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:resource>
</mmd:data_access>
<mmd:related_dataset relation_type="parent">64db6102-14ce-41e9-b93b-61dbb2cb8b4e</mmd:related_dataset>
<mmd:storage_information>
<mmd:file_name>aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_name>
<mmd:file_location>/lustre/storeB/immutable/archive/projects/remotesensing/satellite-thredds/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_location>
<mmd:file_format>NetCDF-CF</mmd:file_format>
<mmd:file_size unit="MB">1862.00</mmd:file_size>
<mmd:checksum type="md5sum">4e1833610272ee63228f575d1c875fbe</mmd:checksum>
</mmd:storage_information>
<mmd:project>
<mmd:short_name>Govermental core service</mmd:short_name>
<mmd:long_name>Govermental core service</mmd:long_name>
</mmd:project>
<mmd:platform>
<mmd:short_name>Aqua</mmd:short_name>
<mmd:long_name>Aqua</mmd:long_name>
<mmd:resource>https://www.wmo-sat.info/oscar/satellites/view/aqua</mmd:resource>
<mmd:instrument>
<mmd:short_name>MODIS</mmd:short_name>
<mmd:long_name>MODIS</mmd:long_name>
<mmd:resource>https://www.wmo-sat.info/oscar/instruments/view/modis</mmd:resource>
</mmd:instrument>
</mmd:platform>
<mmd:activity_type>Space Borne Instrument</mmd:activity_type>
<mmd:dataset_citation>
<mmd:author>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:author>
<mmd:publication_date>2021-04-29</mmd:publication_date>
<mmd:title>Direct Broadcast data processed in satellite swath to L1C</mmd:title>
</mmd:dataset_citation>
</mmd:mmd>
39 changes: 38 additions & 1 deletion tests/test_api/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from dmci.api import App

MOCK_XML = b"<xml />"
MOCK_XML_MOD = b"<xml mod />"


@pytest.fixture(scope="function")
Expand Down Expand Up @@ -156,8 +157,24 @@ def testApiApp_InsertUpdateRequests(client, monkeypatch):
assert client.post("/v1/insert", data=MOCK_XML).status_code == 200
assert client.post("/v1/update", data=MOCK_XML).status_code == 200

mp.setattr("dmci.api.app.App._persist_file", lambda *a: ("Failed to persist", 666))
# Data is valid and gets modified by validate
mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML_MOD))
assert client.post("/v1/insert", data=MOCK_XML).status_code == 200
assert client.post("/v1/update", data=MOCK_XML).status_code == 200

# first _persist_file fails
with monkeypatch.context() as mp:
mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML))
mp.setattr("dmci.api.app.App._persist_file", lambda *a: ("Failed to write the file", 666))
assert client.post("/v1/insert", data=MOCK_XML).status_code == 666
assert client.post("/v1/update", data=MOCK_XML).status_code == 666

# first _persist_file works
with monkeypatch.context() as mp:
mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML))
mp.setattr("dmci.api.app.App._persist_file", lambda *a: ("Everything is OK", 200))
assert client.post("/v1/insert", data=MOCK_XML).status_code == 200
assert client.post("/v1/update", data=MOCK_XML).status_code == 200

# Data is not valid
with monkeypatch.context() as mp:
Expand Down Expand Up @@ -194,6 +211,26 @@ def testApiApp_InsertUpdateRequests(client, monkeypatch):
# END Test testApiApp_InsertUpdateRequests


@pytest.mark.api
def testApiApp_PersistAgainAfterModification(client, monkeypatch):
    """Test insert/update when persisting fails after validate modifies the data.

    Worker.validate is mocked to report valid data and hand back
    MOCK_XML_MOD, while App._persist_file is mocked to report success on
    one call and failure on the next. Both endpoints are expected to
    answer with the failing status code (666).
    """
    # Scripted results for successive _persist_file calls: one success
    # followed by one failure, once for the insert request and once for
    # the update request.
    # NOTE(review): presumably the app persists once before validation and
    # once more when validate returns modified data -- confirm against
    # dmci.api.app, which is not visible here.
    persist_ok = ("Everything is OK", 200)
    persist_failed = ("Failure in persisting", 666)
    scripted_results = iter([persist_ok, persist_failed, persist_ok, persist_failed])

    @staticmethod
    def fake_persist(data, full_path):
        return next(scripted_results)

    with monkeypatch.context() as mp:
        # Data is valid but failure to persist again after modifications
        mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "", MOCK_XML_MOD))
        mp.setattr("dmci.api.app.App._persist_file", fake_persist)
        for endpoint in ("/v1/insert", "/v1/update"):
            assert client.post(endpoint, data=MOCK_XML).status_code == 666

# END Test testApiApp_PersistAgainAfterModification


@pytest.mark.api
def testApiApp_DeleteRequests(client, monkeypatch):
"""Test api delete request."""
Expand Down
52 changes: 50 additions & 2 deletions tests/test_api/test_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def testApiWorker_Distributor(tmpConf, mockXml, monkeypatch):
@pytest.mark.api
def testApiWorker_Validator(monkeypatch, filesDir):
"""Test the Worker class validator."""

xsdFile = os.path.join(filesDir, "mmd", "mmd.xsd")
passFile = os.path.join(filesDir, "api", "passing.xml")
passFilewLP = os.path.join(filesDir, "api", "passing_wlandingpage.xml")
Expand Down Expand Up @@ -159,13 +160,25 @@ def testApiWorker_Validator(monkeypatch, filesDir):
assert isinstance(msg, str)
assert msg

# _check_information_content fails
with monkeypatch.context() as mp:
mp.setattr(Worker, "_check_information_content",
lambda *a: (False, "_check_information_content failed"))

passData = bytes(readFile(passFile), "utf-8")
valid, msg, passData = passWorker.validate(passData)
assert valid is False
assert isinstance(msg, str)
assert msg == "_check_information_content failed"


# END Test testApiWorker_Validator


@pytest.mark.api
def testApiWorker_NamespaceReplacement(monkeypatch, filesDir):
"""Test the replacement of the namespace with the one read from the config."""
def testApiWorker_NamespaceReplacement(filesDir):
"""Test the replacement of the namespace with the one customized for the environment."""

xsdFile = os.path.join(filesDir, "mmd", "mmd.xsd")
passFile = os.path.join(filesDir, "api", "passing.xml")

Expand All @@ -189,6 +202,41 @@ def testApiWorker_NamespaceReplacement(monkeypatch, filesDir):

# END Test testApiWorker_NamespaceReplacement

@pytest.mark.api
def testApiWorker_ParentNamespaceReplacement(filesDir):
    """Test the env-specific namespace rewrite of the parent dataset id.

    With an env string configured, the namespace part of the
    ``related_dataset`` parent identifier returned by ``Worker.validate``
    must carry the env string as a suffix. A parent identifier without a
    ``namespace:uuid`` shape must be rejected with an error message.
    """
    apiDir = os.path.join(filesDir, "api")
    schemaPath = os.path.join(filesDir, "mmd", "mmd.xsd")
    goodPath = os.path.join(apiDir, "passing.xml")

    schema = lxml.etree.XMLSchema(lxml.etree.parse(schemaPath))
    goodWorker = Worker("none", goodPath, schema)

    goodWorker._conf.env_string = "yolo"

    # Valid XML: validation succeeds and returns the rewritten document
    goodXml = bytes(readFile(goodPath), "utf-8")
    valid, msg, goodXml = goodWorker.validate(goodXml)
    assert valid is True

    # The namespace is everything before the colon in the parent identifier
    parentBlock = re.search(
        b'<mmd:related_dataset relation_type="parent">(.+?)</mmd:related_dataset>', goodXml
    )
    parentNamespace, _, _ = parentBlock.group(1).partition(b":")
    assert parentNamespace.decode() == "test.no.yolo"

    # Malformed parent dataset id
    # NOTE(review): only two values are unpacked here, matching the early
    # `return False, err` in the visible Worker.validate diff, whereas its
    # other return paths yield three values -- confirm the intended contract.
    badPath = os.path.join(apiDir, "malformedparentid.xml")
    badWorker = Worker("none", badPath, schema)
    badXml = bytes(readFile(badPath), "utf-8")
    valid, msg = badWorker.validate(badXml)
    assert valid is False
    assert msg == "Malformed parent dataset identifier [b'64db6102-14ce-41e9-b93b-61dbb2cb8b4e']"

# END Test testApiWorker_ParentNamespaceReplacement


@pytest.mark.api
def testApiWorker_CheckInfoContent(monkeypatch, filesDir):
Expand Down