Skip to content

Commit

Permalink
Merge pull request #128 from metno/fix-issue-127
Browse files Browse the repository at this point in the history
enforce namespace presence
  • Loading branch information
charlienegri authored Feb 14, 2023
2 parents 1a68220 + c3c27a3 commit ab90485
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 42 deletions.
19 changes: 10 additions & 9 deletions dmci/api/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def _check_information_content(self, data):
xml_doc = etree.fromstring(data)
valid = self._extract_metadata_id(xml_doc)
if not valid:
return False, "Input MMD XML file has no valid UUID metadata_identifier"
return False, "Input MMD XML file has no valid uri:UUID metadata_identifier"

# Check XML file
logger.info("Performing in depth checking.")
Expand All @@ -176,27 +176,28 @@ def _extract_metadata_id(self, xml_doc):
the class variable.
"""
self._file_metadata_id = None
self._namespace = None
fileUUID = None
namespace = ""
namespace = None
for xml_entry in xml_doc:
local = etree.QName(xml_entry)
if local.localname == "metadata_identifier":
# If uri:UUID, get UUID and uri, if no 'uri:' get UUID
# Only accept the namespace:UUID format; both parts must be present
words = xml_entry.text.split(":")
if len(words) < 2:
fileUUID = words[0]
elif len(words) == 2:
namespace, fileUUID = words
else:
if len(words) != 2:
logger.warning("metadata_identifier not formed as namespace:UUID")
return False
namespace, fileUUID = words

logger.info("XML file metadata_identifier namespace:%s" % namespace)
logger.info("XML file metadata_identifier UUID: %s" % fileUUID)
break

if fileUUID is None:
logger.warning("No metadata_identifier found in XML file")
logger.warning("No UUID found in XML file")
return False
if namespace is None:
logger.warning("No namespace found in XML file")
return False

try:
Expand Down
2 changes: 1 addition & 1 deletion tests/files/api/aqua-modis-parent.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<mmd:mmd xmlns:mmd="http://www.met.no/schema/mmd" xmlns:gml="http://www.opengis.net/gml">
<mmd:metadata_identifier>64db6102-14ce-41e9-b93b-61dbb2cb8b4e</mmd:metadata_identifier>
<mmd:metadata_identifier>test.no:64db6102-14ce-41e9-b93b-61dbb2cb8b4e</mmd:metadata_identifier>
<mmd:title xml:lang="en">Direct Broadcast data processed in satellite swath to L1C.</mmd:title>
<mmd:title xml:lang="no">Direktesendte satellittdata prosessert i satellittsveip til L1C.</mmd:title>
<mmd:abstract xml:lang="en">Direct Broadcast data received at MET NORWAY Oslo. Processed by standard processing software to geolocated and calibrated values in satellite swath in received instrument resolution.</mmd:abstract>
Expand Down
4 changes: 2 additions & 2 deletions tests/files/api/failing.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<mmd:mmd xmlns:mmd="http://www.met.no/schema/mmd" xmlns:gml="http://www.opengis.net/gml">
<mmd:metadata_identifier>3facf8ba-bf35-491e-b740-notvaliduuid</mmd:metadata_identifier>
<mmd:metadata_identifier>met.no:3facf8ba-bf35-491e-b740-notvaliduuid</mmd:metadata_identifier>
<mmd:title xml:lang="en">Direct Broadcast data processed in satellite swath to L1C</mmd:title>
<mmd:title xml:lang="nor">Direktesendte satellittdata prosessert i satellittsveip til L1C.</mmd:title>
<mmd:abstract xml:lang="en">Direct Broadcast data received at MET NORWAY Oslo. Processed by standard processing software to geolocated and calibrated values in satellite swath in received instrument resolution.</mmd:abstract>
Expand Down Expand Up @@ -145,4 +145,4 @@
<mmd:publication_date>2020-05-13</mmd:publication_date>
<mmd:title>Direct Broadcast data processed in satellite swath to L1C</mmd:title>
</mmd:dataset_citation>
</mmd:mmd>
</mmd:mmd>
4 changes: 2 additions & 2 deletions tests/files/api/passing.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<mmd:mmd xmlns:mmd="http://www.met.no/schema/mmd" xmlns:gml="http://www.opengis.net/gml">
<mmd:metadata_identifier>a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</mmd:metadata_identifier>
<mmd:metadata_identifier>test.no:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</mmd:metadata_identifier>
<mmd:title xml:lang="en">Direct Broadcast data processed in satellite swath to L1C</mmd:title>
<mmd:title xml:lang="nor">Direktesendte satellittdata prosessert i satellittsveip til L1C.</mmd:title>
<mmd:abstract xml:lang="en">Direct Broadcast data received at MET NORWAY Oslo. Processed by standard processing software to geolocated and calibrated values in satellite swath in received instrument resolution.</mmd:abstract>
Expand Down Expand Up @@ -125,7 +125,7 @@
<mmd:description>Direct download of file</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/fileServer/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:resource>
</mmd:data_access>
<mmd:related_dataset relation_type="parent">64db6102-14ce-41e9-b93b-61dbb2cb8b4e</mmd:related_dataset>
<mmd:related_dataset relation_type="parent">test.no:64db6102-14ce-41e9-b93b-61dbb2cb8b4e</mmd:related_dataset>
<mmd:storage_information>
<mmd:file_name>aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_name>
<mmd:file_location>/lustre/storeB/immutable/archive/projects/remotesensing/satellite-thredds/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_location>
Expand Down
18 changes: 9 additions & 9 deletions tests/files/mmd/parent-uuid-list.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?xml version="1.0" encoding="utf-8"?>
<parent>
<id>64db6102-14ce-41e9-b93b-61dbb2cb8b4e</id>
<id>bc82c179-144e-415a-8dd2-64d3569a8d50</id>
<id>da280021-13d8-425e-9783-64911d772397</id>
<id>d42548cc-337f-4005-91f4-a5dc306244b0</id>
<id>e1d0863d-71d3-4b9f-bb17-2af42d4956e7</id>
<id>6827f045-36c1-4678-a0bf-d91b41f8eefb</id>
<id>c7f8731b-5cfe-4cb5-ac57-168a19a2957b</id>
<id>f6cbb81c-1ce1-4080-b242-819e59cee78d</id>
<id>8e5ec6e8-f0ac-47cc-a869-44373c204848</id>
<id>test.no:64db6102-14ce-41e9-b93b-61dbb2cb8b4e</id>
<id>test.no:bc82c179-144e-415a-8dd2-64d3569a8d50</id>
<id>test.no:da280021-13d8-425e-9783-64911d772397</id>
<id>test.no:d42548cc-337f-4005-91f4-a5dc306244b0</id>
<id>test.no:e1d0863d-71d3-4b9f-bb17-2af42d4956e7</id>
<id>test.no:6827f045-36c1-4678-a0bf-d91b41f8eefb</id>
<id>test.no:c7f8731b-5cfe-4cb5-ac57-168a19a2957b</id>
<id>test.no:f6cbb81c-1ce1-4080-b242-819e59cee78d</id>
<id>test.no:8e5ec6e8-f0ac-47cc-a869-44373c204848</id>
</parent>
4 changes: 2 additions & 2 deletions tests/files/reference/parent_translated.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0"?>
<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:gml="http://www.opengis.net/gml" xmlns:gmx="http://www.isotc211.org/2005/gmx" xmlns:gco="http://www.isotc211.org/2005/gco" xsi:schemaLocation="http://www.isotc211.org/2005/gmd http://schemas.opengis.net/iso/19139/20060504/gmd/gmd.xsd http://www.isotc211.org/2005/gmx http://schemas.opengis.net/iso/19139/20060504/gmx/gmx.xsd">
<gmd:fileIdentifier>
<gco:CharacterString>64db6102-14ce-41e9-b93b-61dbb2cb8b4e</gco:CharacterString>
<gco:CharacterString>test.no:64db6102-14ce-41e9-b93b-61dbb2cb8b4e</gco:CharacterString>
</gmd:fileIdentifier>
<gmd:language>
<gmd:LanguageCode codeList="http://www.loc.gov/standards/iso639-2" codeListValue="eng">English</gmd:LanguageCode>
Expand Down Expand Up @@ -96,7 +96,7 @@
<gmd:identifier>
<gmd:MD_Identifier>
<gmd:code>
<gco:CharacterString>64db6102-14ce-41e9-b93b-61dbb2cb8b4e</gco:CharacterString>
<gco:CharacterString>test.no:64db6102-14ce-41e9-b93b-61dbb2cb8b4e</gco:CharacterString>
</gmd:code>
</gmd:MD_Identifier>
</gmd:identifier>
Expand Down
6 changes: 3 additions & 3 deletions tests/files/reference/pycsw_dist_translated.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:gml="http://www.opengis.net/gml" xmlns:gmx="http://www.isotc211.org/2005/gmx" xmlns:gco="http://www.isotc211.org/2005/gco" xsi:schemaLocation="http://www.isotc211.org/2005/gmd http://schemas.opengis.net/iso/19139/20060504/gmd/gmd.xsd http://www.isotc211.org/2005/gmx http://schemas.opengis.net/iso/19139/20060504/gmx/gmx.xsd">
<gmd:fileIdentifier>
<gco:CharacterString>a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</gco:CharacterString>
<gco:CharacterString>test.no:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</gco:CharacterString>
</gmd:fileIdentifier>
<gmd:language>
<gmd:LanguageCode codeList="http://www.loc.gov/standards/iso639-2" codeListValue="eng">English</gmd:LanguageCode>
Expand All @@ -10,7 +10,7 @@
<gmd:MD_CharacterSetCode codeListValue="utf8" codeList="http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_19139_Schemas/resources/codelist/ML_gmxCodelists.xml#MD_CharacterSetCode"/>
</gmd:characterSet>
<gmd:parentIdentifier>
<gco:CharacterString>64db6102-14ce-41e9-b93b-61dbb2cb8b4e</gco:CharacterString>
<gco:CharacterString>test.no:64db6102-14ce-41e9-b93b-61dbb2cb8b4e</gco:CharacterString>
</gmd:parentIdentifier>
<gmd:hierarchyLevel>
<gmd:MD_ScopeCode codeList="http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_19139_Schemas/resources/Codelist/ML_gmxCodelists.xml#MD_ScopeCode" codeListValue="dataset"/>
Expand Down Expand Up @@ -91,7 +91,7 @@
<gmd:identifier>
<gmd:MD_Identifier>
<gmd:code>
<gco:CharacterString>a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</gco:CharacterString>
<gco:CharacterString>test.no:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</gco:CharacterString>
</gmd:code>
</gmd:MD_Identifier>
</gmd:identifier>
Expand Down
28 changes: 14 additions & 14 deletions tests/test_api/test_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,14 @@ def testApiWorker_CheckInfoContent(monkeypatch, filesDir):
failFile = os.path.join(filesDir, "api", "failing.xml")
failData = bytes(readFile(failFile), "utf-8")
assert tstWorker._check_information_content(failData) == (
False, "Input MMD XML file has no valid UUID metadata_identifier"
False, "Input MMD XML file has no valid uri:UUID metadata_identifier"
)

# Check Error report
failFile = os.path.join(filesDir, "api", "failing.xml")
failData = (
b"<root>"
b" <metadata_identifier>00000000-0000-0000-0000-000000000000</metadata_identifier>"
b" <metadata_identifier>met.no:00000000-0000-0000-0000-000000000000</metadata_identifier>"
b" <resource>imap://met.no</resource>"
b" <geographic_extent>"
b" <rectangle>"
Expand Down Expand Up @@ -204,51 +204,51 @@ def testApiWorker_ExtractMetaDataID(filesDir, mockXml):
passFile = os.path.join(filesDir, "api", "passing.xml")
failFile = os.path.join(filesDir, "api", "failing.xml")

namespaced_UUID = (
no_namespaced_UUID = (
'<mmd:mmd xmlns:mmd="http://www.met.no/schema/mmd" xmlns:gml="http://www.opengis.net/gml">'
' <mmd:metadata_identifier>%s</mmd:metadata_identifier>'
'</mmd:mmd>'
)%("test.no:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b")
)%("a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b")

namespaced_UUID_bad = (
'<mmd:mmd xmlns:mmd="http://www.met.no/schema/mmd" xmlns:gml="http://www.opengis.net/gml">'
' <mmd:metadata_identifier>%s</mmd:metadata_identifier>'
'</mmd:mmd>'
)%("test:no:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b")

# Valid File with no Namespace
# Valid File with Namespace
passXML = lxml.etree.fromstring(bytes(readFile(passFile), "utf-8"))
tstWorker = Worker("insert", passFile, None)
assert tstWorker._extract_metadata_id(passXML) is True
assert tstWorker._namespace == "test.no"
assert tstWorker._file_metadata_id is not None
assert tstWorker._namespace == ""

# Valid mmd with namespace
passXML = lxml.etree.fromstring(bytes(namespaced_UUID, "utf-8"))
# MMD with no namespace (rejected: the namespace is mandatory)
passXML = lxml.etree.fromstring(bytes(no_namespaced_UUID, "utf-8"))
tstWorker = Worker("insert", passFile, None)
assert tstWorker._extract_metadata_id(passXML) is True
assert tstWorker._file_metadata_id is not None
assert tstWorker._namespace == "test.no"
assert tstWorker._extract_metadata_id(passXML) is False
assert tstWorker._file_metadata_id is None
assert tstWorker._namespace is None

# MMD with invalid namespace
passXML = lxml.etree.fromstring(bytes(namespaced_UUID_bad, "utf-8"))
tstWorker = Worker("insert", passFile, None)
assert tstWorker._extract_metadata_id(passXML) is False
assert tstWorker._file_metadata_id is None
assert tstWorker._namespace == ""
assert tstWorker._namespace is None

# Invalid File
mockData = lxml.etree.fromstring(bytes(readFile(mockXml), "utf-8"))
tstWorker = Worker("insert", mockXml, None)
assert tstWorker._extract_metadata_id(mockData) is False
assert tstWorker._file_metadata_id is None
assert tstWorker._namespace == ""
assert tstWorker._namespace is None

# Invalid UUID
failXML = lxml.etree.fromstring(bytes(readFile(failFile), "utf-8"))
tstWorker = Worker("insert", failFile, None)
assert tstWorker._extract_metadata_id(failXML) is False
assert tstWorker._file_metadata_id is None
assert tstWorker._namespace == ""
assert tstWorker._namespace is None

# END Test testApiWorker_ExtractMetaDataID

0 comments on commit ab90485

Please sign in to comment.