Skip to content

Commit

Permalink
split sync and archive functions, update README
Browse files Browse the repository at this point in the history
  • Loading branch information
tomkralidis committed Mar 15, 2024
1 parent 9d6ce34 commit 5e233ed
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 84 deletions.
5 changes: 2 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
#
###############################################################################

DOCKER_COMPOSE_ARGS=--file docker/docker-compose.yml --file docker/docker-compose.override.yml --project-name wis2-gdc
DOCKER_COMPOSE_DEV_ARGS=--file docker/docker-compose.yml --file docker/docker-compose.override.yml --file docker/docker-compose.dev.yml --project-name wis2-gdc
DOCKER_COMPOSE_ARGS=--project-name wis2-gdc --file docker/docker-compose.yml --file docker/docker-compose.override.yml

build:
docker compose $(DOCKER_COMPOSE_ARGS) build
Expand All @@ -29,7 +28,7 @@ up:
docker compose $(DOCKER_COMPOSE_ARGS) up --detach

dev:
docker compose $(DOCKER_COMPOSE_DEV_ARGS) up --detach
docker compose $(DOCKER_COMPOSE_ARGS) --file docker/docker-compose.dev.yml up --detach

login:
docker exec -it wis2-gdc-management /bin/bash
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ wis2-gdc is a Reference Implementation of a WIS2 Global Discovery Catalogue.

## Workflow

- connects to a WIS2 Global Broker, subscribed to the following:
- connects to a WIS2 Global Broker, subscribed to the following topic:
- `origin/a/wis2/+/metadata/#`
- on discovery metadata notifications, run the WCMP2 ATS via [pywcmp](/~https://github.com/wmo-im/pywcmp)
- ETS
- KPIs
- publish ETS and KPI reports to local broker under `gdc-reports`
- on discovery metadata notifications, run the WCMP2 ETS and KPIs via [pywcmp](/~https://github.com/wmo-im/pywcmp)
- publish ETS and KPI reports to local broker under `monitor/a/wis2/$WIS2_GDC_CENTRE_ID/centre-id`
- publish to a WIS2 GDC (OGC API - Records) using one of the supported transaction backends:
- [OGC API - Features - Part 4: Create, Replace, Update and Delete](https://docs.ogc.org/DRAFTS/20-002.html)
- Elasticsearch direct (default)
- collect real-time and offline GDC metrics and make them available as [OpenMetrics](https://openmetrics.io)
- produce a metadata zipfile for download (daily)

## Installation

Expand Down
11 changes: 5 additions & 6 deletions docker/wis2-gdc-api/wis2-gdc-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,6 @@ resources:
crs:
- CRS84
links:
- type: text/html
rel: canonical
title: WMO Information System (WIS) | World Meteorological Organization
href: https://community.wmo.int/en/activity-areas/wis
hreflang: en-CA
- type: application/zip
rel: enclosure
title: WIS2 discovery metadata archive (generated daily)
Expand All @@ -80,7 +75,11 @@ resources:
title: WIS2 Global Discovery Catalogue metrics
href: ${WIS2_GDC_API_URL}/wis2-gdc-metrics.txt
hreflang: en-CA
extents:
- type: text/html
rel: canonical
title: WMO Information System (WIS) | World Meteorological Organization
href: https://community.wmo.int/en/activity-areas/wis
hreflang: en-CA
extents:
spatial:
bbox: [-180, -90, 180, 90]
Expand Down
3 changes: 2 additions & 1 deletion wis2_gdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@

from wis2_gdc.registrar import register, setup, teardown
from wis2_gdc.monitor import monitor
from wis2_gdc.sync import archive, sync
from wis2_gdc.archive import archive
from wis2_gdc.sync import sync

__version__ = '0.1.dev0'

Expand Down
103 changes: 103 additions & 0 deletions wis2_gdc/archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
###############################################################################
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
###############################################################################

import json
import logging
import zipfile

import click
import requests
from typing import Union

from pywis_pubsub import cli_options
from pywis_pubsub.mqtt import MQTTPubSubClient

from wis2_gdc.env import API_URL, API_URL_DOCKER, BROKER_URL, CENTRE_ID

LOGGER = logging.getLogger(__name__)


def archive_metadata(url: str, archive_zipfile: str) -> None:
"""
Archive all discovery metadata from a GDC to an archive zipfile
:param url: `str` of GDC API URL
:param archive_zipfile: `str` of filename of zipfile
:returns: `None`
"""

def _get_next_link(links) -> Union[str, None]:
"""
Inner helper function to derive rel=next link from GDC response
:param links: `list` of links array
:returns: `str` of next link or `None`
"""

for link in links:
if link['rel'] == 'next':
return link['href']

return None

end = False
gdc_items_url = f'{url}/collections/wis2-discovery-metadata/items'
response = None

with zipfile.ZipFile(archive_zipfile, 'w') as zf:
while not end:
if response is None:
gdc_items_url2 = gdc_items_url
else:
gdc_items_url2 = _get_next_link(response['links'])

LOGGER.info(f'Querying GDC with {gdc_items_url2}')
response = requests.get(gdc_items_url2).json()

for feature in response['features']:
LOGGER.debug(f"Saving {feature['id']} to archive")
filename = f"{feature['id']}.json"
zf.writestr(filename, json.dumps(feature))

if _get_next_link(response['links']) is None:
end = True

msg = {
'type': 'archive-published',
'description': f'Archive published at {API_URL}/wis2-gdc-archive.zip'
}

m = MQTTPubSubClient(BROKER_URL)
m.pub(f'monitor/a/wis2/{CENTRE_ID}', json.dumps(msg))
m.close()


@click.command()
@click.pass_context
@click.argument('archive-zipfile')
@cli_options.OPTION_VERBOSITY
def archive(ctx, archive_zipfile, verbosity='NOTSET'):
"""Archive discovery metadata records"""

click.echo(f'Achiving metadata from GDC {API_URL}')
archive_metadata(API_URL_DOCKER, archive_zipfile)
69 changes: 0 additions & 69 deletions wis2_gdc/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,86 +19,17 @@
#
###############################################################################

import json
import logging
import zipfile

import click
import requests
from typing import Union

from pywis_pubsub import cli_options
from pywis_pubsub.mqtt import MQTTPubSubClient

from wis2_gdc.env import API_URL, API_URL_DOCKER, BROKER_URL
from wis2_gdc.harvester import HARVESTERS

LOGGER = logging.getLogger(__name__)


def archive_metadata(url: str, archive_zipfile: str) -> None:
"""
Archive all discovery metadata from a GDC to an archive zipfile
:param url: `str` of GDC API URL
:param archive_zipfile: `str` of filename of zipfile
:returns: `None`
"""

def _get_next_link(links) -> Union[str, None]:
"""
Inner helper function to derive rel=next link from GDC response
:param links: `list` of links array
:returns: `str` of next link or `None`
"""

for link in links:
if link['rel'] == 'next':
return link['href']

return None

end = False
gdc_items_url = f'{url}/collections/wis2-discovery-metadata/items'
response = None

with zipfile.ZipFile(archive_zipfile, 'w') as zf:
while not end:
if response is None:
gdc_items_url2 = gdc_items_url
else:
gdc_items_url2 = _get_next_link(response['links'])

LOGGER.info(f'Querying GDC with {gdc_items_url2}')
response = requests.get(gdc_items_url2).json()

for feature in response['features']:
LOGGER.debug(f"Saving {feature['id']} to archive")
filename = f"{feature['id']}.json"
zf.writestr(filename, json.dumps(feature))

if _get_next_link(response['links']) is None:
end = True

m = MQTTPubSubClient(BROKER_URL)
m.pub('gdc-reports/archive', f'Archive published at {API_URL}/wis2-gdc-archive.zip') # noqa
m.close()


@click.command()
@click.pass_context
@click.argument('archive-zipfile')
@cli_options.OPTION_VERBOSITY
def archive(ctx, archive_zipfile, verbosity='NOTSET'):
"""Archive discovery metadata records"""

click.echo(f'Achiving metadata from GDC {API_URL}')
archive_metadata(API_URL_DOCKER, archive_zipfile)


@click.command
@click.argument('harvest_type', nargs=1,
type=click.Choice(list(HARVESTERS.keys())))
Expand Down

0 comments on commit 5e233ed

Please sign in to comment.