Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for linting in-memory dictionaries #94

Merged
merged 12 commits into from
Nov 29, 2022
Merged
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file.
The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/).

## Unreleased
## Added
- Ability to lint dictionaries https://github.com/stac-utils/stac-check/pull/94

## [v1.3.1] - 2022-10-05
## Changed
Expand Down
17 changes: 14 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ or for local development
`$ pip install -e .`

---
### Usage
### CLI Usage
```
Usage: stac-check [OPTIONS] FILE

Expand All @@ -34,7 +34,18 @@ $ make build
$ make shell
```
---
### Examples
### Lint Dictionary

```
from stac_check.lint import Linter

linter = Linter(dict)

for k,v in linter.create_best_practices_dict().items():
    print(k,":",v)
```
---
### CLI Examples

``` stac-check https://raw.githubusercontent.com/stac-utils/pystac/main/tests/data-files/examples/0.9.0/collection-spec/examples/landsat-collection.json --recursive ```
```
Expand Down Expand Up @@ -208,4 +219,4 @@ Validation error message:
'id' is a required property of the root of the STAC object

This object has 5 links
</pre>
</pre>
43 changes: 30 additions & 13 deletions stac_check/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
import os
from dataclasses import dataclass
import requests
from typing import Optional
from typing import Optional, Union
from dotenv import load_dotenv
import pkg_resources

load_dotenv()

@dataclass
class Linter:
item: str
item: Union[str, dict] # url, file name, or dictionary
config_file: Optional[str] = None
assets: bool = False
links: bool = False
Expand All @@ -38,7 +38,7 @@ def __post_init__(self):
self.invalid_link_request = self.check_links_assets(10, "links", "request") if self.links else None
self.schema = self.message["schema"] if "schema" in self.message else []
self.object_id = self.data["id"] if "id" in self.data else ""
self.file_name = os.path.basename(self.item).split('.')[0]
self.file_name = self.get_asset_name(self.item)
self.best_practices_msg = self.create_best_practices_msg()

@staticmethod
Expand All @@ -57,24 +57,41 @@ def parse_config(config_file):

return default_config

def get_asset_name(self, file):
    """Return a short name for the object being linted.

    Args:
        file: A path or URL string, or an in-memory dictionary.

    Returns:
        For a string, the base name of the path up to its first "."; for a
        dictionary, the value stored under "id", or "" when that key is
        absent.
    """
    if isinstance(file, str):
        return os.path.basename(file).split('.')[0]
    # Fall back to "" so that a dictionary lacking "id" does not abort
    # linting with a KeyError before any results can be reported.
    return file.get("id", "")

def load_data(self, file):
if is_valid_url(file):
resp = requests.get(file)
data = resp.json()
if isinstance(file, str):
if is_valid_url(file):
resp = requests.get(file)
data = resp.json()
else:
with open(file) as json_file:
data = json.load(json_file)
return data
else:
with open(file) as json_file:
data = json.load(json_file)
return data
return file

def validate_file(self, file):
stac = StacValidate(file, links=self.links, assets=self.assets)
stac.run()
if isinstance(file, str):
stac = StacValidate(file, links=self.links, assets=self.assets)
stac.run()
else:
stac = StacValidate()
stac.validate_dict(file)
return stac.message[0]

def recursive_validation(self, file):
if self.recursive:
stac = StacValidate(file, recursive=True, max_depth=self.max_depth)
stac.run()
if isinstance(file, str):
stac = StacValidate(file, recursive=True, max_depth=self.max_depth)
stac.run()
else:
stac = StacValidate(recursive=True, max_depth=self.max_depth)
stac.validate_dict(file)
return stac.message

def set_update_message(self):
Expand Down
245 changes: 245 additions & 0 deletions tests/test_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,248 @@ def test_catalog_name():
file = "sample_files/1.0.0/collection.json"
linter = Linter(file)
assert linter.check_catalog_id_file_name()

def test_lint_dict_collection():
    """Lint a STAC Collection passed as a dictionary and check the results."""
    extensions = [
        "https://stac-extensions.github.io/eo/v1.0.0/schema.json",
        "https://stac-extensions.github.io/projection/v1.0.0/schema.json",
        "https://stac-extensions.github.io/view/v1.0.0/schema.json",
    ]
    providers = [
        {
            "name": "Remote Data, Inc",
            "description": "Producers of awesome spatiotemporal assets",
            "roles": ["producer", "processor"],
            "url": "http://remotedata.io",
        },
    ]
    extent = {
        "spatial": {
            "bbox": [
                [
                    172.91173669923782,
                    1.3438851951615003,
                    172.95469614953714,
                    1.3690476620161975,
                ],
            ],
        },
        "temporal": {
            "interval": [
                ["2020-12-11T22:38:32.125Z", "2020-12-14T18:02:31.437Z"],
            ],
        },
    }
    summaries = {
        "platform": ["cool_sat1", "cool_sat2"],
        "constellation": ["ion"],
        "instruments": ["cool_sensor_v1", "cool_sensor_v2"],
        "gsd": {"minimum": 0.512, "maximum": 0.66},
        "eo:cloud_cover": {"minimum": 1.2, "maximum": 1.2},
        "proj:epsg": {"minimum": 32659, "maximum": 32659},
        "view:sun_elevation": {"minimum": 54.9, "maximum": 54.9},
        "view:off_nadir": {"minimum": 3.8, "maximum": 3.8},
        "view:sun_azimuth": {"minimum": 135.7, "maximum": 135.7},
    }
    links = [
        {
            "rel": "root",
            "href": "./collection.json",
            "type": "application/json",
            "title": "Simple Example Collection",
        },
        {
            "rel": "item",
            "href": "./simple-item.json",
            "type": "application/geo+json",
            "title": "Simple Item",
        },
        {
            "rel": "item",
            "href": "./core-item.json",
            "type": "application/geo+json",
        },
        {
            "rel": "item",
            "href": "./extended-item.json",
            "type": "application/geo+json",
            "title": "Extended Item",
        },
    ]
    # Key order matches the original fixture.
    collection = {
        "id": "simple-collection",
        "type": "Collection",
        "stac_extensions": extensions,
        "stac_version": "1.0.0",
        "description": "A simple collection demonstrating core catalog fields with links to a couple of items",
        "title": "Simple Example Collection",
        "providers": providers,
        "extent": extent,
        "license": "CC-BY-4.0",
        "summaries": summaries,
        "links": links,
    }

    linter = Linter(collection)

    assert linter.valid_stac == True
    assert linter.asset_type == "COLLECTION"
    assert linter.check_catalog_id_file_name() == False
    expected_messages = [
        "Object should be called 'collection.json' not 'simple-collection.json'"
    ]
    assert linter.create_best_practices_dict()["check_catalog_id"] == expected_messages
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Collections usually have descriptive IDs, e.g. https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a. I think this lint is incorrect.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This project was an attempt to follow the STAC Best Practices document. I think in the document this advice is only for static catalogs? https://github.com/radiantearth/stac-spec/blob/master/best-practices.md#static-catalogs

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this. I'm changing the check so that it only activates with a static file.


def test_lint_dict_item():
    """Lint a STAC Item passed as a dictionary and check the results."""
    west, south = 172.91173669923782, 1.3438851951615003
    east, north = 172.95469614953714, 1.3690476620161975

    geometry = {
        "type": "Polygon",
        "coordinates": [
            [
                [west, south],
                [east, south],
                [east, north],
                [west, north],
                [west, south],
            ],
        ],
    }
    properties = {
        "title": "Core Item",
        "description": "A sample STAC Item that includes examples of all common metadata",
        # The null datetime is what the datetime_null check below reports on.
        "datetime": None,
        "start_datetime": "2020-12-11T22:38:32.125Z",
        "end_datetime": "2020-12-11T22:38:32.327Z",
        "created": "2020-12-12T01:48:13.725Z",
        "updated": "2020-12-12T01:48:13.725Z",
        "platform": "cool_sat1",
        "instruments": ["cool_sensor_v1"],
        "constellation": "ion",
        "mission": "collection 5624",
        "gsd": 0.512,
    }
    links = [
        {
            "rel": "collection",
            "href": "./collection.json",
            "type": "application/json",
            "title": "Simple Example Collection",
        },
        {
            "rel": "root",
            "href": "./collection.json",
            "type": "application/json",
            "title": "Simple Example Collection",
        },
        {
            "rel": "parent",
            "href": "./collection.json",
            "type": "application/json",
            "title": "Simple Example Collection",
        },
        {
            "rel": "alternate",
            "type": "text/html",
            "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html",
            "title": "HTML version of this STAC Item",
        },
    ]
    assets = {
        "analytic": {
            "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_analytic.tif",
            "type": "image/tiff; application=geotiff; profile=cloud-optimized",
            "title": "4-Band Analytic",
            "roles": ["data"],
        },
        "thumbnail": {
            "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.jpg",
            "title": "Thumbnail",
            "type": "image/png",
            "roles": ["thumbnail"],
        },
        "visual": {
            "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.tif",
            "type": "image/tiff; application=geotiff; profile=cloud-optimized",
            "title": "3-Band Visual",
            "roles": ["visual"],
        },
        "udm": {
            "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_analytic_udm.tif",
            "title": "Unusable Data Mask",
            "type": "image/tiff; application=geotiff;",
        },
        "json-metadata": {
            "href": "http://remotedata.io/catalog/20201211_223832_CS2/extended-metadata.json",
            "title": "Extended Metadata",
            "type": "application/json",
            "roles": ["metadata"],
        },
        "ephemeris": {
            "href": "http://cool-sat.com/catalog/20201211_223832_CS2/20201211_223832_CS2.EPH",
            "title": "Satellite Ephemeris Metadata",
        },
    }
    # Key order matches the original fixture.
    item = {
        "stac_version": "1.0.0",
        "stac_extensions": [],
        "type": "Feature",
        "id": "20201211_223832_CS2",
        "bbox": [west, south, east, north],
        "geometry": geometry,
        "properties": properties,
        "collection": "simple-collection",
        "links": links,
        "assets": assets,
    }

    linter = Linter(item)

    assert linter.valid_stac == True
    assert linter.asset_type == "ITEM"
    assert linter.check_datetime_null() == True
    expected_messages = [
        'Please avoid setting the datetime field to null, many clients search on this field'
    ]
    assert linter.create_best_practices_dict()["datetime_null"] == expected_messages