Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update 1.4.0 #111

Merged
merged 12 commits into from
Oct 9, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
pre-commit 1
jonhealy1 committed Oct 9, 2024
commit fc9b7f2fa990a8a637132764584b21379a88856f
16 changes: 8 additions & 8 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -6,24 +6,24 @@
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'stac-check'
author = 'Jonathan Healy'
release = '1.3.1'
project = "stac-check"
author = "Jonathan Healy"
release = "1.3.1"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = []

templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'alabaster'
html_static_path = ['_static']
html_theme = "alabaster"
html_static_path = ["_static"]

html_css_files = [
'custom.css',
"custom.css",
]
60 changes: 36 additions & 24 deletions stac_check/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import click
from .lint import Linter
import pkg_resources

def link_asset_message(link_list:list, type: str, format: str) -> None:
from .lint import Linter


def link_asset_message(link_list: list, type: str, format: str) -> None:
"""Prints a list of links or assets and any errors associated with them.

Args:
@@ -20,6 +22,7 @@ def link_asset_message(link_list:list, type: str, format: str) -> None:
else:
click.secho(f"No {type.upper()} {format} errors!", fg="green")


def recursive_message(linter: Linter) -> None:
"""Displays messages related to the recursive validation of assets in a collection or catalog.

@@ -36,18 +39,19 @@ def recursive_message(linter: Linter) -> None:
for count, msg in enumerate(linter.validate_all):
click.secho(f"Asset {count+1} Validated: {msg['path']}", bg="white", fg="black")
click.secho()
if msg['valid_stac'] == True:
if msg["valid_stac"] == True:
recursive_linter = Linter(msg["path"], recursive=0)
cli_message(recursive_linter)
else:
click.secho(f"Valid: {msg['valid_stac']}", fg='red')
click.secho(f"Valid: {msg['valid_stac']}", fg="red")
click.secho("Schemas validated: ", fg="blue")
for schema in msg["schema"]:
click.secho(f" {schema}")
click.secho(f"Error Type: {msg['error_type']}", fg='red')
click.secho(f"Error Message: {msg['error_message']}", fg='red')
click.secho(f"Error Type: {msg['error_type']}", fg="red")
click.secho(f"Error Message: {msg['error_message']}", fg="red")
click.secho("-------------------------")


def intro_message(linter: Linter) -> None:
"""Prints an introduction message for the stac-check tool.

@@ -63,64 +67,69 @@ def intro_message(linter: Linter) -> None:
Returns:
None.
"""
click.secho("""
click.secho(
"""
____ ____ __ ___ ___ _ _ ____ ___ __ _
/ ___)(_ _)/ _\ / __)___ / __)/ )( \( __)/ __)( / )
\___ \ )( / \( (__(___)( (__ ) __ ( ) _)( (__ ) (
(____/ (__)\_/\_/ \___) \___)\_)(_/(____)\___)(__\_)
""")
"""
)

click.secho("stac-check: STAC spec validaton and linting tool", bold=True)

click.secho()

if linter.version == "1.0.0":
click.secho(linter.set_update_message(), fg='green')
click.secho(linter.set_update_message(), fg="green")
else:
click.secho(linter.set_update_message(), fg='red')
click.secho(linter.set_update_message(), fg="red")

click.secho()

click.secho(f"Validator: stac-validator {linter.validator_version}", bg="blue", fg="white")
click.secho(
f"Validator: stac-validator {linter.validator_version}", bg="blue", fg="white"
)

click.secho()


def cli_message(linter: Linter) -> None:
"""Prints various messages about the STAC object being validated.

Args:
linter: The `Linter` object containing information about
linter: The `Linter` object containing information about
the STAC object to be validated.

Returns:
None
"""
if linter.valid_stac == True:
click.secho(f"Valid {linter.asset_type}: {linter.valid_stac}", fg='green')
click.secho(f"Valid {linter.asset_type}: {linter.valid_stac}", fg="green")
else:
click.secho(f"Valid {linter.asset_type}: {linter.valid_stac}", fg='red')
click.secho(f"Valid {linter.asset_type}: {linter.valid_stac}", fg="red")

''' schemas validated for core object '''
""" schemas validated for core object """
click.secho()
if len(linter.schema) > 0:
click.secho("Schemas validated: ", fg="blue")
for schema in linter.schema:
click.secho(f" {schema}")

''' best practices message'''
""" best practices message"""
click.secho()
for message in linter.best_practices_msg:
if message == linter.best_practices_msg[0]:
click.secho(message, bg='blue')
click.secho(message, bg="blue")
else:
click.secho(message, fg='red')
click.secho(message, fg="red")

if linter.validate_all == True:
click.secho()
click.secho(f"Recursive validation has passed!", fg='blue')
click.secho(f"Recursive validation has passed!", fg="blue")
elif linter.validate_all == False and linter.recursive:
click.secho()
click.secho(f"Recursive validation has failed!", fg='red')
click.secho(f"Recursive validation has failed!", fg="red")

if linter.invalid_asset_format is not None:
click.secho()
@@ -143,7 +152,7 @@ def cli_message(linter: Linter) -> None:
click.secho(f" {linter.error_type}")

if linter.error_msg != "":
click.secho(f"Validation error message: ", fg='red')
click.secho(f"Validation error message: ", fg="red")
click.secho(f" {linter.error_msg}")

click.secho(f"This object has {len(linter.data['links'])} links")
@@ -153,6 +162,7 @@ def cli_message(linter: Linter) -> None:
### Stac validator response for reference
# click.secho(json.dumps(linter.message, indent=4))


@click.option(
"--recursive",
"-r",
@@ -172,12 +182,14 @@ def cli_message(linter: Linter) -> None:
"-l", "--links", is_flag=True, help="Validate links for format and response."
)
@click.command()
@click.argument('file')
@click.argument("file")
@click.version_option(version=pkg_resources.require("stac-check")[0].version)
def main(file, recursive, max_depth, assets, links):
linter = Linter(file, assets=assets, links=links, recursive=recursive, max_depth=max_depth)
linter = Linter(
file, assets=assets, links=links, recursive=recursive, max_depth=max_depth
)
intro_message(linter)
if recursive > 0:
recursive_message(linter)
else:
cli_message(linter)
cli_message(linter)
160 changes: 106 additions & 54 deletions stac_check/lint.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import pkg_resources
from stac_validator.validate import StacValidate
from stac_validator.utilities import is_valid_url
import json
import yaml
import os
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Union

import pkg_resources
import requests
from typing import Optional, Union, Dict, Any, List
import yaml
from dotenv import load_dotenv
import pkg_resources
from stac_validator.utilities import is_valid_url
from stac_validator.validate import StacValidate

load_dotenv()


@dataclass
class Linter:
"""A class for linting STAC JSON files and generating validation messages.
@@ -66,11 +67,11 @@ def get_asset_name(self, file: Union[str, Dict] = None) -> str:
check_links_assets(self, num_links: int, url_type: str, format_type: str) -> List[str]:
Checks whether the STAC JSON file has links or assets with invalid formats or requests.
check_error_type(self) -> str:
check_error_type(self) -> str:
Checks whether the STAC JSON file has an error type.
check_error_message(self) -> str:
Checks whether the STAC JSON file has an error message.
Checks whether the STAC JSON file has an error message.
def check_summaries(self) -> bool:
Checks whether the STAC JSON file has summaries.
@@ -88,9 +89,9 @@ def check_summaries(self) -> bool:
Checks whether the STAC JSON file has unlocated items.
check_geometry_null(self) -> bool:
Checks whether the STAC JSON file has a null geometry.
Checks whether the STAC JSON file has a null geometry.
check_searchable_identifiers(self) -> bool:
check_searchable_identifiers(self) -> bool:
Checks whether the STAC JSON file has searchable identifiers.
check_percent_encoded(self) -> bool:
@@ -117,7 +118,8 @@ def check_summaries(self) -> bool:
create_best_practices_msg(self) -> List[str]:
Creates a message with best practices recommendations for the STAC JSON file.
"""
item: Union[str, dict] # url, file name, or dictionary

item: Union[str, dict] # url, file name, or dictionary
config_file: Optional[str] = None
assets: bool = False
links: bool = False
@@ -128,17 +130,27 @@ def __post_init__(self):
self.data = self.load_data(self.item)
self.message = self.validate_file(self.item)
self.config = self.parse_config(self.config_file)
self.asset_type = self.message["asset_type"] if "asset_type" in self.message else ""
self.asset_type = (
self.message["asset_type"] if "asset_type" in self.message else ""
)
self.version = self.message["version"] if "version" in self.message else ""
self.validator_version = pkg_resources.require("stac-validator")[0].version
self.validate_all = self.recursive_validation(self.item)
self.valid_stac = self.message["valid_stac"]
self.error_type = self.check_error_type()
self.error_msg = self.check_error_message()
self.invalid_asset_format = self.check_links_assets(10, "assets", "format") if self.assets else None
self.invalid_asset_request = self.check_links_assets(10, "assets", "request") if self.assets else None
self.invalid_link_format = self.check_links_assets(10, "links", "format") if self.links else None
self.invalid_link_request = self.check_links_assets(10, "links", "request") if self.links else None
self.invalid_asset_format = (
self.check_links_assets(10, "assets", "format") if self.assets else None
)
self.invalid_asset_request = (
self.check_links_assets(10, "assets", "request") if self.assets else None
)
self.invalid_link_format = (
self.check_links_assets(10, "links", "format") if self.links else None
)
self.invalid_link_request = (
self.check_links_assets(10, "links", "request") if self.links else None
)
self.schema = self.message["schema"] if "schema" in self.message else []
self.object_id = self.data["id"] if "id" in self.data else ""
self.file_name = self.get_asset_name(self.item)
@@ -179,7 +191,7 @@ def parse_config(config_file: Optional[str] = None) -> Dict:
with open(config_file) as f:
config = yaml.load(f, Loader=yaml.FullLoader)
default_config.update(config)

return default_config

def get_asset_name(self, file: Union[str, Dict] = None) -> str:
@@ -196,7 +208,7 @@ def get_asset_name(self, file: Union[str, Dict] = None) -> str:
TypeError: If the input `file` is not a string or a dictionary.
"""
if isinstance(file, str):
return os.path.basename(file).split('.')[0]
return os.path.basename(file).split(".")[0]
else:
return file["id"]

@@ -285,7 +297,9 @@ def set_update_message(self) -> str:
else:
return "Thanks for using STAC version 1.0.0!"

def check_links_assets(self, num_links: int, url_type: str, format_type: str) -> List[str]:
def check_links_assets(
self, num_links: int, url_type: str, format_type: str
) -> List[str]:
"""Checks the links and assets in the STAC catalog and returns a list of invalid links of a specified type and format.
Args:
@@ -298,16 +312,18 @@ def check_links_assets(self, num_links: int, url_type: str, format_type: str) ->
"""
links = []
if f"{url_type}_validated" in self.message:
for invalid_request_url in self.message[f"{url_type}_validated"][f"{format_type}_invalid"]:
if invalid_request_url not in links and 'http' in invalid_request_url:
for invalid_request_url in self.message[f"{url_type}_validated"][
f"{format_type}_invalid"
]:
if invalid_request_url not in links and "http" in invalid_request_url:
links.append(invalid_request_url)
num_links = num_links - 1
if num_links == 0:
return links
return links

def check_error_type(self) -> str:
"""Returns the error type of a STAC validation if it exists in the validation message,
"""Returns the error type of a STAC validation if it exists in the validation message,
and an empty string otherwise.
Returns:
@@ -338,6 +354,8 @@ def check_summaries(self) -> bool:
"""
if self.asset_type == "COLLECTION":
return "summaries" in self.data
else:
return False

def check_bloated_links(self, max_links: Optional[int] = 20) -> bool:
"""Checks if the number of links in the STAC data exceeds a certain maximum.
@@ -350,6 +368,8 @@ def check_bloated_links(self, max_links: Optional[int] = 20) -> bool:
"""
if "links" in self.data:
return len(self.data["links"]) > max_links
else:
return False

def check_bloated_metadata(self, max_properties: Optional[int] = 20) -> bool:
"""Checks whether a STAC item's metadata contains too many properties.
@@ -391,26 +411,33 @@ def check_unlocated(self) -> bool:

def check_geometry_null(self) -> bool:
"""Checks if a STAC item has a null geometry property.
Returns:
bool: A boolean indicating whether the geometry property is null (True) or not (False).
bool: A boolean indicating whether the geometry property is null (True) or not (False).
"""
if "geometry" in self.data:
return self.data["geometry"] is None
else:
return False

def check_searchable_identifiers(self) -> bool:
"""Checks if the identifiers of a STAC item are searchable, i.e.,
"""Checks if the identifiers of a STAC item are searchable, i.e.,
they only contain lowercase letters, numbers, hyphens, and underscores.
Returns:
bool: True if the identifiers are searchable, False otherwise.
bool: True if the identifiers are searchable, False otherwise.
"""
if self.asset_type == "ITEM":
if self.asset_type == "ITEM":
for letter in self.object_id:
if letter.islower() or letter.isnumeric() or letter == '-' or letter == '_':
if (
letter.islower()
or letter.isnumeric()
or letter == "-"
or letter == "_"
):
pass
else:
return False
return False
return True

def check_percent_encoded(self) -> bool:
@@ -420,24 +447,30 @@ def check_percent_encoded(self) -> bool:
Returns:
bool: True if the identifiers are percent-encoded, False otherwise.
"""
return self.asset_type == "ITEM" and "/" in self.object_id or ":" in self.object_id
return (
self.asset_type == "ITEM" and "/" in self.object_id or ":" in self.object_id
)

def check_thumbnail(self) -> bool:
"""Checks if the thumbnail of a STAC item is valid, i.e., it has a valid format.
Returns:
bool: True if the thumbnail is valid, False otherwise.
"""
if "assets" in self.data:
if "thumbnail" in self.data["assets"]:
if "type" in self.data["assets"]["thumbnail"]:
if "png" in self.data["assets"]["thumbnail"]["type"] or "jpeg" in self.data["assets"]["thumbnail"]["type"] or \
"jpg" in self.data["assets"]["thumbnail"]["type"] or "webp" in self.data["assets"]["thumbnail"]["type"]:
if (
"png" in self.data["assets"]["thumbnail"]["type"]
or "jpeg" in self.data["assets"]["thumbnail"]["type"]
or "jpg" in self.data["assets"]["thumbnail"]["type"]
or "webp" in self.data["assets"]["thumbnail"]["type"]
):
return True
else:
return False
return True

def check_links_title_field(self) -> bool:
"""Checks if all links in a STAC collection or catalog have a 'title' field.
The 'title' field is not required for the 'self' link.
@@ -451,10 +484,9 @@ def check_links_title_field(self) -> bool:
return False
return True


def check_links_self(self) -> bool:
"""Checks whether the "self" link is present in the STAC collection or catalog or absent in STAC item.
Returns:
bool: True if the "self" link is present in STAC collection or catalog or absent in STAC item, False otherwise.
"""
@@ -474,14 +506,14 @@ def check_item_id_file_name(self) -> bool:

def check_catalog_file_name(self) -> bool:
"""Checks whether the filename of a Catalog or Collection conforms to the STAC specification.
Returns:
bool: True if the filename is valid, False otherwise.
"""
if isinstance(self.item, str) and ".json" in self.item:
if self.asset_type == "CATALOG" and 'catalog.json' not in self.item:
return False
elif self.asset_type == "COLLECTION" and 'collection.json' not in self.item:
if self.asset_type == "CATALOG" and "catalog.json" not in self.item:
return False
elif self.asset_type == "COLLECTION" and "collection.json" not in self.item:
return False
return True
else:
@@ -502,7 +534,10 @@ def create_best_practices_dict(self) -> Dict:
max_properties = self.config["settings"]["max_properties"]

# best practices - item ids should only contain searchable identifiers
if self.check_searchable_identifiers() == False and config["searchable_identifiers"] == True:
if (
self.check_searchable_identifiers() == False
and config["searchable_identifiers"] == True
):
msg_1 = f"Item name '{self.object_id}' should only contain Searchable identifiers"
msg_2 = f"Identifiers should consist of only lowercase characters, numbers, '_', and '-'"
best_practices_dict["searchable_identifiers"] = [msg_1, msg_2]
@@ -518,8 +553,11 @@ def create_best_practices_dict(self) -> Dict:
msg_1 = f"Item file names should match their ids: '{self.file_name}' not equal to '{self.object_id}"
best_practices_dict["check_item_id"] = [msg_1]

# best practices - collection and catalog file names should be collection.json and catalog.json
if self.check_catalog_file_name() == False and config["catalog_id_file_name"] == True:
# best practices - collection and catalog file names should be collection.json and catalog.json
if (
self.check_catalog_file_name() == False
and config["catalog_id_file_name"] == True
):
msg_1 = f"Object should be called '{self.asset_type.lower()}.json' not '{self.file_name}.json'"
best_practices_dict["check_catalog_id"] = [msg_1]

@@ -545,23 +583,37 @@ def create_best_practices_dict(self) -> Dict:
best_practices_dict["null_geometry"] = [msg_1]

# check to see if there are too many links
if self.check_bloated_links(max_links=max_links) and config["bloated_links"] == True:
if (
self.check_bloated_links(max_links=max_links)
and config["bloated_links"] == True
):
msg_1 = f"You have {len(self.data['links'])} links. Please consider using sub-collections or sub-catalogs"
best_practices_dict["bloated_links"] = [msg_1]

# best practices - check for bloated metadata in properties
if self.check_bloated_metadata(max_properties=max_properties) and config["bloated_metadata"] == True:
if (
self.check_bloated_metadata(max_properties=max_properties)
and config["bloated_metadata"] == True
):
msg_1 = f"You have {len(self.data['properties'])} properties. Please consider using links to avoid bloated metadata"
best_practices_dict["bloated_metadata"] = [msg_1]

# best practices - ensure thumbnail is a small file size ["png", "jpeg", "jpg", "webp"]
if not self.check_thumbnail() and self.asset_type == "ITEM" and config["check_thumbnail"] == True:
msg_1 = f"A thumbnail should have a small file size ie. png, jpeg, jpg, webp"
if (
not self.check_thumbnail()
and self.asset_type == "ITEM"
and config["check_thumbnail"] == True
):
msg_1 = (
f"A thumbnail should have a small file size ie. png, jpeg, jpg, webp"
)
best_practices_dict["check_thumbnail"] = [msg_1]

# best practices - ensure that links in catalogs and collections include a title field
if not self.check_links_title_field() and config["links_title"] == True:
msg_1 = f"Links in catalogs and collections should always have a 'title' field"
msg_1 = (
f"Links in catalogs and collections should always have a 'title' field"
)
best_practices_dict["check_links_title"] = [msg_1]

# best practices - ensure that links in catalogs and collections include self link
@@ -576,17 +628,17 @@ def create_best_practices_msg(self) -> List[str]:
Generates a list of best practices messages based on the results of the 'create_best_practices_dict' method.
Returns:
A list of strings, where each string contains a best practice message. Each message starts with the
'STAC Best Practices:' base string and is followed by a specific recommendation. Each message is indented
A list of strings, where each string contains a best practice message. Each message starts with the
'STAC Best Practices:' base string and is followed by a specific recommendation. Each message is indented
with four spaces, and there is an empty string between each message for readability.
"""
best_practices = list()
base_string = "STAC Best Practices: "
best_practices.append(base_string)

for _,v in self.create_best_practices_dict().items():
for _, v in self.create_best_practices_dict().items():
for value in v:
best_practices.extend([" " +value])
best_practices.extend([" " + value])
best_practices.extend([""])

return best_practices
return best_practices
9 changes: 7 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import pytest
from click.testing import CliRunner

from stac_check.cli import main
import pytest

INTRO = "stac-check: STAC spec validaton and linting tool"
VALID_ITEM = "Valid ITEM: True"
VERSION_MSG_1 = "Thanks for using STAC version 1.0.0!"
VALIDATOR = "Validator: stac-validator 2.4.0"
SCHEMA_MSG = "Schemas validated: "


@pytest.mark.skip(reason="cli output is changing constantly right now")
def test_core_item_100():
runner = CliRunner()
@@ -18,4 +20,7 @@ def test_core_item_100():
assert result.output.splitlines()[3] == VALIDATOR
assert result.output.splitlines()[4] == VALID_ITEM
assert result.output.splitlines()[5] == SCHEMA_MSG
assert result.output.splitlines()[6] == """ https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json"""
assert (
result.output.splitlines()[6]
== """ https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json"""
)
7 changes: 3 additions & 4 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from stac_check.lint import Linter


def test_linter_config_file():
file = "sample_files/1.0.0/core-item.json"
linter = Linter(file)
@@ -8,7 +9,7 @@ def test_linter_config_file():
assert linter.config["linting"]["searchable_identifiers"] == True
assert linter.create_best_practices_dict()["searchable_identifiers"] == [
f"Item name '{linter.object_id}' should only contain Searchable identifiers",
"Identifiers should consist of only lowercase characters, numbers, '_', and '-'"
"Identifiers should consist of only lowercase characters, numbers, '_', and '-'",
]

# Load config file
@@ -17,6 +18,7 @@ def test_linter_config_file():
assert linter.config["linting"]["searchable_identifiers"] == False
assert "searchable_identifiers" not in linter.create_best_practices_dict()


def test_linter_max_links():
file = "sample_files/1.0.0/core-item-bloated.json"
linter = Linter(file)
@@ -27,6 +29,3 @@ def test_linter_max_links():
# Load config file
linter = Linter(file, config_file="tests/test.config.yml")
assert "bloated_links" not in linter.create_best_practices_dict()



287 changes: 124 additions & 163 deletions tests/test_lint.py

Large diffs are not rendered by default.