Skip to content

Commit

Permalink
Allow providing HTTP headers
Browse files Browse the repository at this point in the history
  • Loading branch information
vprivat-ads committed Dec 2, 2024
1 parent 06ab639 commit 881d0b0
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 15 deletions.
17 changes: 9 additions & 8 deletions stac_validator/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import ssl
from typing import Dict
from urllib.parse import urlparse
from urllib.request import urlopen
from urllib.request import Request, urlopen

import requests # type: ignore

Expand Down Expand Up @@ -77,7 +77,7 @@ def get_stac_type(stac_content: Dict) -> str:
return str(e)


def fetch_and_parse_file(input_path: str) -> Dict:
def fetch_and_parse_file(input_path: str, headers: Dict = {}) -> Dict:
"""Fetches and parses a JSON file from a URL or local file.
Given a URL or local file path to a JSON file, this function fetches the file,
Expand All @@ -87,6 +87,7 @@ def fetch_and_parse_file(input_path: str) -> Dict:
Args:
input_path: A string representing the URL or local file path to the JSON file.
headers: For URLs: HTTP headers to include in the request
Returns:
A dictionary containing the parsed contents of the JSON file.
Expand All @@ -97,7 +98,7 @@ def fetch_and_parse_file(input_path: str) -> Dict:
"""
try:
if is_url(input_path):
resp = requests.get(input_path)
resp = requests.get(input_path, headers=headers)
resp.raise_for_status()
data = resp.json()
else:
Expand Down Expand Up @@ -150,9 +151,7 @@ def set_schema_addr(version: str, stac_type: str) -> str:


def link_request(
link: Dict,
initial_message: Dict,
open_urls: bool = True,
link: Dict, initial_message: Dict, open_urls: bool = True, headers: Dict = {}
) -> None:
"""Makes a request to a URL and appends it to the relevant field of the initial message.
Expand All @@ -161,6 +160,7 @@ def link_request(
initial_message: A dictionary containing lists for "request_valid", "request_invalid",
"format_valid", and "format_invalid" URLs.
open_urls: Whether to open link href URL
headers: HTTP headers to include in the request
Returns:
None
Expand All @@ -169,11 +169,12 @@ def link_request(
if is_url(link["href"]):
try:
if open_urls:
request = Request(link["href"], headers=headers)
if "s3" in link["href"]:
context = ssl._create_unverified_context()
response = urlopen(link["href"], context=context)
response = urlopen(request, context=context)
else:
response = urlopen(link["href"])
response = urlopen(request)
status_code = response.getcode()
if status_code == 200:
initial_message["request_valid"].append(link["href"])
Expand Down
21 changes: 14 additions & 7 deletions stac_validator/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class StacValidate:
links (bool): Whether to additionally validate links (only works in default mode).
assets (bool): Whether to additionally validate assets (only works in default mode).
assets_open_urls (bool): Whether to open assets URLs when validating assets.
headers (dict): HTTP headers to include in the requests.
extensions (bool): Whether to only validate STAC object extensions.
custom (str): The local filepath or remote URL of a custom JSON schema to validate the STAC object.
verbose (bool): Whether to enable verbose output in recursive mode.
Expand All @@ -56,6 +57,7 @@ def __init__(
links: bool = False,
assets: bool = False,
assets_open_urls: bool = True,
headers: dict = {},
extensions: bool = False,
custom: str = "",
verbose: bool = False,
Expand All @@ -70,6 +72,7 @@ def __init__(
self.links = links
self.assets = assets
self.assets_open_urls = assets_open_urls
self.headers: Dict = headers
self.recursive = recursive
self.max_depth = max_depth
self.extensions = extensions
Expand Down Expand Up @@ -125,7 +128,9 @@ def assets_validator(self) -> Dict:
assets = self.stac_content.get("assets")
if assets:
for asset in assets.values():
link_request(asset, initial_message, self.assets_open_urls)
link_request(
asset, initial_message, self.assets_open_urls, self.headers
)
return initial_message

def links_validator(self) -> Dict:
Expand All @@ -145,7 +150,7 @@ def links_validator(self) -> Dict:
for link in self.stac_content["links"]:
if not is_valid_url(link["href"]):
link["href"] = root_url + link["href"][1:]
link_request(link, initial_message)
link_request(link, initial_message, True, self.headers)

return initial_message

Expand Down Expand Up @@ -345,7 +350,9 @@ def recursive_validator(self, stac_type: str) -> bool:
self.stac_file = st + "/" + address
else:
self.stac_file = address
self.stac_content = fetch_and_parse_file(str(self.stac_file))
self.stac_content = fetch_and_parse_file(
str(self.stac_file), self.headers
)
self.stac_content["stac_version"] = self.version
stac_type = get_stac_type(self.stac_content).lower()

Expand Down Expand Up @@ -414,7 +421,7 @@ def validate_collections(self) -> None:
Returns:
None
"""
collections = fetch_and_parse_file(str(self.stac_file))
collections = fetch_and_parse_file(str(self.stac_file), self.headers)
for collection in collections["collections"]:
self.schema = ""
self.validate_dict(collection)
Expand All @@ -437,7 +444,7 @@ def validate_item_collection(self) -> None:
"""
page = 1
print(f"processing page {page}")
item_collection = fetch_and_parse_file(str(self.stac_file))
item_collection = fetch_and_parse_file(str(self.stac_file), self.headers)
self.validate_item_collection_dict(item_collection)
try:
if self.pages is not None:
Expand All @@ -450,7 +457,7 @@ def validate_item_collection(self) -> None:
next_link = link["href"]
self.stac_file = next_link
item_collection = fetch_and_parse_file(
str(self.stac_file)
str(self.stac_file), self.headers
)
self.validate_item_collection_dict(item_collection)
break
Expand Down Expand Up @@ -489,7 +496,7 @@ def run(self) -> bool:
and not self.item_collection
and not self.collections
):
self.stac_content = fetch_and_parse_file(self.stac_file)
self.stac_content = fetch_and_parse_file(self.stac_file, self.headers)

stac_type = get_stac_type(self.stac_content).upper()
self.version = self.stac_content["stac_version"]
Expand Down

0 comments on commit 881d0b0

Please sign in to comment.