Skip to content

Commit

Permalink
feat: generic driver and updated Sentinel2L1C
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrunato committed Jan 22, 2025
1 parent 5de2c4a commit e708a0b
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 38 deletions.
30 changes: 29 additions & 1 deletion eodag/api/product/drivers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING, TypedDict
from typing import TYPE_CHECKING, Optional, TypedDict

if TYPE_CHECKING:
from eodag.api.product import EOProduct
Expand All @@ -37,8 +37,36 @@ class DatasetDriver(metaclass=type):
#: legacy driver for deprecated get_data method usage
legacy: DatasetDriver

ASSET_KEYS_PATTERNS_ROLES: list[AssetPatterns] = []

STRIP_SPECIAL_PATTERN = re.compile(r"^[^A-Z0-9]+|[^A-Z0-9]+$", re.IGNORECASE)

def _normalize_key(self, key: str, eo_product: EOProduct) -> str:
    """Normalize an asset key extracted from an href.

    Removes every occurrence of the product id from the key, then trims
    the characters matched by ``STRIP_SPECIAL_PATTERN`` from both ends.

    :param key: The raw asset key
    :param eo_product: The product to which the asset belongs
    :returns: The normalized asset key (may be an empty string)
    """
    # ``properties.get("id", "")`` still yields None when the key exists with
    # a null value, and str.replace() would then raise TypeError
    product_id = eo_product.properties.get("id") or ""
    if isinstance(product_id, str) and product_id:
        key = key.replace(product_id, "")

    # default cleanup of leading / trailing special characters
    return self.STRIP_SPECIAL_PATTERN.sub("", key)

def guess_asset_key_and_roles(
    self, href: str, eo_product: EOProduct
) -> tuple[Optional[str], Optional[list[str]]]:
    """Guess the asset key and roles from the given href.

    The entries of ``ASSET_KEYS_PATTERNS_ROLES`` are tried in order and the
    first pattern matching ``href`` decides the result. The key is built by
    concatenating the non-empty capture groups, then normalized; when
    normalization yields an empty value the concatenated key is kept.

    :param href: The asset href
    :param eo_product: The product to which the asset belongs
    :returns: The asset key and roles, or ``(None, None)`` if nothing matched
    """
    for entry in self.ASSET_KEYS_PATTERNS_ROLES:
        found = entry["pattern"].match(href)
        if not found:
            continue
        asset_roles = entry.get("roles")
        # optional groups that did not participate are None and are dropped
        raw_key = "".join(part for part in found.groups() if part is not None)
        cleaned_key = self._normalize_key(raw_key, eo_product)
        return cleaned_key or raw_key, asset_roles
    return None, None

def get_data_address(self, eo_product: EOProduct, band: str) -> str:
"""Retrieve the address of the dataset represented by `eo_product`.
Expand Down
29 changes: 28 additions & 1 deletion eodag/api/product/drivers/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
from __future__ import annotations

import logging
import re
from typing import TYPE_CHECKING

from eodag.api.product.drivers.base import DatasetDriver
from eodag.api.product.drivers.base import AssetPatterns, DatasetDriver
from eodag.utils.exceptions import AddressNotFound

if TYPE_CHECKING:
Expand All @@ -36,6 +37,32 @@
class GenericDriver(DatasetDriver):
"""Generic Driver for products that need to be downloaded"""

# Href patterns and the roles given to the assets they match. Order matters:
# DatasetDriver.guess_asset_key_and_roles walks this list from the top and
# returns on the first pattern matching the href. Each pattern skips an
# optional leading directory part (``/`` or ``\`` separated); the capture
# groups are concatenated to build the asset key.
ASSET_KEYS_PATTERNS_ROLES: list[AssetPatterns] = [
    # metadata: any file name ending with .xml, .xsd, .safe or .json
    # (case-insensitive)
    {
        "pattern": re.compile(
            r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)$", re.IGNORECASE
        ),
        "roles": ["metadata"],
    },
    # thumbnail: a file named exactly "thumbnail" with a jpg/jpeg/png extension
    {
        "pattern": re.compile(
            r"^(?:.*[/\\])?(thumbnail)(\.jpg|\.jpeg|\.png)$", re.IGNORECASE
        ),
        "roles": ["thumbnail"],
    },
    # quicklook: a file name ending with "-ql", or named "preview", with a
    # jpg/jpeg/png extension
    {
        "pattern": re.compile(
            r"^(?:.*[/\\])?([^/\\]+-ql|preview)(\.jpg|\.jpeg|\.png)$", re.IGNORECASE
        ),
        "roles": ["overview"],
    },
    # default: any remaining non-empty file name (last path component);
    # note this pattern is compiled without re.IGNORECASE
    {"pattern": re.compile(r"^(?:.*[/\\])?([^/\\]+)$"), "roles": ["auxiliary"]},
]

def _get_data_address(self, eo_product: EOProduct, band: str) -> str:
"""Get the address of a product subdataset.
Expand Down
54 changes: 19 additions & 35 deletions eodag/api/product/drivers/sentinel2_l1c.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING

from eodag.api.product.drivers.base import AssetPatterns, DatasetDriver
from eodag.utils.exceptions import AddressNotFound
Expand All @@ -38,26 +38,28 @@ class Sentinel2L1C(DatasetDriver):
"TCI": ("TCI",),
}
BANDS_DEFAULT_GSD = {
"10m": ("B02", "B03", "B04", "B08", "TCI"),
"20m": ("B05", "B06", "B07", "B11", "B12", "B8A"),
"60m": ("B01", "B09", "B10"),
"10M": ("B02", "B03", "B04", "B08", "TCI"),
"20M": ("B05", "B06", "B07", "B11", "B12", "B8A"),
"60M": ("B01", "B09", "B10"),
}

ASSET_KEYS_PATTERNS_ROLES: list[AssetPatterns] = [
# masks
{
"pattern": re.compile(r"^.*?(MSK_[^/\\]+)\.jp2$", re.IGNORECASE),
"pattern": re.compile(r"^.*?(MSK_[^/\\]+)\.(?:jp2|tiff?)$", re.IGNORECASE),
"roles": ["data-mask"],
},
# visual
{
"pattern": re.compile(r"^.*?(TCI)(_[0-9]+m)?\.jp2$", re.IGNORECASE),
"pattern": re.compile(
r"^.*?(TCI)(_[0-9]+m)?\.(?:jp2|tiff?)$", re.IGNORECASE
),
"roles": ["visual"],
},
# bands
{
"pattern": re.compile(
r"^.*?([A-Z]+[0-9]*[A-Z]?)(_[0-9]+m)?\.jp2$", re.IGNORECASE
r"^.*?([A-Z]+[0-9]*[A-Z]?)(_[0-9]+m)?\.(?:jp2|tiff?)$", re.IGNORECASE
),
"roles": ["data"],
},
Expand All @@ -68,20 +70,25 @@ class Sentinel2L1C(DatasetDriver):
),
"roles": ["metadata"],
},
# thumbnail
{
"pattern": re.compile(
r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)$", re.IGNORECASE
),
"roles": ["thumbnail"],
},
# quicklook
{
"pattern": re.compile(
r"^(?:.*[/\\])?([^/\\]+-ql|preview)(\.jpg)$", re.IGNORECASE
r"^(?:.*[/\\])?([^/\\]+-ql|preview)(\.jpe?g|\.png)$", re.IGNORECASE
),
"roles": ["overview"],
},
# default
{"pattern": re.compile(r"^(?:.*[/\\])?([^/\\]+)$"), "roles": ["auxiliary"]},
]

STRIP_SPECIAL_PATTERN = DatasetDriver.STRIP_SPECIAL_PATTERN

def _normalize_key(self, key, eo_product):
def _normalize_key(self, key: str, eo_product: EOProduct) -> str:
upper_key = key.upper()
# check if key matched any normalized
for res in self.BANDS_DEFAULT_GSD:
Expand All @@ -90,30 +97,7 @@ def _normalize_key(self, key, eo_product):
if norm_key in upper_key:
return norm_key

# default cleanup
norm_key = key.replace(eo_product.properties.get("id", ""), "")
norm_key = re.sub(self.STRIP_SPECIAL_PATTERN, "", norm_key)

return norm_key

def guess_asset_key_and_roles(
self, href: str, eo_product: EOProduct
) -> tuple[Optional[str], Optional[list[str]]]:
"""Guess the asset key and roles from the given href.
:param href: The asset href
:param eo_product: The product to which the asset belongs
:returns: The asset key and roles
"""
for pattern_dict in self.ASSET_KEYS_PATTERNS_ROLES:
if matched := pattern_dict["pattern"].match(href):
extracted_key, roles = (
"".join([m for m in matched.groups() if m is not None]),
pattern_dict.get("roles"),
)
normalized_key = self._normalize_key(extracted_key, eo_product)
return normalized_key or extracted_key, roles
return None, None
return super()._normalize_key(key, eo_product)

def _get_data_address(self, eo_product: EOProduct, band: str) -> str:
"""Compute the address of a subdataset for a Sentinel2 L1C product.
Expand Down
6 changes: 5 additions & 1 deletion eodag/plugins/search/creodias_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
from types import MethodType
from typing import Any
from urllib.parse import urlparse

import boto3
import botocore
Expand Down Expand Up @@ -91,10 +93,12 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
key, roles = product.driver.guess_asset_key_and_roles(
s3_obj["Key"], product
)
parsed_url = urlparse(s3_obj["Key"])
title = os.path.basename(parsed_url.path)

if key and key not in product.assets:
product.assets[key] = {
"title": key,
"title": title,
"roles": roles,
"href": f"s3://{config.s3_bucket}/{s3_obj['Key']}",
}
Expand Down

0 comments on commit e708a0b

Please sign in to comment.