Skip to content

Commit

Permalink
feat: add sorting feature in library mode (#943)
Browse files Browse the repository at this point in the history
  • Loading branch information
anesson-cs authored Feb 21, 2024
1 parent 2ed579c commit dd9a2ef
Show file tree
Hide file tree
Showing 21 changed files with 1,494 additions and 288 deletions.
3 changes: 2 additions & 1 deletion docs/api_reference/core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,10 @@ Misc
EODataAccessGateway.group_by_extent
EODataAccessGateway.guess_product_type
EODataAccessGateway.list_queryables
EODataAccessGateway.available_sortables

.. autoclass:: eodag.api.core.EODataAccessGateway
:members: set_preferred_provider, get_preferred_provider, update_providers_config, list_product_types,
available_providers, search, search_all, search_iter_page, crunch, download, download_all, serialize,
deserialize, deserialize_and_register, load_stac_items, group_by_extent, guess_product_type, get_cruncher,
update_product_types_list, fetch_product_types_list, discover_product_types, list_queryables
update_product_types_list, fetch_product_types_list, discover_product_types, list_queryables, available_sortables
768 changes: 532 additions & 236 deletions docs/notebooks/api_user_guide/4_search.ipynb

Large diffs are not rendered by default.

39 changes: 38 additions & 1 deletion eodag/api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
from eodag.plugins.apis.base import Api
from eodag.plugins.crunch.base import Crunch
from eodag.plugins.search.base import Search
from eodag.types import ProviderSortables
from eodag.utils import Annotated, DownloadedCallback, ProgressCallback

logger = logging.getLogger("eodag.core")
Expand Down Expand Up @@ -546,7 +547,7 @@ def list_product_types(
product_types.append(product_type)
return sorted(product_types, key=itemgetter("ID"))
raise UnsupportedProvider(
f"The requested provider is not (yet) supported: {provider}"
f"invalid requested provider: {provider} is not (yet) supported"
)
# Only get the product types supported by the available providers
for provider in self.available_providers():
Expand Down Expand Up @@ -1805,6 +1806,9 @@ def _do_search(
if not raise_errors:
log_msg += " Raise verbosity of log messages for details"
logger.info(log_msg)
# keep only the message from exception args
if len(e.args) > 1:
e.args = (e.args[0],)
if raise_errors:
# Raise the error, letting the application wrapping eodag know that
# something went bad. This way it will be able to decide what to do next
Expand Down Expand Up @@ -2248,3 +2252,36 @@ def list_queryables(
provider_queryables.update(model_fields_to_annotated(common_queryables))

return provider_queryables

def available_sortables(self) -> Dict[str, Optional[ProviderSortables]]:
    """For each provider, gives its available sortable parameter(s) and its maximum
    number of them if it supports the sorting feature, otherwise gives None.

    A provider supports the sorting feature when the configuration of its search plugin
    has a ``sort`` attribute.

    :returns: A dictionary with providers as keys and, as values, either None or a dictionary
              holding the list of sortable parameter(s) (``sortables``) and their maximum
              number (``max_sort_params``, None when no limit is configured).
    :rtype: dict
    :raises: :class:`~eodag.utils.exceptions.UnsupportedProvider` (not raised directly by
             this method; presumably propagated from the plugins manager, to be confirmed)
    """
    sortables: Dict[str, Optional[ProviderSortables]] = {}
    for search_plugin in self._plugins_manager.get_search_plugins():
        provider = search_plugin.provider
        if not hasattr(search_plugin.config, "sort"):
            # no sort configuration: the provider cannot sort search results
            sortables[provider] = None
            continue
        sort_config = search_plugin.config.sort
        sortables[provider] = {
            # EODAG names of the parameters that can be used to sort
            "sortables": list(sort_config["sort_param_mapping"]),
            # a missing or falsy limit is reported as None
            "max_sort_params": sort_config.get("max_sort_params") or None,
        }
    return sortables
13 changes: 13 additions & 0 deletions eodag/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
ItemsView,
Iterator,
List,
Literal,
Optional,
Tuple,
TypedDict,
Expand All @@ -40,13 +41,15 @@
import yaml
import yaml.constructor
import yaml.parser
from annotated_types import Gt
from jsonpath_ng import JSONPath
from pkg_resources import resource_filename
from requests.auth import AuthBase

from eodag.utils import (
HTTP_REQ_TIMEOUT,
USER_AGENT,
Annotated,
cached_yaml_load,
cached_yaml_load_all,
cast_scalar_value,
Expand Down Expand Up @@ -228,6 +231,15 @@ class Pagination(TypedDict):
count_endpoint: str
start_page: int

class Sort(TypedDict):
"""Configuration for sort during search"""

# Sort applied when a search carries no "sortBy" argument: a list of
# (sort parameter, sort order) tuples
sort_by_default: List[Tuple[str, str]]
# Template formatted once per sort parameter with the "sort_param" and "sort_order"
# placeholders; a result that parses as JSON is merged into the query parameters,
# any other result is appended to the query string
sort_by_tpl: str
# Maps the EODAG sortable parameter names to the provider ones
sort_param_mapping: Dict[str, str]
# Maps the "ascending" / "descending" orders to the provider keywords
sort_order_mapping: Dict[Literal["ascending", "descending"], str]
# Maximum number of sort parameters accepted in a single search (strictly positive)
max_sort_params: Annotated[int, Gt(0)]

class OrderStatusOnSuccess(TypedDict):
"""Configuration for order on-success during download"""

Expand All @@ -251,6 +263,7 @@ class OrderStatusOnSuccess(TypedDict):
result_type: str
results_entry: str
pagination: PluginConfig.Pagination
sort: PluginConfig.Sort
query_params_key: str
discover_metadata: Dict[str, str]
discover_product_types: Dict[str, Any]
Expand Down
3 changes: 3 additions & 0 deletions eodag/plugins/apis/usgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
NoMatchingProductType,
NotAvailableError,
RequestError,
ValidationError,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -117,6 +118,8 @@ def query(
raise NoMatchingProductType(
"Cannot search on USGS without productType specified"
)
if kwargs.get("sortBy"):
raise ValidationError("USGS does not support sorting feature")

self.authenticate()

Expand Down
121 changes: 121 additions & 0 deletions eodag/plugins/search/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,24 @@
import logging
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import orjson
from pydantic.fields import Field, FieldInfo

from eodag.api.product.metadata_mapping import (
DEFAULT_METADATA_MAPPING,
mtd_cfg_as_conversion_and_querypath,
)
from eodag.plugins.base import PluginTopic
from eodag.types.search_args import SortByList
from eodag.utils import (
DEFAULT_ITEMS_PER_PAGE,
DEFAULT_PAGE,
GENERIC_PRODUCT_TYPE,
Annotated,
format_dict_items,
update_nested_dict,
)
from eodag.utils.exceptions import ValidationError

if TYPE_CHECKING:
from eodag.api.product import EOProduct
Expand Down Expand Up @@ -181,3 +185,120 @@ def get_metadata_mapping(
return self.config.products.get(product_type, {}).get(
"metadata_mapping", self.config.metadata_mapping
)

def get_sort_by_arg(self, kwargs: Dict[str, Any]) -> Optional[SortByList]:
    """Extract the "sortBy" argument from the kwargs or the provider default sort configuration

    The "sortBy" key is removed from ``kwargs`` (the dictionary is modified in place).

    :param kwargs: Search arguments
    :type kwargs: Dict[str, Any]
    :returns: The "sortBy" argument from the kwargs or, when it is missing or empty, the
              provider default sort configuration (None if there is none)
    :rtype: :class:`~eodag.types.search_args.SortByList`
    """
    # remove "sortBy" from search args if exists because it is not part of metadata mapping,
    # it will complete the query string or body once metadata mapping will be done
    sort_by_arg_tmp = kwargs.pop("sortBy", None)
    sort_by_arg = sort_by_arg_tmp or getattr(self.config, "sort", {}).get(
        "sort_by_default", None
    )
    if not sort_by_arg_tmp and sort_by_arg:
        default_sort_param = sort_by_arg[0][0]
        # read the order the same way build_sort_by() does (case-insensitive, surrounding
        # whitespaces ignored, "ASC" prefix), so that the message matches the sort applied
        is_ascending = str(sort_by_arg[0][1]).strip().upper().startswith("ASC")
        logger.info(
            f"{self.provider} is configured with default sorting by '{default_sort_param}' "
            f"in {'ascending' if is_ascending else 'descending'} order"
        )
    return sort_by_arg

def build_sort_by(
    self, sort_by_arg: SortByList
) -> Tuple[str, Dict[str, List[Dict[str, str]]]]:
    """Build the sorting part of the query string or body by transforming
    the "sortBy" argument into a provider-specific string or dictionary

    Each (parameter, order) tuple is translated with the provider "sort" configuration, then
    rendered with its "sort_by_tpl" template. A rendered template that parses as JSON is merged
    into the returned dictionary, any other one is appended to the returned string.

    :param sort_by_arg: the "sortBy" argument in EODAG format
    :type sort_by_arg: :class:`~eodag.types.search_args.SortByList`
    :returns: The "sortBy" argument in provider-specific format, as a tuple made of its
              query-string form and its query-parameters form
    :rtype: Tuple[str, Dict[str, List[Dict[str, str]]]]
    :raises: :class:`~eodag.utils.exceptions.ValidationError`
    """
    if not hasattr(self.config, "sort"):
        raise ValidationError(f"{self.provider} does not support sorting feature")
    sort_config = self.config.sort

    # TODO: remove this code block when search args model validation is embedded
    # remove duplicates while keeping the requested order, which sets the sorting priority
    # (a set would lose it)
    sort_by_arg = list(dict.fromkeys(sort_by_arg))

    sort_by_qs_parts: List[str] = []
    sort_by_qp: Dict[str, Any] = {}

    # provider sort parameters already handled, with the normalized order they were given
    sort_orders_used: Dict[str, str] = {}
    for eodag_sort_by_tuple in sort_by_arg:
        eodag_sort_param = eodag_sort_by_tuple[0]
        sort_param_mapping = sort_config["sort_param_mapping"]
        provider_sort_param = sort_param_mapping.get(eodag_sort_param, None)
        if not provider_sort_param:
            joined_eodag_params_to_map = ", ".join(sort_param_mapping)
            # the sortable parameters and the rejected one are passed as a second exception
            # argument, next to the message
            params = set(sort_param_mapping)
            params.add(eodag_sort_param)
            raise ValidationError(
                f"'{eodag_sort_param}' parameter is not sortable with {self.provider}. "
                f"Here is the list of sortable parameter(s) with {self.provider}: {joined_eodag_params_to_map}",
                params,
            )
        eodag_sort_order = eodag_sort_by_tuple[1]
        # TODO: remove this code block when search args model validation is embedded
        # Remove leading and trailing whitespace(s) if exist
        eodag_sort_order = eodag_sort_order.strip().upper()
        if eodag_sort_order[:3] != "ASC" and eodag_sort_order[:3] != "DES":
            raise ValidationError(
                "Sorting order is invalid: it must be set to 'ASC' (ASCENDING) or "
                f"'DESC' (DESCENDING), got '{eodag_sort_order}' with '{eodag_sort_param}' instead"
            )
        eodag_sort_order = eodag_sort_order[:3]

        provider_sort_order = (
            sort_config["sort_order_mapping"]["ascending"]
            if eodag_sort_order == "ASC"
            else sort_config["sort_order_mapping"]["descending"]
        )
        # TODO: remove this code block when search args model validation is embedded
        if provider_sort_param in sort_orders_used:
            if sort_orders_used[provider_sort_param] == eodag_sort_order:
                # same parameter with the same order written differently (e.g. "ASC" and
                # "asc"): a plain duplicate, already taken into account
                continue
            # same parameter with both orders: contradictory request
            raise ValidationError(
                f"'{eodag_sort_param}' parameter is called several times to sort results with different "
                "sorting orders. Please set it to only one ('ASC' (ASCENDING) or 'DESC' (DESCENDING))",
                {eodag_sort_param},
            )
        sort_orders_used[provider_sort_param] = eodag_sort_order

        # TODO: move this code block to the top of this method when search args model validation is embedded
        # check if the limit number of sorting parameter(s) is respected with this sorting parameter
        if (
            sort_config.get("max_sort_params", None)
            and len(sort_orders_used) > sort_config["max_sort_params"]
        ):
            raise ValidationError(
                f"Search results can be sorted by only {sort_config['max_sort_params']} "
                f"parameter(s) with {self.provider}"
            )

        parsed_sort_by_tpl: str = sort_config["sort_by_tpl"].format(
            sort_param=provider_sort_param,
            sort_order=provider_sort_order,
        )
        try:
            parsed_sort_by_tpl_dict: Dict[str, Any] = orjson.loads(
                parsed_sort_by_tpl
            )
        except orjson.JSONDecodeError:
            # not JSON: the rendered template is a piece of query string
            sort_by_qs_parts.append(parsed_sort_by_tpl)
        else:
            sort_by_qp = update_nested_dict(
                sort_by_qp, parsed_sort_by_tpl_dict, extend_list_values=True
            )
    return ("".join(sort_by_qs_parts), sort_by_qp)
10 changes: 9 additions & 1 deletion eodag/plugins/search/data_request_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@
deepcopy,
string_to_jsonpath,
)
from eodag.utils.exceptions import NotAvailableError, RequestError, TimeOutError
from eodag.utils.exceptions import (
NotAvailableError,
RequestError,
TimeOutError,
ValidationError,
)

if TYPE_CHECKING:
from eodag.config import PluginConfig
Expand Down Expand Up @@ -128,6 +133,9 @@ def query(
"""
performs the search for a provider where several steps are required to fetch the data
"""
if kwargs.get("sortBy"):
raise ValidationError(f"{self.provider} does not support sorting feature")

product_type = kwargs.get("productType", None)
# replace "product_type" to "providerProductType" in search args if exists
# for compatibility with DataRequestSearch method
Expand Down
26 changes: 22 additions & 4 deletions eodag/plugins/search/qssearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
)
from eodag.plugins.search.base import Search
from eodag.types import json_field_definition_to_python, model_fields_to_annotated
from eodag.types.search_args import SortByList
from eodag.utils import (
DEFAULT_ITEMS_PER_PAGE,
DEFAULT_PAGE,
Expand Down Expand Up @@ -471,6 +472,11 @@ def query(
# remove "product_type" from search args if exists for compatibility with QueryStringSearch methods
kwargs.pop("product_type", None)

sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
sort_by_qs, _ = (
("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
)

provider_product_type = self.map_product_type(product_type)
keywords = {k: v for k, v in kwargs.items() if k != "auth" and v is not None}
keywords["productType"] = (
Expand Down Expand Up @@ -510,7 +516,11 @@ def query(
self.query_params = qp
self.query_string = qs
self.search_urls, total_items = self.collect_search_urls(
page=page, items_per_page=items_per_page, count=count, **kwargs
page=page,
items_per_page=items_per_page,
count=count,
sort_by_qs=sort_by_qs,
**kwargs,
)
if not count and hasattr(self, "total_items_nb"):
# do not try to extract total_items from search results if count is False
Expand Down Expand Up @@ -559,6 +569,10 @@ def collect_search_urls(
urls = []
total_results = 0 if count else None

# use only sort_by parameters for search, not for count
# and remove potential leading '&'
qs_with_sort = (self.query_string + kwargs.get("sort_by_qs", "")).strip("&")

if "count_endpoint" not in self.config.pagination:
# if count_endpoint is not set, total_results should be extracted from search result
total_results = None
Expand Down Expand Up @@ -594,14 +608,14 @@ def collect_search_urls(
total_results += _total_results or 0
next_url = self.config.pagination["next_page_url_tpl"].format(
url=search_endpoint,
search=self.query_string,
search=qs_with_sort,
items_per_page=items_per_page,
page=page,
skip=(page - 1) * items_per_page,
skip_base_1=(page - 1) * items_per_page + 1,
)
else:
next_url = "{}?{}".format(search_endpoint, self.query_string)
next_url = "{}?{}".format(search_endpoint, qs_with_sort)
urls.append(next_url)
return urls, total_results

Expand Down Expand Up @@ -1032,6 +1046,10 @@ def query(
product_type = kwargs.get("productType", None)
# remove "product_type" from search args if exists for compatibility with QueryStringSearch methods
kwargs.pop("product_type", None)
sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
_, sort_by_qp = (
("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
)
provider_product_type = self.map_product_type(product_type)
keywords = {k: v for k, v in kwargs.items() if k != "auth" and v is not None}

Expand Down Expand Up @@ -1116,7 +1134,7 @@ def query(
if isinstance(product_type_metadata_mapping.get(k, []), list)
):
return [], 0
self.query_params = qp
self.query_params = dict(qp, **sort_by_qp)
self.search_urls, total_items = self.collect_search_urls(
page=page, items_per_page=items_per_page, count=count, **kwargs
)
Expand Down
Loading

0 comments on commit dd9a2ef

Please sign in to comment.