diff --git a/.github/workflows/unified-release.yml b/.github/workflows/unified-release.yml index 4ece02a6..7ecb6dc3 100644 --- a/.github/workflows/unified-release.yml +++ b/.github/workflows/unified-release.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - stack_version: ['2.1.1'] + stack_version: ['2.2.0'] steps: - name: Checkout diff --git a/.gitignore b/.gitignore index 9c942d0e..5bf425ef 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__/ *.py[cod] *$py.class +*.py[co] # C extensions *.so @@ -51,6 +52,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +junit.xml # Translations *.mo @@ -120,6 +122,7 @@ venv.bak/ # mkdocs documentation /site +docs/_build # mypy .mypy_cache/ @@ -141,4 +144,8 @@ cython_debug/ # opensearch files test_opensearch/cover test_opensearch/local.py -.ci/output \ No newline at end of file +.ci/output + +#Vi text editor +.*.swp +*~ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 157f89aa..8cd44578 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Added SigV4 support for Async Opensearch Client ([#254](https://github.com/opensearch-project/opensearch-py/pull/254)) - Compatibility with OpenSearch 2.1.0 - 2.4.1 ([#257](https://github.com/opensearch-project/opensearch-py/pull/257)) - Adding explicit parameters for AIOHttpConnection and AsyncTransport ([#276](https://github.com/opensearch-project/opensearch-py/pull/276)) +- Merging opensearch-dsl-py into opensearch-py ([#287](https://github.com/opensearch-project/opensearch-py/pull/287)) ### Changed - Updated getting started to user guide ([#233](https://github.com/opensearch-project/opensearch-py/pull/233)) - Updated CA certificate handling to check OpenSSL environment variables before defaulting to certifi ([#196](https://github.com/opensearch-project/opensearch-py/pull/196)) diff --git a/USER_GUIDE.md b/USER_GUIDE.md index 1d525002..632b669b 100644 --- a/USER_GUIDE.md +++ b/USER_GUIDE.md @@ -11,6 +11,8 @@ - [Deleting an index](#deleting-an-index) - [Making API calls](#making-api-calls) - [Point in time API](#point-in-time-api) + - [Using High-level Python client](#using-high-level-python-client) + - [Searching for documents with filters](#searching-for-documents-with-filters) - [Using plugins](#using-plugins) - [Alerting plugin](#alerting-plugin) - [**Searching for monitors**](#searching-for-monitors) @@ -228,6 +230,37 @@ print('\n The deleted point in time:') print(response) ``` +## Using High-level Python client +High-level python client is now merged into Low-level python client. Thus, opensearch-py supports creating and indexing documents, searching with and without filters, and updating documents using queries.[High-level Python client documentation](https://opensearch.org/docs/latest/clients/python-high-level/). + +All the APIs newly added from opensearch-dsl-py are listed in [docs](https://github.com/opensearch-project/opensearch-py/tree/main/docs/source/api-ref). + +In the below example, 'Search' API from High-level Python Client is used. + +### Searching for documents with filters + +```python +from opensearchpy import OpenSearch, Search + + # Use the above mentioned examples for creating client. + # Then,create an index + # Add a document to the index. + + # Search for the document. + s = Search(using=client, index=index_name) \ + .filter("term", category="search") \ + .query("match", title="python") + + response = s.execute() + + print('\nSearch results:') + for hit in response: + print(hit.meta.score, hit.title) + + # Delete the document. + # Delete the index. +``` + ## Using plugins Plugin client definitions can be found here -- @@ -486,7 +519,7 @@ async def search(): search() ``` -======= + ### Using Kerberos There are several python packages that provide Kerberos support over HTTP connections, such as [requests-kerberos](http://pypi.org/project/requests-kerberos) and [requests-gssapi](https://pypi.org/project/requests-gssapi). The following example shows how to setup the authentication. Note that some of the parameters, such as `mutual_authentication` might depend on the server settings. diff --git a/docs/source/api-ref/connection.md b/docs/source/api-ref/connection.md index c5bca91d..8ac0e3ec 100644 --- a/docs/source/api-ref/connection.md +++ b/docs/source/api-ref/connection.md @@ -11,3 +11,7 @@ ```{eval-rst} .. autoclass:: opensearchpy.Urllib3HttpConnection ``` + +```{eval-rst} +.. autoclass:: opensearchpy.connections +``` \ No newline at end of file diff --git a/docs/source/api-ref/exceptions.md b/docs/source/api-ref/exceptions.md index 90e05166..aa3b2d65 100644 --- a/docs/source/api-ref/exceptions.md +++ b/docs/source/api-ref/exceptions.md @@ -55,3 +55,19 @@ ```{eval-rst} .. autoclass:: opensearchpy.TransportError ``` + +```{eval-rst} +.. autoclass:: opensearchpy.OpenSearchDslException +``` + +```{eval-rst} +.. autoclass:: opensearchpy.IllegalOperation +``` + +```{eval-rst} +.. autoclass:: opensearchpy.UnknownDslObject +``` + +```{eval-rst} +.. autoclass:: opensearchpy.ValidationException +``` diff --git a/docs/source/api-ref/helpers.md b/docs/source/api-ref/helpers.md new file mode 100644 index 00000000..6f1aa79e --- /dev/null +++ b/docs/source/api-ref/helpers.md @@ -0,0 +1,23 @@ +# helpers + +```{toctree} +--- +glob: +titlesonly: +maxdepth: 1 +--- + +helpers/aggs +helpers/analysis +helpers/document +helpers/faceted_search +helpers/field +helpers/function +helpers/index +helpers/mapping +helpers/query +helpers/search +helpers/update_by_query +helpers/wrappers + +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/aggs.md b/docs/source/api-ref/helpers/aggs.md new file mode 100644 index 00000000..f7b9c15c --- /dev/null +++ b/docs/source/api-ref/helpers/aggs.md @@ -0,0 +1,5 @@ +# aggs + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.aggs.Agg +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/analysis.md b/docs/source/api-ref/helpers/analysis.md new file mode 100644 index 00000000..0c894b50 --- /dev/null +++ b/docs/source/api-ref/helpers/analysis.md @@ -0,0 +1,5 @@ +# analysis + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.analysis.Analyzer +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/document.md b/docs/source/api-ref/helpers/document.md new file mode 100644 index 00000000..e05bf733 --- /dev/null +++ b/docs/source/api-ref/helpers/document.md @@ -0,0 +1,5 @@ +# document + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.document.Document +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/faceted_search.md b/docs/source/api-ref/helpers/faceted_search.md new file mode 100644 index 00000000..cbed4ac9 --- /dev/null +++ b/docs/source/api-ref/helpers/faceted_search.md @@ -0,0 +1,5 @@ +# faceted_search + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.faceted_search.FacetedSearch +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/field.md b/docs/source/api-ref/helpers/field.md new file mode 100644 index 00000000..e2e2f917 --- /dev/null +++ b/docs/source/api-ref/helpers/field.md @@ -0,0 +1,5 @@ +# field + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.field.Field +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/function.md b/docs/source/api-ref/helpers/function.md new file mode 100644 index 00000000..863bddbf --- /dev/null +++ b/docs/source/api-ref/helpers/function.md @@ -0,0 +1,5 @@ +# function + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.function.ScoreFunction +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/index.md b/docs/source/api-ref/helpers/index.md new file mode 100644 index 00000000..88d6a185 --- /dev/null +++ b/docs/source/api-ref/helpers/index.md @@ -0,0 +1,5 @@ +# index + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.index.Index +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/mapping.md b/docs/source/api-ref/helpers/mapping.md new file mode 100644 index 00000000..25e8c3a4 --- /dev/null +++ b/docs/source/api-ref/helpers/mapping.md @@ -0,0 +1,5 @@ +# mapping + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.mapping.Mapping +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/query.md b/docs/source/api-ref/helpers/query.md new file mode 100644 index 00000000..9b67694d --- /dev/null +++ b/docs/source/api-ref/helpers/query.md @@ -0,0 +1,5 @@ +# query + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.query.Query +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/search.md b/docs/source/api-ref/helpers/search.md new file mode 100644 index 00000000..35dfeed3 --- /dev/null +++ b/docs/source/api-ref/helpers/search.md @@ -0,0 +1,5 @@ +# search + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.search.Search +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/update_by_query.md b/docs/source/api-ref/helpers/update_by_query.md new file mode 100644 index 00000000..39a32659 --- /dev/null +++ b/docs/source/api-ref/helpers/update_by_query.md @@ -0,0 +1,5 @@ +# update_by_query + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.update_by_query.UpdateByQuery +``` \ No newline at end of file diff --git a/docs/source/api-ref/helpers/wrappers.md b/docs/source/api-ref/helpers/wrappers.md new file mode 100644 index 00000000..6d99abf1 --- /dev/null +++ b/docs/source/api-ref/helpers/wrappers.md @@ -0,0 +1,5 @@ +# wrappers + +```{eval-rst} +.. autoclass:: opensearchpy.helpers.wrappers.Range +``` \ No newline at end of file diff --git a/docs/source/api-ref/serializer.md b/docs/source/api-ref/serializer.md index b96a4b51..4e9fd751 100644 --- a/docs/source/api-ref/serializer.md +++ b/docs/source/api-ref/serializer.md @@ -3,3 +3,7 @@ ```{eval-rst} .. autoclass:: opensearchpy.JSONSerializer ``` + +```{eval-rst} +.. autoclass:: opensearchpy.AttrJSONSerializer +``` \ No newline at end of file diff --git a/opensearchpy/__init__.py b/opensearchpy/__init__.py index c66c4613..6669f179 100644 --- a/opensearchpy/__init__.py +++ b/opensearchpy/__init__.py @@ -44,7 +44,12 @@ logger.addHandler(logging.NullHandler()) from .client import OpenSearch -from .connection import Connection, RequestsHttpConnection, Urllib3HttpConnection +from .connection import ( + Connection, + RequestsHttpConnection, + Urllib3HttpConnection, + connections, +) from .connection_pool import ConnectionPool, ConnectionSelector, RoundRobinSelector from .exceptions import ( AuthenticationException, @@ -52,17 +57,82 @@ ConflictError, ConnectionError, ConnectionTimeout, + IllegalOperation, ImproperlyConfigured, NotFoundError, OpenSearchDeprecationWarning, + OpenSearchDslException, OpenSearchException, OpenSearchWarning, RequestError, SerializationError, SSLError, TransportError, + UnknownDslObject, + ValidationException, ) from .helpers import AWSV4SignerAsyncAuth, AWSV4SignerAuth +from .helpers.aggs import A +from .helpers.analysis import analyzer, char_filter, normalizer, token_filter, tokenizer +from .helpers.document import Document, InnerDoc, MetaField +from .helpers.faceted_search import ( + DateHistogramFacet, + Facet, + FacetedResponse, + FacetedSearch, + HistogramFacet, + NestedFacet, + RangeFacet, + TermsFacet, +) +from .helpers.field import ( + Binary, + Boolean, + Byte, + Completion, + CustomField, + Date, + DateRange, + DenseVector, + Double, + DoubleRange, + Field, + Float, + FloatRange, + GeoPoint, + GeoShape, + HalfFloat, + Integer, + IntegerRange, + Ip, + IpRange, + Join, + Keyword, + Long, + LongRange, + Murmur3, + Nested, + Object, + Percolator, + RangeField, + RankFeature, + RankFeatures, + ScaledFloat, + SearchAsYouType, + Short, + SparseVector, + Text, + TokenCount, + construct_field, +) +from .helpers.function import SF +from .helpers.index import Index, IndexTemplate +from .helpers.mapping import Mapping +from .helpers.query import Q +from .helpers.search import MultiSearch, Search +from .helpers.update_by_query import UpdateByQuery +from .helpers.utils import AttrDict, AttrList, DslBase +from .helpers.wrappers import Range from .serializer import JSONSerializer from .transport import Transport @@ -97,6 +167,78 @@ "OpenSearchDeprecationWarning", "AWSV4SignerAuth", "AWSV4SignerAsyncAuth", + "A", + "AttrDict", + "AttrList", + "Binary", + "Boolean", + "Byte", + "Completion", + "CustomField", + "Date", + "DateHistogramFacet", + "DateRange", + "DenseVector", + "Document", + "Double", + "DoubleRange", + "DslBase", + "Facet", + "FacetedResponse", + "FacetedSearch", + "Field", + "Float", + "FloatRange", + "GeoPoint", + "GeoShape", + "HalfFloat", + "HistogramFacet", + "IllegalOperation", + "Index", + "IndexTemplate", + "InnerDoc", + "Integer", + "IntegerRange", + "Ip", + "IpRange", + "Join", + "Keyword", + "Long", + "LongRange", + "Mapping", + "MetaField", + "MultiSearch", + "Murmur3", + "Nested", + "NestedFacet", + "Object", + "OpenSearchDslException", + "Percolator", + "Q", + "Range", + "RangeFacet", + "RangeField", + "RankFeature", + "RankFeatures", + "SF", + "ScaledFloat", + "Search", + "SearchAsYouType", + "Short", + "SparseVector", + "TermsFacet", + "Text", + "TokenCount", + "UnknownDslObject", + "UpdateByQuery", + "ValidationException", + "analyzer", + "char_filter", + "connections", + "construct_field", + "normalizer", + "token_filter", + "tokenizer", ] try: diff --git a/opensearchpy/__init__.pyi b/opensearchpy/__init__.pyi index a53caac1..01fccaec 100644 --- a/opensearchpy/__init__.pyi +++ b/opensearchpy/__init__.pyi @@ -32,6 +32,7 @@ from .connection import AsyncHttpConnection as AsyncHttpConnection from .connection import Connection as Connection from .connection import RequestsHttpConnection as RequestsHttpConnection from .connection import Urllib3HttpConnection as Urllib3HttpConnection +from .connection import connections as connections from .connection_pool import ConnectionPool as ConnectionPool from .connection_pool import ConnectionSelector as ConnectionSelector from .connection_pool import RoundRobinSelector as RoundRobinSelector @@ -40,14 +41,82 @@ from .exceptions import AuthorizationException as AuthorizationException from .exceptions import ConflictError as ConflictError from .exceptions import ConnectionError as ConnectionError from .exceptions import ConnectionTimeout as ConnectionTimeout +from .exceptions import IllegalOperation as IllegalOperation from .exceptions import ImproperlyConfigured as ImproperlyConfigured from .exceptions import NotFoundError as NotFoundError from .exceptions import OpenSearchDeprecationWarning as OpenSearchDeprecationWarning +from .exceptions import OpenSearchDslException as OpenSearchDslException from .exceptions import OpenSearchException as OpenSearchException +from .exceptions import OpenSearchWarning as OpenSearchWarning from .exceptions import RequestError as RequestError from .exceptions import SerializationError as SerializationError from .exceptions import SSLError as SSLError from .exceptions import TransportError as TransportError +from .exceptions import UnknownDslObject as UnknownDslObject +from .exceptions import ValidationException as ValidationException +from .helpers.aggs import A as A +from .helpers.analysis import Analyzer, CharFilter, Normalizer, TokenFilter, Tokenizer +from .helpers.document import Document as Document +from .helpers.document import InnerDoc as InnerDoc +from .helpers.document import MetaField as MetaField +from .helpers.faceted_search import DateHistogramFacet as DateHistogramFacet +from .helpers.faceted_search import Facet as Facet +from .helpers.faceted_search import FacetedResponse as FacetedResponse +from .helpers.faceted_search import FacetedSearch as FacetedSearch +from .helpers.faceted_search import HistogramFacet as HistogramFacet +from .helpers.faceted_search import NestedFacet as NestedFacet +from .helpers.faceted_search import RangeFacet as RangeFacet +from .helpers.faceted_search import TermsFacet as TermsFacet +from .helpers.field import Binary as Binary +from .helpers.field import Boolean as Boolean +from .helpers.field import Byte as Byte +from .helpers.field import Completion as Completion +from .helpers.field import CustomField as CustomField +from .helpers.field import Date as Date +from .helpers.field import DateRange as DateRange +from .helpers.field import DenseVector as DenseVector +from .helpers.field import Double as Double +from .helpers.field import DoubleRange as DoubleRange +from .helpers.field import Field as Field +from .helpers.field import Float as Float +from .helpers.field import FloatRange as FloatRange +from .helpers.field import GeoPoint as GeoPoint +from .helpers.field import GeoShape as GeoShape +from .helpers.field import HalfFloat as HalfFloat +from .helpers.field import Integer as Integer +from .helpers.field import IntegerRange as IntegerRange +from .helpers.field import Ip as Ip +from .helpers.field import IpRange as IpRange +from .helpers.field import Join as Join +from .helpers.field import Keyword as Keyword +from .helpers.field import Long as Long +from .helpers.field import LongRange as LongRange +from .helpers.field import Murmur3 as Murmur3 +from .helpers.field import Nested as Nested +from .helpers.field import Object as Object +from .helpers.field import Percolator as Percolator +from .helpers.field import RangeField as RangeField +from .helpers.field import RankFeature as RankFeature +from .helpers.field import RankFeatures as RankFeatures +from .helpers.field import ScaledFloat as ScaledFloat +from .helpers.field import SearchAsYouType as SearchAsYouType +from .helpers.field import Short as Short +from .helpers.field import SparseVector as SparseVector +from .helpers.field import Text as Text +from .helpers.field import TokenCount as TokenCount +from .helpers.field import construct_field as construct_field +from .helpers.function import SF as SF +from .helpers.index import Index as Index +from .helpers.index import IndexTemplate as IndexTemplate +from .helpers.mapping import Mapping as Mapping +from .helpers.query import Q as Q +from .helpers.search import MultiSearch as MultiSearch +from .helpers.search import Search as Search +from .helpers.update_by_query import UpdateByQuery as UpdateByQuery +from .helpers.utils import AttrDict as AttrDict +from .helpers.utils import AttrList as AttrList +from .helpers.utils import DslBase as DslBase +from .helpers.wrappers import Range as Range from .serializer import JSONSerializer as JSONSerializer from .transport import Transport as Transport @@ -57,6 +126,7 @@ try: from ._async.client import AsyncOpenSearch as AsyncOpenSearch from ._async.http_aiohttp import AIOHttpConnection as AIOHttpConnection + from ._async.http_aiohttp import AsyncConnection as AsyncConnection from ._async.transport import AsyncTransport as AsyncTransport from .helpers import AWSV4SignerAsyncAuth as AWSV4SignerAsyncAuth from .helpers import AWSV4SignerAuth as AWSV4SignerAuth diff --git a/opensearchpy/_async/helpers.py b/opensearchpy/_async/helpers.py index cb0c43ef..78ae7301 100644 --- a/opensearchpy/_async/helpers.py +++ b/opensearchpy/_async/helpers.py @@ -143,7 +143,6 @@ async def async_streaming_bulk( *args, **kwargs ): - """ Streaming bulk consumes actions from the iterable passed in and yields results per action. For non-streaming usecases use @@ -185,7 +184,6 @@ async def map_actions(): async for bulk_data, bulk_actions in _chunk_actions( map_actions(), chunk_size, max_chunk_bytes, client.transport.serializer ): - for attempt in range(max_retries + 1): to_retry, to_retry_data = [], [] if attempt: @@ -207,7 +205,6 @@ async def map_actions(): **kwargs, ), ): - if not ok: action, info = info.popitem() # retry if retries enabled, we get 429, and we are not @@ -422,7 +419,6 @@ async def async_reindex( scan_kwargs={}, bulk_kwargs={}, ): - """ Reindex all documents from one index that satisfy a given query to another, potentially (if `target_client` is specified) on a different cluster. diff --git a/opensearchpy/_async/transport.py b/opensearchpy/_async/transport.py index 65b2bdec..e93344bc 100644 --- a/opensearchpy/_async/transport.py +++ b/opensearchpy/_async/transport.py @@ -160,7 +160,6 @@ async def _async_init(self): # ... and we can start sniffing in the background. if self.sniffing_task is None and self.sniff_on_start: - # Create an asyncio.Event for future calls to block on # until the initial sniffing task completes. self._sniff_on_start_event = asyncio.Event() diff --git a/opensearchpy/_version.py b/opensearchpy/_version.py index 7ed5f16a..017a188a 100644 --- a/opensearchpy/_version.py +++ b/opensearchpy/_version.py @@ -24,4 +24,4 @@ # specific language governing permissions and limitations # under the License. -__versionstr__ = "2.1.1" +__versionstr__ = "2.2.0" diff --git a/opensearchpy/connection/connections.py b/opensearchpy/connection/connections.py new file mode 100644 index 00000000..857cba3a --- /dev/null +++ b/opensearchpy/connection/connections.py @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from six import string_types + +import opensearchpy +from opensearchpy.serializer import serializer + + +class Connections(object): + """ + Class responsible for holding connections to different clusters. Used as a + singleton in this module. + """ + + def __init__(self): + self._kwargs = {} + self._conns = {} + + def configure(self, **kwargs): + """ + Configure multiple connections at once, useful for passing in config + dictionaries obtained from other sources, like Django's settings or a + configuration management tool. + + Example:: + + connections.configure( + default={'hosts': 'localhost'}, + dev={'hosts': ['opensearchdev1.example.com:9200'], 'sniff_on_start': True}, + ) + + Connections will only be constructed lazily when requested through + ``get_connection``. + """ + for k in list(self._conns): + # try and preserve existing client to keep the persistent connections alive + if k in self._kwargs and kwargs.get(k, None) == self._kwargs[k]: + continue + del self._conns[k] + self._kwargs = kwargs + + def add_connection(self, alias, conn): + """ + Add a connection object, it will be passed through as-is. + """ + self._conns[alias] = conn + + def remove_connection(self, alias): + """ + Remove connection from the registry. Raises ``KeyError`` if connection + wasn't found. + """ + errors = 0 + for d in (self._conns, self._kwargs): + try: + del d[alias] + except KeyError: + errors += 1 + + if errors == 2: + raise KeyError("There is no connection with alias %r." % alias) + + def create_connection(self, alias="default", **kwargs): + """ + Construct an instance of ``opensearchpy.OpenSearch`` and register + it under given alias. + """ + kwargs.setdefault("serializer", serializer) + conn = self._conns[alias] = opensearchpy.OpenSearch(**kwargs) + return conn + + def get_connection(self, alias="default"): + """ + Retrieve a connection, construct it if necessary (only configuration + was passed to us). If a non-string alias has been passed through we + assume it's already a client instance and will just return it as-is. + + Raises ``KeyError`` if no client (or its definition) is registered + under the alias. + """ + # do not check isinstance(OpenSearch) so that people can wrap their + # clients + if not isinstance(alias, string_types): + return alias + + # connection already established + try: + return self._conns[alias] + except KeyError: + pass + + # if not, try to create it + try: + return self.create_connection(alias, **self._kwargs[alias]) + except KeyError: + # no connection and no kwargs to set one up + raise KeyError("There is no connection with alias %r." % alias) + + +connections = Connections() +configure = connections.configure +add_connection = connections.add_connection +remove_connection = connections.remove_connection +create_connection = connections.create_connection +get_connection = connections.get_connection diff --git a/opensearchpy/connection/connections.pyi b/opensearchpy/connection/connections.pyi new file mode 100644 index 00000000..07814ba4 --- /dev/null +++ b/opensearchpy/connection/connections.pyi @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT. + +class Connections(object): ... diff --git a/opensearchpy/connection/http_async.py b/opensearchpy/connection/http_async.py index 8c2c94d2..af014e0a 100644 --- a/opensearchpy/connection/http_async.py +++ b/opensearchpy/connection/http_async.py @@ -8,6 +8,7 @@ # GitHub history for details. # + import asyncio import os import ssl @@ -196,7 +197,6 @@ async def perform_request( start = self.loop.time() try: - async with self.session.request( method, url, diff --git a/opensearchpy/exceptions.py b/opensearchpy/exceptions.py index d79caee2..cc35c91f 100644 --- a/opensearchpy/exceptions.py +++ b/opensearchpy/exceptions.py @@ -38,6 +38,12 @@ "ConnectionTimeout", "AuthenticationException", "AuthorizationException", + "OpenSearchDslException", + "UnknownDslObject", + "ValidationException", + "IllegalOperation", + "OpenSearchWarning", + "OpenSearchDeprecationWarning", ] @@ -163,6 +169,22 @@ class AuthorizationException(TransportError): """Exception representing a 403 status code.""" +class OpenSearchDslException(Exception): + """Base class for all OpenSearchDsl exceptions""" + + +class UnknownDslObject(OpenSearchDslException): + """Exception representing UnknownDSLObject""" + + +class ValidationException(ValueError, OpenSearchDslException): + """Exception representing Validation Error""" + + +class IllegalOperation(OpenSearchDslException): + """Exception representing IllegalOperation""" + + class OpenSearchWarning(Warning): """Warning that is raised when a deprecated option or incorrect usage is flagged via the 'Warning' HTTP header. diff --git a/opensearchpy/exceptions.pyi b/opensearchpy/exceptions.pyi index 66bc4787..8adafdd8 100644 --- a/opensearchpy/exceptions.pyi +++ b/opensearchpy/exceptions.pyi @@ -52,6 +52,10 @@ class ConflictError(TransportError): ... class RequestError(TransportError): ... class AuthenticationException(TransportError): ... class AuthorizationException(TransportError): ... +class OpenSearchDslException(Exception): ... +class UnknownDslObject(OpenSearchDslException): ... +class ValidationException(ValueError, OpenSearchDslException): ... +class IllegalOperation(OpenSearchDslException): ... class OpenSearchWarning(Warning): ... OpenSearchDeprecationWarning = OpenSearchWarning diff --git a/opensearchpy/helpers/actions.py b/opensearchpy/helpers/actions.py index 9f2ddfd2..e565256f 100644 --- a/opensearchpy/helpers/actions.py +++ b/opensearchpy/helpers/actions.py @@ -280,7 +280,6 @@ def streaming_bulk( *args, **kwargs ): - """ Streaming bulk consumes actions from the iterable passed in and yields results per action. For non-streaming usecases use @@ -319,7 +318,6 @@ def streaming_bulk( for bulk_data, bulk_actions in _chunk_actions( actions, chunk_size, max_chunk_bytes, client.transport.serializer ): - for attempt in range(max_retries + 1): to_retry, to_retry_data = [], [] if attempt: @@ -339,7 +337,6 @@ def streaming_bulk( **kwargs ), ): - if not ok: action, info = info.popitem() # retry if retries enabled, we get 429, and we are not @@ -621,7 +618,6 @@ def reindex( scan_kwargs={}, bulk_kwargs={}, ): - """ Reindex all documents from one index that satisfy a given query to another, potentially (if `target_client` is specified) on a different cluster. diff --git a/opensearchpy/helpers/aggs.py b/opensearchpy/helpers/aggs.py new file mode 100644 index 00000000..5a7f800c --- /dev/null +++ b/opensearchpy/helpers/aggs.py @@ -0,0 +1,454 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from .response.aggs import AggResponse, BucketData, FieldBucketData, TopHitsData +from .utils import DslBase + + +def A(name_or_agg, filter=None, **params): + if filter is not None: + if name_or_agg != "filter": + raise ValueError( + "Aggregation %r doesn't accept positional argument 'filter'." + % name_or_agg + ) + params["filter"] = filter + + # {"terms": {"field": "tags"}, "aggs": {...}} + if isinstance(name_or_agg, collections_abc.Mapping): + if params: + raise ValueError("A() cannot accept parameters when passing in a dict.") + # copy to avoid modifying in-place + agg = name_or_agg.copy() + # pop out nested aggs + aggs = agg.pop("aggs", None) + # pop out meta data + meta = agg.pop("meta", None) + # should be {"terms": {"field": "tags"}} + if len(agg) != 1: + raise ValueError( + 'A() can only accept dict with an aggregation ({"terms": {...}}). ' + "Instead it got (%r)" % name_or_agg + ) + agg_type, params = agg.popitem() + if aggs: + params = params.copy() + params["aggs"] = aggs + if meta: + params = params.copy() + params["meta"] = meta + return Agg.get_dsl_class(agg_type)(_expand__to_dot=False, **params) + + # Terms(...) just return the nested agg + elif isinstance(name_or_agg, Agg): + if params: + raise ValueError( + "A() cannot accept parameters when passing in an Agg object." + ) + return name_or_agg + + # "terms", field="tags" + return Agg.get_dsl_class(name_or_agg)(**params) + + +class Agg(DslBase): + _type_name = "agg" + _type_shortcut = staticmethod(A) + name = None + + def __contains__(self, key): + return False + + def to_dict(self): + d = super(Agg, self).to_dict() + if "meta" in d[self.name]: + d["meta"] = d[self.name].pop("meta") + return d + + def result(self, search, data): + return AggResponse(self, search, data) + + +class AggBase(object): + _param_defs = { + "aggs": {"type": "agg", "hash": True}, + } + + def __contains__(self, key): + return key in self._params.get("aggs", {}) + + def __getitem__(self, agg_name): + agg = self._params.setdefault("aggs", {})[agg_name] # propagate KeyError + + # make sure we're not mutating a shared state - whenever accessing a + # bucket, return a shallow copy of it to be safe + if isinstance(agg, Bucket): + agg = A(agg.name, **agg._params) + # be sure to store the copy so any modifications to it will affect us + self._params["aggs"][agg_name] = agg + + return agg + + def __setitem__(self, agg_name, agg): + self.aggs[agg_name] = A(agg) + + def __iter__(self): + return iter(self.aggs) + + def _agg(self, bucket, name, agg_type, *args, **params): + agg = self[name] = A(agg_type, *args, **params) + + # For chaining - when creating new buckets return them... + if bucket: + return agg + # otherwise return self._base so we can keep chaining + else: + return self._base + + def metric(self, name, agg_type, *args, **params): + return self._agg(False, name, agg_type, *args, **params) + + def bucket(self, name, agg_type, *args, **params): + return self._agg(True, name, agg_type, *args, **params) + + def pipeline(self, name, agg_type, *args, **params): + return self._agg(False, name, agg_type, *args, **params) + + def result(self, search, data): + return BucketData(self, search, data) + + +class Bucket(AggBase, Agg): + def __init__(self, **params): + super(Bucket, self).__init__(**params) + # remember self for chaining + self._base = self + + def to_dict(self): + d = super(AggBase, self).to_dict() + if "aggs" in d[self.name]: + d["aggs"] = d[self.name].pop("aggs") + return d + + +class Filter(Bucket): + name = "filter" + _param_defs = { + "filter": {"type": "query"}, + "aggs": {"type": "agg", "hash": True}, + } + + def __init__(self, filter=None, **params): + if filter is not None: + params["filter"] = filter + super(Filter, self).__init__(**params) + + def to_dict(self): + d = super(Filter, self).to_dict() + d[self.name].update(d[self.name].pop("filter", {})) + return d + + +class Pipeline(Agg): + pass + + +# bucket aggregations +class Filters(Bucket): + name = "filters" + _param_defs = { + "filters": {"type": "query", "hash": True}, + "aggs": {"type": "agg", "hash": True}, + } + + +class Children(Bucket): + name = "children" + + +class Parent(Bucket): + name = "parent" + + +class DateHistogram(Bucket): + name = "date_histogram" + + def result(self, search, data): + return FieldBucketData(self, search, data) + + +class AutoDateHistogram(DateHistogram): + name = "auto_date_histogram" + + +class DateRange(Bucket): + name = "date_range" + + +class GeoDistance(Bucket): + name = "geo_distance" + + +class GeohashGrid(Bucket): + name = "geohash_grid" + + +class GeotileGrid(Bucket): + name = "geotile_grid" + + +class GeoCentroid(Bucket): + name = "geo_centroid" + + +class Global(Bucket): + name = "global" + + +class Histogram(Bucket): + name = "histogram" + + def result(self, search, data): + return FieldBucketData(self, search, data) + + +class IPRange(Bucket): + name = "ip_range" + + +class Missing(Bucket): + name = "missing" + + +class Nested(Bucket): + name = "nested" + + +class Range(Bucket): + name = "range" + + +class RareTerms(Bucket): + name = "rare_terms" + + def result(self, search, data): + return FieldBucketData(self, search, data) + + +class ReverseNested(Bucket): + name = "reverse_nested" + + +class SignificantTerms(Bucket): + name = "significant_terms" + + +class SignificantText(Bucket): + name = "significant_text" + + +class Terms(Bucket): + name = "terms" + + def result(self, search, data): + return FieldBucketData(self, search, data) + + +class Sampler(Bucket): + name = "sampler" + + +class DiversifiedSampler(Bucket): + name = "diversified_sampler" + + +class Composite(Bucket): + name = "composite" + _param_defs = { + "sources": {"type": "agg", "hash": True, "multi": True}, + "aggs": {"type": "agg", "hash": True}, + } + + +class VariableWidthHistogram(Bucket): + name = "variable_width_histogram" + + def result(self, search, data): + return FieldBucketData(self, search, data) + + +# metric aggregations +class TopHits(Agg): + name = "top_hits" + + def result(self, search, data): + return TopHitsData(self, search, data) + + +class Avg(Agg): + name = "avg" + + +class WeightedAvg(Agg): + name = "weighted_avg" + + +class Cardinality(Agg): + name = "cardinality" + + +class ExtendedStats(Agg): + name = "extended_stats" + + +class Boxplot(Agg): + name = "boxplot" + + +class GeoBounds(Agg): + name = "geo_bounds" + + +class Max(Agg): + name = "max" + + +class MedianAbsoluteDeviation(Agg): + name = "median_absolute_deviation" + + +class Min(Agg): + name = "min" + + +class Percentiles(Agg): + name = "percentiles" + + +class PercentileRanks(Agg): + name = "percentile_ranks" + + +class ScriptedMetric(Agg): + name = "scripted_metric" + + +class Stats(Agg): + name = "stats" + + +class Sum(Agg): + name = "sum" + + +class TTest(Agg): + name = "t_test" + + +class ValueCount(Agg): + name = "value_count" + + +# pipeline aggregations +class AvgBucket(Pipeline): + name = "avg_bucket" + + +class BucketScript(Pipeline): + name = "bucket_script" + + +class BucketSelector(Pipeline): + name = "bucket_selector" + + +class CumulativeSum(Pipeline): + name = "cumulative_sum" + + +class CumulativeCardinality(Pipeline): + name = "cumulative_cardinality" + + +class Derivative(Pipeline): + name = "derivative" + + +class ExtendedStatsBucket(Pipeline): + name = "extended_stats_bucket" + + +class Inference(Pipeline): + name = "inference" + + +class MaxBucket(Pipeline): + name = "max_bucket" + + +class MinBucket(Pipeline): + name = "min_bucket" + + +class MovingFn(Pipeline): + name = "moving_fn" + + +class MovingAvg(Pipeline): + name = "moving_avg" + + +class MovingPercentiles(Pipeline): + name = "moving_percentiles" + + +class Normalize(Pipeline): + name = "normalize" + + +class PercentilesBucket(Pipeline): + name = "percentiles_bucket" + + +class SerialDiff(Pipeline): + name = "serial_diff" + + +class StatsBucket(Pipeline): + name = "stats_bucket" + + +class SumBucket(Pipeline): + name = "sum_bucket" + + +class BucketSort(Pipeline): + name = "bucket_sort" diff --git a/opensearchpy/helpers/aggs.pyi b/opensearchpy/helpers/aggs.pyi new file mode 100644 index 00000000..e3f6e93c --- /dev/null +++ b/opensearchpy/helpers/aggs.pyi @@ -0,0 +1,104 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import Any + +from _typeshed import Incomplete + +from ..response.aggs import AggResponse as AggResponse +from ..response.aggs import BucketData as BucketData +from ..response.aggs import FieldBucketData as FieldBucketData +from ..response.aggs import TopHitsData as TopHitsData +from .utils import DslBase +from .utils import DslBase as DslBase + +def A(name_or_agg: Any, filter: Incomplete | None = ..., **params: Any) -> Any: ... + +class Agg(DslBase): ... +class AggBase(object): ... +class Bucket(AggBase, Agg): ... +class Filter(Bucket): ... +class Pipeline(Agg): ... +class Filters(Bucket): ... +class Children(Bucket): ... +class Parent(Bucket): ... +class DateHistogram(Bucket): ... +class AutoDateHistogram(DateHistogram): ... +class DateRange(Bucket): ... +class GeoDistance(Bucket): ... +class GeohashGrid(Bucket): ... +class GeotileGrid(Bucket): ... +class GeoCentroid(Bucket): ... +class Global(Bucket): ... +class Histogram(Bucket): ... +class IPRange(Bucket): ... +class Missing(Bucket): ... +class Nested(Bucket): ... +class Range(Bucket): ... +class RareTerms(Bucket): ... +class ReverseNested(Bucket): ... +class SignificantTerms(Bucket): ... +class SignificantText(Bucket): ... +class Terms(Bucket): ... +class Sampler(Bucket): ... +class DiversifiedSampler(Bucket): ... +class Composite(Bucket): ... +class VariableWidthHistogram(Bucket): ... +class TopHits(Agg): ... +class Avg(Agg): ... +class WeightedAvg(Agg): ... +class Cardinality(Agg): ... +class ExtendedStats(Agg): ... +class Boxplot(Agg): ... +class GeoBounds(Agg): ... +class Max(Agg): ... +class MedianAbsoluteDeviation(Agg): ... +class Min(Agg): ... +class Percentiles(Agg): ... +class PercentileRanks(Agg): ... +class ScriptedMetric(Agg): ... +class Stats(Agg): ... +class Sum(Agg): ... +class TTest(Agg): ... +class ValueCount(Agg): ... +class AvgBucket(Pipeline): ... +class BucketScript(Pipeline): ... +class BucketSelector(Pipeline): ... +class CumulativeSum(Pipeline): ... +class CumulativeCardinality(Pipeline): ... +class Derivative(Pipeline): ... +class ExtendedStatsBucket(Pipeline): ... +class Inference(Pipeline): ... +class MaxBucket(Pipeline): ... +class MinBucket(Pipeline): ... +class MovingFn(Pipeline): ... +class MovingAvg(Pipeline): ... +class MovingPercentiles(Pipeline): ... +class Normalize(Pipeline): ... +class PercentilesBucket(Pipeline): ... +class SerialDiff(Pipeline): ... +class StatsBucket(Pipeline): ... +class SumBucket(Pipeline): ... +class BucketSort(Pipeline): ... diff --git a/opensearchpy/helpers/analysis.py b/opensearchpy/helpers/analysis.py new file mode 100644 index 00000000..13478145 --- /dev/null +++ b/opensearchpy/helpers/analysis.py @@ -0,0 +1,294 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import six + +from opensearchpy.connection.connections import get_connection + +from .utils import AttrDict, DslBase, merge + +__all__ = ["tokenizer", "analyzer", "char_filter", "token_filter", "normalizer"] + + +class AnalysisBase(object): + @classmethod + def _type_shortcut(cls, name_or_instance, type=None, **kwargs): + if isinstance(name_or_instance, cls): + if type or kwargs: + raise ValueError("%s() cannot accept parameters." % cls.__name__) + return name_or_instance + + if not (type or kwargs): + return cls.get_dsl_class("builtin")(name_or_instance) + + return cls.get_dsl_class(type, "custom")( + name_or_instance, type or "custom", **kwargs + ) + + +class CustomAnalysis(object): + name = "custom" + + def __init__(self, filter_name, builtin_type="custom", **kwargs): + self._builtin_type = builtin_type + self._name = filter_name + super(CustomAnalysis, self).__init__(**kwargs) + + def to_dict(self): + # only name to present in lists + return self._name + + def get_definition(self): + d = super(CustomAnalysis, self).to_dict() + d = d.pop(self.name) + d["type"] = self._builtin_type + return d + + +class CustomAnalysisDefinition(CustomAnalysis): + def get_analysis_definition(self): + out = {self._type_name: {self._name: self.get_definition()}} + + t = getattr(self, "tokenizer", None) + if "tokenizer" in self._param_defs and hasattr(t, "get_definition"): + out["tokenizer"] = {t._name: t.get_definition()} + + filters = { + f._name: f.get_definition() + for f in self.filter + if hasattr(f, "get_definition") + } + if filters: + out["filter"] = filters + + # any sub filter definitions like multiplexers etc? + for f in self.filter: + if hasattr(f, "get_analysis_definition"): + d = f.get_analysis_definition() + if d: + merge(out, d, True) + + char_filters = { + f._name: f.get_definition() + for f in self.char_filter + if hasattr(f, "get_definition") + } + if char_filters: + out["char_filter"] = char_filters + + return out + + +class BuiltinAnalysis(object): + name = "builtin" + + def __init__(self, name): + self._name = name + super(BuiltinAnalysis, self).__init__() + + def to_dict(self): + # only name to present in lists + return self._name + + +class Analyzer(AnalysisBase, DslBase): + _type_name = "analyzer" + name = None + + +class BuiltinAnalyzer(BuiltinAnalysis, Analyzer): + def get_analysis_definition(self): + return {} + + +class CustomAnalyzer(CustomAnalysisDefinition, Analyzer): + _param_defs = { + "filter": {"type": "token_filter", "multi": True}, + "char_filter": {"type": "char_filter", "multi": True}, + "tokenizer": {"type": "tokenizer"}, + } + + def simulate(self, text, using="default", explain=False, attributes=None): + """ + Use the Analyze API of opensearch to test the outcome of this analyzer. + + :arg text: Text to be analyzed + :arg using: connection alias to use, defaults to ``'default'`` + :arg explain: will output all token attributes for each token. You can + filter token attributes you want to output by setting ``attributes`` + option. + :arg attributes: if ``explain`` is specified, filter the token + attributes to return. + """ + opensearch = get_connection(using) + + body = {"text": text, "explain": explain} + if attributes: + body["attributes"] = attributes + + definition = self.get_analysis_definition() + analyzer_def = self.get_definition() + + for section in ("tokenizer", "char_filter", "filter"): + if section not in analyzer_def: + continue + sec_def = definition.get(section, {}) + sec_names = analyzer_def[section] + + if isinstance(sec_names, six.string_types): + body[section] = sec_def.get(sec_names, sec_names) + else: + body[section] = [ + sec_def.get(sec_name, sec_name) for sec_name in sec_names + ] + + if self._builtin_type != "custom": + body["analyzer"] = self._builtin_type + + return AttrDict(opensearch.indices.analyze(body=body)) + + +class Normalizer(AnalysisBase, DslBase): + _type_name = "normalizer" + name = None + + +class BuiltinNormalizer(BuiltinAnalysis, Normalizer): + def get_analysis_definition(self): + return {} + + +class CustomNormalizer(CustomAnalysisDefinition, Normalizer): + _param_defs = { + "filter": {"type": "token_filter", "multi": True}, + "char_filter": {"type": "char_filter", "multi": True}, + } + + +class Tokenizer(AnalysisBase, DslBase): + _type_name = "tokenizer" + name = None + + +class BuiltinTokenizer(BuiltinAnalysis, Tokenizer): + pass + + +class CustomTokenizer(CustomAnalysis, Tokenizer): + pass + + +class TokenFilter(AnalysisBase, DslBase): + _type_name = "token_filter" + name = None + + +class BuiltinTokenFilter(BuiltinAnalysis, TokenFilter): + pass + + +class CustomTokenFilter(CustomAnalysis, TokenFilter): + pass + + +class MultiplexerTokenFilter(CustomTokenFilter): + name = "multiplexer" + + def get_definition(self): + d = super(CustomTokenFilter, self).get_definition() + + if "filters" in d: + d["filters"] = [ + # comma delimited string given by user + fs if isinstance(fs, six.string_types) else + # list of strings or TokenFilter objects + ", ".join(f.to_dict() if hasattr(f, "to_dict") else f for f in fs) + for fs in self.filters + ] + return d + + def get_analysis_definition(self): + if not hasattr(self, "filters"): + return {} + + fs = {} + d = {"filter": fs} + for filters in self.filters: + if isinstance(filters, six.string_types): + continue + fs.update( + { + f._name: f.get_definition() + for f in filters + if hasattr(f, "get_definition") + } + ) + return d + + +class ConditionalTokenFilter(CustomTokenFilter): + name = "condition" + + def get_definition(self): + d = super(CustomTokenFilter, self).get_definition() + if "filter" in d: + d["filter"] = [ + f.to_dict() if hasattr(f, "to_dict") else f for f in self.filter + ] + return d + + def get_analysis_definition(self): + if not hasattr(self, "filter"): + return {} + + return { + "filter": { + f._name: f.get_definition() + for f in self.filter + if hasattr(f, "get_definition") + } + } + + +class CharFilter(AnalysisBase, DslBase): + _type_name = "char_filter" + name = None + + +class BuiltinCharFilter(BuiltinAnalysis, CharFilter): + pass + + +class CustomCharFilter(CustomAnalysis, CharFilter): + pass + + +# shortcuts for direct use +analyzer = Analyzer._type_shortcut +tokenizer = Tokenizer._type_shortcut +token_filter = TokenFilter._type_shortcut +char_filter = CharFilter._type_shortcut +normalizer = Normalizer._type_shortcut diff --git a/opensearchpy/helpers/analysis.pyi b/opensearchpy/helpers/analysis.pyi new file mode 100644 index 00000000..b4f37af5 --- /dev/null +++ b/opensearchpy/helpers/analysis.pyi @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .utils import DslBase + +class AnalysisBase(object): ... +class CustomAnalysis(object): ... +class CustomAnalysisDefinition(CustomAnalysis): ... +class BuiltinAnalysis(object): ... +class Analyzer(AnalysisBase, DslBase): ... +class BuiltinAnalyzer(BuiltinAnalysis, Analyzer): ... +class CustomAnalyzer(CustomAnalysisDefinition, Analyzer): ... +class Normalizer(AnalysisBase, DslBase): ... +class BuiltinNormalizer(BuiltinAnalysis, Normalizer): ... +class CustomNormalizer(CustomAnalysisDefinition, Normalizer): ... +class Tokenizer(AnalysisBase, DslBase): ... +class BuiltinTokenizer(BuiltinAnalysis, Tokenizer): ... +class CustomTokenizer(CustomAnalysis, Tokenizer): ... +class TokenFilter(AnalysisBase, DslBase): ... +class BuiltinTokenFilter(BuiltinAnalysis, TokenFilter): ... +class CustomTokenFilter(CustomAnalysis, TokenFilter): ... +class MultiplexerTokenFilter(CustomTokenFilter): ... +class ConditionalTokenFilter(CustomTokenFilter): ... +class CharFilter(AnalysisBase, DslBase): ... +class BuiltinCharFilter(BuiltinAnalysis, CharFilter): ... +class CustomCharFilter(CustomAnalysis, CharFilter): ... diff --git a/opensearchpy/helpers/document.py b/opensearchpy/helpers/document.py new file mode 100644 index 00000000..7d45275a --- /dev/null +++ b/opensearchpy/helpers/document.py @@ -0,0 +1,505 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from fnmatch import fnmatch + +from six import add_metaclass, iteritems + +from opensearchpy.connection.connections import get_connection +from opensearchpy.exceptions import NotFoundError, RequestError + +from ..exceptions import IllegalOperation, ValidationException +from .field import Field +from .index import Index +from .mapping import Mapping +from .search import Search +from .utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge + + +class MetaField(object): + def __init__(self, *args, **kwargs): + self.args, self.kwargs = args, kwargs + + +class DocumentMeta(type): + def __new__(cls, name, bases, attrs): + # DocumentMeta filters attrs in place + attrs["_doc_type"] = DocumentOptions(name, bases, attrs) + return super(DocumentMeta, cls).__new__(cls, name, bases, attrs) + + +class IndexMeta(DocumentMeta): + # global flag to guard us from associating an Index with the base Document + # class, only user defined subclasses should have an _index attr + _document_initialized = False + + def __new__(cls, name, bases, attrs): + new_cls = super(IndexMeta, cls).__new__(cls, name, bases, attrs) + if cls._document_initialized: + index_opts = attrs.pop("Index", None) + index = cls.construct_index(index_opts, bases) + new_cls._index = index + index.document(new_cls) + cls._document_initialized = True + return new_cls + + @classmethod + def construct_index(cls, opts, bases): + if opts is None: + for b in bases: + if hasattr(b, "_index"): + return b._index + + # Set None as Index name so it will set _all while making the query + return Index(name=None) + + i = Index(getattr(opts, "name", "*"), using=getattr(opts, "using", "default")) + i.settings(**getattr(opts, "settings", {})) + i.aliases(**getattr(opts, "aliases", {})) + for a in getattr(opts, "analyzers", ()): + i.analyzer(a) + return i + + +class DocumentOptions(object): + def __init__(self, name, bases, attrs): + meta = attrs.pop("Meta", None) + + # create the mapping instance + self.mapping = getattr(meta, "mapping", Mapping()) + + # register all declared fields into the mapping + for name, value in list(iteritems(attrs)): + if isinstance(value, Field): + self.mapping.field(name, value) + del attrs[name] + + # add all the mappings for meta fields + for name in dir(meta): + if isinstance(getattr(meta, name, None), MetaField): + params = getattr(meta, name) + self.mapping.meta(name, *params.args, **params.kwargs) + + # document inheritance - include the fields from parents' mappings + for b in bases: + if hasattr(b, "_doc_type") and hasattr(b._doc_type, "mapping"): + self.mapping.update(b._doc_type.mapping, update_only=True) + + @property + def name(self): + return self.mapping.properties.name + + +@add_metaclass(DocumentMeta) +class InnerDoc(ObjectBase): + """ + Common class for inner documents like Object or Nested + """ + + @classmethod + def from_opensearch(cls, data, data_only=False): + if data_only: + data = {"_source": data} + return super(InnerDoc, cls).from_opensearch(data) + + +@add_metaclass(IndexMeta) +class Document(ObjectBase): + """ + Model-like class for persisting documents in opensearch. + """ + + @classmethod + def _matches(cls, hit): + if cls._index._name is None: + return True + return fnmatch(hit.get("_index", ""), cls._index._name) + + @classmethod + def _get_using(cls, using=None): + return using or cls._index._using + + @classmethod + def _get_connection(cls, using=None): + return get_connection(cls._get_using(using)) + + @classmethod + def _default_index(cls, index=None): + return index or cls._index._name + + @classmethod + def init(cls, index=None, using=None): + """ + Create the index and populate the mappings in opensearch. + """ + i = cls._index + if index: + i = i.clone(name=index) + i.save(using=using) + + def _get_index(self, index=None, required=True): + if index is None: + index = getattr(self.meta, "index", None) + if index is None: + index = getattr(self._index, "_name", None) + if index is None and required: + raise ValidationException("No index") + if index and "*" in index: + raise ValidationException("You cannot write to a wildcard index.") + return index + + def __repr__(self): + return "{}({})".format( + self.__class__.__name__, + ", ".join( + "{}={!r}".format(key, getattr(self.meta, key)) + for key in ("index", "id") + if key in self.meta + ), + ) + + @classmethod + def search(cls, using=None, index=None): + """ + Create an :class:`~opensearchpy.Search` instance that will search + over this ``Document``. + """ + return Search( + using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls] + ) + + @classmethod + def get(cls, id, using=None, index=None, **kwargs): + """ + Retrieve a single document from opensearch using its ``id``. + + :arg id: ``id`` of the document to be retrieved + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``OpenSearch.get`` unchanged. + """ + opensearch = cls._get_connection(using) + doc = opensearch.get(index=cls._default_index(index), id=id, **kwargs) + if not doc.get("found", False): + return None + return cls.from_opensearch(doc) + + @classmethod + def exists(cls, id, using=None, index=None, **kwargs): + """ + check if exists a single document from opensearch using its ``id``. + + :arg id: ``id`` of the document to check if exists + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``OpenSearch.exists`` unchanged. + """ + opensearch = cls._get_connection(using) + return opensearch.exists(index=cls._default_index(index), id=id, **kwargs) + + @classmethod + def mget( + cls, docs, using=None, index=None, raise_on_error=True, missing="none", **kwargs + ): + r""" + Retrieve multiple document by their ``id``\s. Returns a list of instances + in the same order as requested. + + :arg docs: list of ``id``\s of the documents to be retrieved or a list + of document specifications as per + https://opensearch.org/docs/latest/opensearch/rest-api/document-apis/multi-get/ + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg missing: what to do when one of the documents requested is not + found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise + ``NotFoundError``) or ``'skip'`` (ignore the missing document). + + Any additional keyword arguments will be passed to + ``OpenSearch.mget`` unchanged. + """ + if missing not in ("raise", "skip", "none"): + raise ValueError("'missing' must be 'raise', 'skip', or 'none'.") + opensearch = cls._get_connection(using) + body = { + "docs": [ + doc if isinstance(doc, collections_abc.Mapping) else {"_id": doc} + for doc in docs + ] + } + results = opensearch.mget(body, index=cls._default_index(index), **kwargs) + + objs, error_docs, missing_docs = [], [], [] + for doc in results["docs"]: + if doc.get("found"): + if error_docs or missing_docs: + # We're going to raise an exception anyway, so avoid an + # expensive call to cls.from_opensearch(). + continue + + objs.append(cls.from_opensearch(doc)) + + elif doc.get("error"): + if raise_on_error: + error_docs.append(doc) + if missing == "none": + objs.append(None) + + # The doc didn't cause an error, but the doc also wasn't found. + elif missing == "raise": + missing_docs.append(doc) + elif missing == "none": + objs.append(None) + + if error_docs: + error_ids = [doc["_id"] for doc in error_docs] + message = "Required routing not provided for documents %s." + message %= ", ".join(error_ids) + raise RequestError(400, message, error_docs) + if missing_docs: + missing_ids = [doc["_id"] for doc in missing_docs] + message = "Documents %s not found." % ", ".join(missing_ids) + raise NotFoundError(404, message, {"docs": missing_docs}) + return objs + + def delete(self, using=None, index=None, **kwargs): + """ + Delete the instance in opensearch. + + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``OpenSearch.delete`` unchanged. + """ + opensearch = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + opensearch.delete(index=self._get_index(index), **doc_meta) + + def to_dict(self, include_meta=False, skip_empty=True): + """ + Serialize the instance into a dictionary so that it can be saved in opensearch. + + :arg include_meta: if set to ``True`` will include all the metadata + (``_index``, ``_id`` etc). Otherwise just the document's + data is serialized. This is useful when passing multiple instances into + ``opensearchpy.helpers.bulk``. + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in opensearch. + """ + d = super(Document, self).to_dict(skip_empty=skip_empty) + if not include_meta: + return d + + meta = {"_" + k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # in case of to_dict include the index unlike save/update/delete + index = self._get_index(required=False) + if index is not None: + meta["_index"] = index + + meta["_source"] = d + return meta + + def update( + self, + using=None, + index=None, + detect_noop=True, + doc_as_upsert=False, + refresh=False, + retry_on_conflict=None, + script=None, + script_id=None, + scripted_upsert=False, + upsert=None, + return_doc_meta=False, + **fields + ): + """ + Partial update of the document, specify fields you wish to update and + both the instance and the document in opensearch will be updated:: + + doc = MyDocument(title='Document Title!') + doc.save() + doc.update(title='New Document Title!') + + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg detect_noop: Set to ``False`` to disable noop detection. + :arg refresh: Control when the changes made by this request are visible + to search. Set to ``True`` for immediate effect. + :arg retry_on_conflict: In between the get and indexing phases of the + update, it is possible that another process might have already + updated the same document. By default, the update will fail with a + version conflict exception. The retry_on_conflict parameter + controls how many times to retry the update before finally throwing + an exception. + :arg doc_as_upsert: Instead of sending a partial doc plus an upsert + doc, setting doc_as_upsert to true will use the contents of doc as + the upsert value + :arg return_doc_meta: set to ``True`` to return all metadata from the + index API call instead of only the operation result + + :return operation result noop/updated + """ + body = { + "doc_as_upsert": doc_as_upsert, + "detect_noop": detect_noop, + } + + # scripted update + if script or script_id: + if upsert is not None: + body["upsert"] = upsert + + if script: + script = {"source": script} + else: + script = {"id": script_id} + + script["params"] = fields + + body["script"] = script + body["scripted_upsert"] = scripted_upsert + + # partial document update + else: + if not fields: + raise IllegalOperation( + "You cannot call update() without updating individual fields or a script. " + "If you wish to update the entire object use save()." + ) + + # update given fields locally + merge(self, fields) + + # prepare data for OpenSearch + values = self.to_dict() + + # if fields were given: partial update + body["doc"] = {k: values.get(k) for k in fields.keys()} + + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + if retry_on_conflict is not None: + doc_meta["retry_on_conflict"] = retry_on_conflict + + # Optimistic concurrency control + if ( + retry_on_conflict in (None, 0) + and "seq_no" in self.meta + and "primary_term" in self.meta + ): + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + meta = self._get_connection(using).update( + index=self._get_index(index), body=body, refresh=refresh, **doc_meta + ) + # update meta information from OpenSearch + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + def save( + self, + using=None, + index=None, + validate=True, + skip_empty=True, + return_doc_meta=False, + **kwargs + ): + """ + Save the document into opensearch. If the document doesn't exist it + is created, it is overwritten otherwise. Returns ``True`` if this + operations resulted in new document being created. + + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg validate: set to ``False`` to skip validating the document + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in opensearch. + :arg return_doc_meta: set to ``True`` to return all metadata from the + update API call instead of only the operation result + + Any additional keyword arguments will be passed to + ``OpenSearch.index`` unchanged. + + :return operation result created/updated + """ + if validate: + self.full_clean() + + opensearch = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + meta = opensearch.index( + index=self._get_index(index), + body=self.to_dict(skip_empty=skip_empty), + **doc_meta + ) + # update meta information from OpenSearch + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] diff --git a/opensearchpy/helpers/document.pyi b/opensearchpy/helpers/document.pyi new file mode 100644 index 00000000..d740b931 --- /dev/null +++ b/opensearchpy/helpers/document.pyi @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT. + +from .utils import ObjectBase + +class MetaField(object): ... +class DocumentMeta(type): ... +class IndexMeta(DocumentMeta): ... +class DocumentOptions(object): ... +class InnerDoc(ObjectBase): ... +class Document(ObjectBase): ... diff --git a/opensearchpy/helpers/faceted_search.py b/opensearchpy/helpers/faceted_search.py new file mode 100644 index 00000000..6da84dc2 --- /dev/null +++ b/opensearchpy/helpers/faceted_search.py @@ -0,0 +1,472 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime, timedelta + +from six import iteritems, itervalues + +from opensearchpy.helpers.aggs import A + +from .query import MatchAll, Nested, Range, Terms +from .response import Response +from .search import Search +from .utils import AttrDict + +__all__ = [ + "FacetedSearch", + "HistogramFacet", + "TermsFacet", + "DateHistogramFacet", + "RangeFacet", + "NestedFacet", +] + + +class Facet(object): + """ + A facet on faceted search. Wraps and aggregation and provides functionality + to create a filter for selected values and return a list of facet values + from the result of the aggregation. + """ + + agg_type = None + + def __init__(self, metric=None, metric_sort="desc", **kwargs): + self.filter_values = () + self._params = kwargs + self._metric = metric + if metric and metric_sort: + self._params["order"] = {"metric": metric_sort} + + def get_aggregation(self): + """ + Return the aggregation object. + """ + agg = A(self.agg_type, **self._params) + if self._metric: + agg.metric("metric", self._metric) + return agg + + def add_filter(self, filter_values): + """ + Construct a filter. + """ + if not filter_values: + return + + f = self.get_value_filter(filter_values[0]) + for v in filter_values[1:]: + f |= self.get_value_filter(v) + return f + + def get_value_filter(self, filter_value): + """ + Construct a filter for an individual value + """ + pass + + def is_filtered(self, key, filter_values): + """ + Is a filter active on the given key. + """ + return key in filter_values + + def get_value(self, bucket): + """ + return a value representing a bucket. Its key as default. + """ + return bucket["key"] + + def get_metric(self, bucket): + """ + Return a metric, by default doc_count for a bucket. + """ + if self._metric: + return bucket["metric"]["value"] + return bucket["doc_count"] + + def get_values(self, data, filter_values): + """ + Turn the raw bucket data into a list of tuples containing the key, + number of documents and a flag indicating whether this value has been + selected or not. + """ + out = [] + for bucket in data.buckets: + key = self.get_value(bucket) + out.append( + (key, self.get_metric(bucket), self.is_filtered(key, filter_values)) + ) + return out + + +class TermsFacet(Facet): + agg_type = "terms" + + def add_filter(self, filter_values): + """Create a terms filter instead of bool containing term filters.""" + if filter_values: + return Terms( + _expand__to_dot=False, **{self._params["field"]: filter_values} + ) + + +class RangeFacet(Facet): + agg_type = "range" + + def _range_to_dict(self, range): + key, range = range + out = {"key": key} + if range[0] is not None: + out["from"] = range[0] + if range[1] is not None: + out["to"] = range[1] + return out + + def __init__(self, ranges, **kwargs): + super(RangeFacet, self).__init__(**kwargs) + self._params["ranges"] = list(map(self._range_to_dict, ranges)) + self._params["keyed"] = False + self._ranges = dict(ranges) + + def get_value_filter(self, filter_value): + f, t = self._ranges[filter_value] + limits = {} + if f is not None: + limits["gte"] = f + if t is not None: + limits["lt"] = t + + return Range(_expand__to_dot=False, **{self._params["field"]: limits}) + + +class HistogramFacet(Facet): + agg_type = "histogram" + + def get_value_filter(self, filter_value): + return Range( + _expand__to_dot=False, + **{ + self._params["field"]: { + "gte": filter_value, + "lt": filter_value + self._params["interval"], + } + } + ) + + +def _date_interval_year(d): + return d.replace( + year=d.year + 1, day=(28 if d.month == 2 and d.day == 29 else d.day) + ) + + +def _date_interval_month(d): + return (d + timedelta(days=32)).replace(day=1) + + +def _date_interval_week(d): + return d + timedelta(days=7) + + +def _date_interval_day(d): + return d + timedelta(days=1) + + +def _date_interval_hour(d): + return d + timedelta(hours=1) + + +class DateHistogramFacet(Facet): + agg_type = "date_histogram" + + DATE_INTERVALS = { + "year": _date_interval_year, + "1Y": _date_interval_year, + "month": _date_interval_month, + "1M": _date_interval_month, + "week": _date_interval_week, + "1w": _date_interval_week, + "day": _date_interval_day, + "1d": _date_interval_day, + "hour": _date_interval_hour, + "1h": _date_interval_hour, + } + + def __init__(self, **kwargs): + kwargs.setdefault("min_doc_count", 0) + super(DateHistogramFacet, self).__init__(**kwargs) + + def get_value(self, bucket): + if not isinstance(bucket["key"], datetime): + # OpenSearch returns key=None instead of 0 for date 1970-01-01, + # so we need to set key to 0 to avoid TypeError exception + if bucket["key"] is None: + bucket["key"] = 0 + # Preserve milliseconds in the datetime + return datetime.utcfromtimestamp(int(bucket["key"]) / 1000.0) + else: + return bucket["key"] + + def get_value_filter(self, filter_value): + for interval_type in ("calendar_interval", "fixed_interval"): + if interval_type in self._params: + break + else: + interval_type = "interval" + + return Range( + _expand__to_dot=False, + **{ + self._params["field"]: { + "gte": filter_value, + "lt": self.DATE_INTERVALS[self._params[interval_type]]( + filter_value + ), + } + } + ) + + +class NestedFacet(Facet): + agg_type = "nested" + + def __init__(self, path, nested_facet): + self._path = path + self._inner = nested_facet + super(NestedFacet, self).__init__( + path=path, aggs={"inner": nested_facet.get_aggregation()} + ) + + def get_values(self, data, filter_values): + return self._inner.get_values(data.inner, filter_values) + + def add_filter(self, filter_values): + inner_q = self._inner.add_filter(filter_values) + if inner_q: + return Nested(path=self._path, query=inner_q) + + +class FacetedResponse(Response): + @property + def query_string(self): + return self._faceted_search._query + + @property + def facets(self): + if not hasattr(self, "_facets"): + super(AttrDict, self).__setattr__("_facets", AttrDict({})) + for name, facet in iteritems(self._faceted_search.facets): + self._facets[name] = facet.get_values( + getattr(getattr(self.aggregations, "_filter_" + name), name), + self._faceted_search.filter_values.get(name, ()), + ) + return self._facets + + +class FacetedSearch(object): + """ + Abstraction for creating faceted navigation searches that takes care of + composing the queries, aggregations and filters as needed as well as + presenting the results in an easy-to-consume fashion:: + + class BlogSearch(FacetedSearch): + index = 'blogs' + doc_types = [Blog, Post] + fields = ['title^5', 'category', 'description', 'body'] + + facets = { + 'type': TermsFacet(field='_type'), + 'category': TermsFacet(field='category'), + 'weekly_posts': DateHistogramFacet(field='published_from', interval='week') + } + + def search(self): + ' Override search to add your own filters ' + s = super(BlogSearch, self).search() + return s.filter('term', published=True) + + # when using: + blog_search = BlogSearch("web framework", filters={"category": "python"}) + + # supports pagination + blog_search[10:20] + + response = blog_search.execute() + + # easy access to aggregation results: + for category, hit_count, is_selected in response.facets.category: + print( + "Category %s has %d hits%s." % ( + category, + hit_count, + ' and is chosen' if is_selected else '' + ) + ) + + """ + + index = None + doc_types = None + fields = None + facets = {} + using = "default" + + def __init__(self, query=None, filters={}, sort=()): + """ + :arg query: the text to search for + :arg filters: facet values to filter + :arg sort: sort information to be passed to :class:`~opensearchpy.Search` + """ + self._query = query + self._filters = {} + self._sort = sort + self.filter_values = {} + for name, value in iteritems(filters): + self.add_filter(name, value) + + self._s = self.build_search() + + def count(self): + return self._s.count() + + def __getitem__(self, k): + self._s = self._s[k] + return self + + def __iter__(self): + return iter(self._s) + + def add_filter(self, name, filter_values): + """ + Add a filter for a facet. + """ + # normalize the value into a list + if not isinstance(filter_values, (tuple, list)): + if filter_values is None: + return + filter_values = [ + filter_values, + ] + + # remember the filter values for use in FacetedResponse + self.filter_values[name] = filter_values + + # get the filter from the facet + f = self.facets[name].add_filter(filter_values) + if f is None: + return + + self._filters[name] = f + + def search(self): + """ + Returns the base Search object to which the facets are added. + + You can customize the query by overriding this method and returning a + modified search object. + """ + s = Search(doc_type=self.doc_types, index=self.index, using=self.using) + return s.response_class(FacetedResponse) + + def query(self, search, query): + """ + Add query part to ``search``. + + Override this if you wish to customize the query used. + """ + if query: + if self.fields: + return search.query("multi_match", fields=self.fields, query=query) + else: + return search.query("multi_match", query=query) + return search + + def aggregate(self, search): + """ + Add aggregations representing the facets selected, including potential + filters. + """ + for f, facet in iteritems(self.facets): + agg = facet.get_aggregation() + agg_filter = MatchAll() + for field, filter in iteritems(self._filters): + if f == field: + continue + agg_filter &= filter + search.aggs.bucket("_filter_" + f, "filter", filter=agg_filter).bucket( + f, agg + ) + + def filter(self, search): + """ + Add a ``post_filter`` to the search request narrowing the results based + on the facet filters. + """ + if not self._filters: + return search + + post_filter = MatchAll() + for f in itervalues(self._filters): + post_filter &= f + return search.post_filter(post_filter) + + def highlight(self, search): + """ + Add highlighting for all the fields + """ + return search.highlight( + *(f if "^" not in f else f.split("^", 1)[0] for f in self.fields) + ) + + def sort(self, search): + """ + Add sorting information to the request. + """ + if self._sort: + search = search.sort(*self._sort) + return search + + def build_search(self): + """ + Construct the ``Search`` object. + """ + s = self.search() + s = self.query(s, self._query) + s = self.filter(s) + if self.fields: + s = self.highlight(s) + s = self.sort(s) + self.aggregate(s) + return s + + def execute(self): + """ + Execute the search and return the response. + """ + r = self._s.execute() + r._faceted_search = self + return r diff --git a/opensearchpy/helpers/faceted_search.pyi b/opensearchpy/helpers/faceted_search.pyi new file mode 100644 index 00000000..3f1d175b --- /dev/null +++ b/opensearchpy/helpers/faceted_search.pyi @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from opensearchpy.helpers.response import Response + +class Facet(object): ... +class TermsFacet(Facet): ... +class RangeFacet(Facet): ... +class HistogramFacet(Facet): ... +class DateHistogramFacet(Facet): ... +class NestedFacet(Facet): ... +class FacetedResponse(Response): ... +class FacetedSearch(object): ... diff --git a/opensearchpy/helpers/field.py b/opensearchpy/helpers/field.py new file mode 100644 index 00000000..756a3a0e --- /dev/null +++ b/opensearchpy/helpers/field.py @@ -0,0 +1,530 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import base64 +import copy +import ipaddress + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from datetime import date, datetime + +from dateutil import parser, tz +from six import integer_types, iteritems, string_types +from six.moves import map + +from ..exceptions import ValidationException +from .query import Q +from .utils import AttrDict, AttrList, DslBase +from .wrappers import Range + +unicode = type("") + + +def construct_field(name_or_field, **params): + # {"type": "text", "analyzer": "snowball"} + if isinstance(name_or_field, collections_abc.Mapping): + if params: + raise ValueError( + "construct_field() cannot accept parameters when passing in a dict." + ) + params = name_or_field.copy() + if "type" not in params: + # inner object can be implicitly defined + if "properties" in params: + name = "object" + else: + raise ValueError('construct_field() needs to have a "type" key.') + else: + name = params.pop("type") + return Field.get_dsl_class(name)(**params) + + # Text() + if isinstance(name_or_field, Field): + if params: + raise ValueError( + "construct_field() cannot accept parameters " + "when passing in a construct_field object." + ) + return name_or_field + + # "text", analyzer="snowball" + return Field.get_dsl_class(name_or_field)(**params) + + +class Field(DslBase): + _type_name = "field" + _type_shortcut = staticmethod(construct_field) + # all fields can be multifields + _param_defs = {"fields": {"type": "field", "hash": True}} + name = None + _coerce = False + + def __init__(self, multi=False, required=False, *args, **kwargs): + """ + :arg bool multi: specifies whether field can contain array of values + :arg bool required: specifies whether field is required + """ + self._multi = multi + self._required = required + super(Field, self).__init__(*args, **kwargs) + + def __getitem__(self, subfield): + return self._params.get("fields", {})[subfield] + + def _serialize(self, data): + return data + + def _deserialize(self, data): + return data + + def _empty(self): + return None + + def empty(self): + if self._multi: + return AttrList([]) + return self._empty() + + def serialize(self, data): + if isinstance(data, (list, AttrList, tuple)): + return list(map(self._serialize, data)) + return self._serialize(data) + + def deserialize(self, data): + if isinstance(data, (list, AttrList, tuple)): + data = [None if d is None else self._deserialize(d) for d in data] + return data + if data is None: + return None + return self._deserialize(data) + + def clean(self, data): + if data is not None: + data = self.deserialize(data) + if data in (None, [], {}) and self._required: + raise ValidationException("Value required for this field.") + return data + + def to_dict(self): + d = super(Field, self).to_dict() + name, value = d.popitem() + value["type"] = name + return value + + +class CustomField(Field): + name = "custom" + _coerce = True + + def to_dict(self): + if isinstance(self.builtin_type, Field): + return self.builtin_type.to_dict() + + d = super(CustomField, self).to_dict() + d["type"] = self.builtin_type + return d + + +class Object(Field): + name = "object" + _coerce = True + + def __init__(self, doc_class=None, dynamic=None, properties=None, **kwargs): + """ + :arg document.InnerDoc doc_class: base doc class that handles mapping. + If no `doc_class` is provided, new instance of `InnerDoc` will be created, + populated with `properties` and used. Can not be provided together with `properties` + :arg dynamic: whether new properties may be created dynamically. + Valid values are `True`, `False`, `'strict'`. + Can not be provided together with `doc_class`. + :arg dict properties: used to construct underlying mapping if no `doc_class` is provided. + Can not be provided together with `doc_class` + """ + if doc_class and (properties or dynamic is not None): + raise ValidationException( + "doc_class and properties/dynamic should not be provided together" + ) + if doc_class: + self._doc_class = doc_class + else: + # FIXME import + from opensearchpy.helpers.document import InnerDoc + + # no InnerDoc subclass, creating one instead... + self._doc_class = type("InnerDoc", (InnerDoc,), {}) + for name, field in iteritems(properties or {}): + self._doc_class._doc_type.mapping.field(name, field) + if dynamic is not None: + self._doc_class._doc_type.mapping.meta("dynamic", dynamic) + + self._mapping = copy.deepcopy(self._doc_class._doc_type.mapping) + super(Object, self).__init__(**kwargs) + + def __getitem__(self, name): + return self._mapping[name] + + def __contains__(self, name): + return name in self._mapping + + def _empty(self): + return self._wrap({}) + + def _wrap(self, data): + return self._doc_class.from_opensearch(data, data_only=True) + + def empty(self): + if self._multi: + return AttrList([], self._wrap) + return self._empty() + + def to_dict(self): + d = self._mapping.to_dict() + d.update(super(Object, self).to_dict()) + return d + + def _collect_fields(self): + return self._mapping.properties._collect_fields() + + def _deserialize(self, data): + # don't wrap already wrapped data + if isinstance(data, self._doc_class): + return data + + if isinstance(data, AttrDict): + data = data._d_ + + return self._wrap(data) + + def _serialize(self, data): + if data is None: + return None + + # somebody assigned raw dict to the field, we should tolerate that + if isinstance(data, collections_abc.Mapping): + return data + + return data.to_dict() + + def clean(self, data): + data = super(Object, self).clean(data) + if data is None: + return None + if isinstance(data, (list, AttrList)): + for d in data: + d.full_clean() + else: + data.full_clean() + return data + + def update(self, other, update_only=False): + if not isinstance(other, Object): + # not an inner/nested object, no merge possible + return + + self._mapping.update(other._mapping, update_only) + + +class Nested(Object): + name = "nested" + + def __init__(self, *args, **kwargs): + kwargs.setdefault("multi", True) + super(Nested, self).__init__(*args, **kwargs) + + +class Date(Field): + name = "date" + _coerce = True + + def __init__(self, default_timezone=None, *args, **kwargs): + """ + :arg default_timezone: timezone that will be automatically used for tz-naive values + May be instance of `datetime.tzinfo` or string containing TZ offset + """ + self._default_timezone = default_timezone + if isinstance(self._default_timezone, string_types): + self._default_timezone = tz.gettz(self._default_timezone) + super(Date, self).__init__(*args, **kwargs) + + def _deserialize(self, data): + if isinstance(data, string_types): + try: + data = parser.parse(data) + except Exception as e: + raise ValidationException( + "Could not parse date from the value (%r)" % data, e + ) + + if isinstance(data, datetime): + if self._default_timezone and data.tzinfo is None: + data = data.replace(tzinfo=self._default_timezone) + return data + if isinstance(data, date): + return data + if isinstance(data, integer_types): + # Divide by a float to preserve milliseconds on the datetime. + return datetime.utcfromtimestamp(data / 1000.0) + + raise ValidationException("Could not parse date from the value (%r)" % data) + + +class Text(Field): + _param_defs = { + "fields": {"type": "field", "hash": True}, + "analyzer": {"type": "analyzer"}, + "search_analyzer": {"type": "analyzer"}, + "search_quote_analyzer": {"type": "analyzer"}, + } + name = "text" + + +class SearchAsYouType(Field): + _param_defs = { + "analyzer": {"type": "analyzer"}, + "search_analyzer": {"type": "analyzer"}, + "search_quote_analyzer": {"type": "analyzer"}, + } + name = "search_as_you_type" + + +class Keyword(Field): + _param_defs = { + "fields": {"type": "field", "hash": True}, + "search_analyzer": {"type": "analyzer"}, + "normalizer": {"type": "normalizer"}, + } + name = "keyword" + + +class ConstantKeyword(Keyword): + name = "constant_keyword" + + +class Boolean(Field): + name = "boolean" + _coerce = True + + def _deserialize(self, data): + if data == "false": + return False + return bool(data) + + def clean(self, data): + if data is not None: + data = self.deserialize(data) + if data is None and self._required: + raise ValidationException("Value required for this field.") + return data + + +class Float(Field): + name = "float" + _coerce = True + + def _deserialize(self, data): + return float(data) + + +class DenseVector(Float): + name = "dense_vector" + + def __init__(self, dims, **kwargs): + kwargs["multi"] = True + super(DenseVector, self).__init__(dims=dims, **kwargs) + + +class SparseVector(Field): + name = "sparse_vector" + + +class HalfFloat(Float): + name = "half_float" + + +class ScaledFloat(Float): + name = "scaled_float" + + def __init__(self, scaling_factor, *args, **kwargs): + super(ScaledFloat, self).__init__( + scaling_factor=scaling_factor, *args, **kwargs + ) + + +class Double(Float): + name = "double" + + +class RankFeature(Float): + name = "rank_feature" + + +class RankFeatures(Field): + name = "rank_features" + + +class Integer(Field): + name = "integer" + _coerce = True + + def _deserialize(self, data): + return int(data) + + +class Byte(Integer): + name = "byte" + + +class Short(Integer): + name = "short" + + +class Long(Integer): + name = "long" + + +class Ip(Field): + name = "ip" + _coerce = True + + def _deserialize(self, data): + # the ipaddress library for pypy only accepts unicode. + return ipaddress.ip_address(unicode(data)) + + def _serialize(self, data): + if data is None: + return None + return str(data) + + +class Binary(Field): + name = "binary" + _coerce = True + + def clean(self, data): + # Binary fields are opaque, so there's not much cleaning + # that can be done. + return data + + def _deserialize(self, data): + return base64.b64decode(data) + + def _serialize(self, data): + if data is None: + return None + return base64.b64encode(data).decode() + + +class GeoPoint(Field): + name = "geo_point" + + +class GeoShape(Field): + name = "geo_shape" + + +class Completion(Field): + _param_defs = { + "analyzer": {"type": "analyzer"}, + "search_analyzer": {"type": "analyzer"}, + } + name = "completion" + + +class Percolator(Field): + name = "percolator" + _coerce = True + + def _deserialize(self, data): + return Q(data) + + def _serialize(self, data): + if data is None: + return None + return data.to_dict() + + +class RangeField(Field): + _coerce = True + _core_field = None + + def _deserialize(self, data): + if isinstance(data, Range): + return data + data = dict((k, self._core_field.deserialize(v)) for k, v in iteritems(data)) + return Range(data) + + def _serialize(self, data): + if data is None: + return None + if not isinstance(data, collections_abc.Mapping): + data = data.to_dict() + return dict((k, self._core_field.serialize(v)) for k, v in iteritems(data)) + + +class IntegerRange(RangeField): + name = "integer_range" + _core_field = Integer() + + +class FloatRange(RangeField): + name = "float_range" + _core_field = Float() + + +class LongRange(RangeField): + name = "long_range" + _core_field = Long() + + +class DoubleRange(RangeField): + name = "double_range" + _core_field = Double() + + +class DateRange(RangeField): + name = "date_range" + _core_field = Date() + + +class IpRange(Field): + # not a RangeField since ip_range supports CIDR ranges + name = "ip_range" + + +class Join(Field): + name = "join" + + +class TokenCount(Field): + name = "token_count" + + +class Murmur3(Field): + name = "murmur3" diff --git a/opensearchpy/helpers/field.pyi b/opensearchpy/helpers/field.pyi new file mode 100644 index 00000000..3704aa81 --- /dev/null +++ b/opensearchpy/helpers/field.pyi @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +from .utils import DslBase + +class Field(DslBase): ... +class CustomField(Field): ... +class Object(Field): ... +class Nested(Object): ... +class Date(Field): ... +class Text(Field): ... +class SearchAsYouType(Field): ... +class Keyword(Field): ... +class ConstantKeyword(Keyword): ... +class Boolean(Field): ... +class Float(Field): ... +class DenseVector(Float): ... +class SparseVector(Field): ... +class HalfFloat(Float): ... +class ScaledFloat(Float): ... +class Double(Float): ... +class RankFeature(Float): ... +class RankFeatures(Field): ... +class Integer(Field): ... +class Byte(Integer): ... +class Short(Integer): ... +class Long(Integer): ... +class Ip(Field): ... +class Binary(Field): ... +class GeoPoint(Field): ... +class GeoShape(Field): ... +class Completion(Field): ... +class Percolator(Field): ... +class RangeField(Field): ... +class IntegerRange(RangeField): ... +class FloatRange(RangeField): ... +class LongRange(RangeField): ... +class DoubleRange(RangeField): ... +class DateRange(RangeField): ... +class IpRange(Field): ... +class Join(Field): ... +class TokenCount(Field): ... +class Murmur3(Field): ... + +def construct_field(name_or_field: Any, **params: Any) -> Any: ... diff --git a/opensearchpy/helpers/function.py b/opensearchpy/helpers/function.py new file mode 100644 index 00000000..5b8db7b0 --- /dev/null +++ b/opensearchpy/helpers/function.py @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from .utils import DslBase + + +def SF(name_or_sf, **params): + # {"script_score": {"script": "_score"}, "filter": {}} + if isinstance(name_or_sf, collections_abc.Mapping): + if params: + raise ValueError("SF() cannot accept parameters when passing in a dict.") + kwargs = {} + sf = name_or_sf.copy() + for k in ScoreFunction._param_defs: + if k in name_or_sf: + kwargs[k] = sf.pop(k) + + # not sf, so just filter+weight, which used to be boost factor + if not sf: + name = "boost_factor" + # {'FUNCTION': {...}} + elif len(sf) == 1: + name, params = sf.popitem() + else: + raise ValueError("SF() got an unexpected fields in the dictionary: %r" % sf) + + # boost factor special case, see https://github.com/elastic/elasticsearch/issues/6343 + if not isinstance(params, collections_abc.Mapping): + params = {"value": params} + + # mix known params (from _param_defs) and from inside the function + kwargs.update(params) + return ScoreFunction.get_dsl_class(name)(**kwargs) + + # ScriptScore(script="_score", filter=Q()) + if isinstance(name_or_sf, ScoreFunction): + if params: + raise ValueError( + "SF() cannot accept parameters when passing in a ScoreFunction object." + ) + return name_or_sf + + # "script_score", script="_score", filter=Q() + return ScoreFunction.get_dsl_class(name_or_sf)(**params) + + +class ScoreFunction(DslBase): + _type_name = "score_function" + _type_shortcut = staticmethod(SF) + _param_defs = { + "query": {"type": "query"}, + "filter": {"type": "query"}, + "weight": {}, + } + name = None + + def to_dict(self): + d = super(ScoreFunction, self).to_dict() + # filter and query dicts should be at the same level as us + for k in self._param_defs: + if k in d[self.name]: + d[k] = d[self.name].pop(k) + return d + + +class ScriptScore(ScoreFunction): + name = "script_score" + + +class BoostFactor(ScoreFunction): + name = "boost_factor" + + def to_dict(self): + d = super(BoostFactor, self).to_dict() + if "value" in d[self.name]: + d[self.name] = d[self.name].pop("value") + else: + del d[self.name] + return d + + +class RandomScore(ScoreFunction): + name = "random_score" + + +class FieldValueFactor(ScoreFunction): + name = "field_value_factor" + + +class Linear(ScoreFunction): + name = "linear" + + +class Gauss(ScoreFunction): + name = "gauss" + + +class Exp(ScoreFunction): + name = "exp" diff --git a/opensearchpy/helpers/function.pyi b/opensearchpy/helpers/function.pyi new file mode 100644 index 00000000..58a00fba --- /dev/null +++ b/opensearchpy/helpers/function.pyi @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +from .utils import DslBase + +class ScoreFunction(DslBase): ... +class ScriptScore(ScoreFunction): ... +class BoostFactor(ScoreFunction): ... +class RandomScore(ScoreFunction): ... +class FieldValueFactor(ScoreFunction): ... +class Linear(ScoreFunction): ... +class Gauss(ScoreFunction): ... +class Exp(ScoreFunction): ... + +def SF(name_or_sf: Any, **params: Any) -> Any: ... diff --git a/opensearchpy/helpers/index.py b/opensearchpy/helpers/index.py new file mode 100644 index 00000000..d6e08b50 --- /dev/null +++ b/opensearchpy/helpers/index.py @@ -0,0 +1,640 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from opensearchpy.connection.connections import get_connection +from opensearchpy.helpers import analysis + +from ..exceptions import IllegalOperation +from .mapping import Mapping +from .search import Search +from .update_by_query import UpdateByQuery +from .utils import merge + + +class IndexTemplate(object): + def __init__(self, name, template, index=None, order=None, **kwargs): + if index is None: + self._index = Index(template, **kwargs) + else: + if kwargs: + raise ValueError( + "You cannot specify options for Index when" + " passing an Index instance." + ) + self._index = index.clone() + self._index._name = template + self._template_name = name + self.order = order + + def __getattr__(self, attr_name): + return getattr(self._index, attr_name) + + def to_dict(self): + d = self._index.to_dict() + d["index_patterns"] = [self._index._name] + if self.order is not None: + d["order"] = self.order + return d + + def save(self, using=None): + opensearch = get_connection(using or self._index._using) + return opensearch.indices.put_template( + name=self._template_name, body=self.to_dict() + ) + + +class Index(object): + def __init__(self, name, using="default"): + """ + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + self._name = name + self._doc_types = [] + self._using = using + self._settings = {} + self._aliases = {} + self._analysis = {} + self._mapping = None + + def get_or_create_mapping(self): + if self._mapping is None: + self._mapping = Mapping() + return self._mapping + + def as_template(self, template_name, pattern=None, order=None): + # TODO: should we allow pattern to be a top-level arg? + # or maybe have an IndexPattern that allows for it and have + # Document._index be that? + return IndexTemplate( + template_name, pattern or self._name, index=self, order=order + ) + + def resolve_nested(self, field_path): + for doc in self._doc_types: + nested, field = doc._doc_type.mapping.resolve_nested(field_path) + if field is not None: + return nested, field + if self._mapping: + return self._mapping.resolve_nested(field_path) + return (), None + + def resolve_field(self, field_path): + for doc in self._doc_types: + field = doc._doc_type.mapping.resolve_field(field_path) + if field is not None: + return field + if self._mapping: + return self._mapping.resolve_field(field_path) + return None + + def load_mappings(self, using=None): + self.get_or_create_mapping().update_from_opensearch( + self._name, using=using or self._using + ) + + def clone(self, name=None, using=None): + """ + Create a copy of the instance with another name or connection alias. + Useful for creating multiple indices with shared configuration:: + + i = Index('base-index') + i.settings(number_of_shards=1) + i.create() + + i2 = i.clone('other-index') + i2.create() + + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + i = Index(name or self._name, using=using or self._using) + i._settings = self._settings.copy() + i._aliases = self._aliases.copy() + i._analysis = self._analysis.copy() + i._doc_types = self._doc_types[:] + if self._mapping is not None: + i._mapping = self._mapping._clone() + return i + + def _get_connection(self, using=None): + if self._name is None: + raise ValueError("You cannot perform API calls on the default index.") + return get_connection(using or self._using) + + connection = property(_get_connection) + + def mapping(self, mapping): + """ + Associate a mapping (an instance of + :class:`~opensearchpy.Mapping`) with this index. + This means that, when this index is created, it will contain the + mappings for the document type defined by those mappings. + """ + self.get_or_create_mapping().update(mapping) + + def document(self, document): + """ + Associate a :class:`~opensearchpy.Document` subclass with an index. + This means that, when this index is created, it will contain the + mappings for the ``Document``. If the ``Document`` class doesn't have a + default index yet (by defining ``class Index``), this instance will be + used. Can be used as a decorator:: + + i = Index('blog') + + @i.document + class Post(Document): + title = Text() + + # create the index, including Post mappings + i.create() + + # .search() will now return a Search object that will return + # properly deserialized Post instances + s = i.search() + """ + self._doc_types.append(document) + + # If the document index does not have any name, that means the user + # did not set any index already to the document. + # So set this index as document index + if document._index._name is None: + document._index = self + + return document + + def settings(self, **kwargs): + """ + Add settings to the index:: + + i = Index('i') + i.settings(number_of_shards=1, number_of_replicas=0) + + Multiple calls to ``settings`` will merge the keys, later overriding + the earlier. + """ + self._settings.update(kwargs) + return self + + def aliases(self, **kwargs): + """ + Add aliases to the index definition:: + + i = Index('blog-v2') + i.aliases(blog={}, published={'filter': Q('term', published=True)}) + """ + self._aliases.update(kwargs) + return self + + def analyzer(self, *args, **kwargs): + """ + Explicitly add an analyzer to an index. Note that all custom analyzers + defined in mappings will also be created. This is useful for search analyzers. + + Example:: + + from opensearchpy import analyzer, tokenizer + + my_analyzer = analyzer('my_analyzer', + tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3), + filter=['lowercase'] + ) + + i = Index('blog') + i.analyzer(my_analyzer) + + """ + analyzer = analysis.analyzer(*args, **kwargs) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + return + + # merge the definition + merge(self._analysis, d, True) + + def to_dict(self): + out = {} + if self._settings: + out["settings"] = self._settings + if self._aliases: + out["aliases"] = self._aliases + mappings = self._mapping.to_dict() if self._mapping else {} + analysis = self._mapping._collect_analysis() if self._mapping else {} + for d in self._doc_types: + mapping = d._doc_type.mapping + merge(mappings, mapping.to_dict(), True) + merge(analysis, mapping._collect_analysis(), True) + if mappings: + out["mappings"] = mappings + if analysis or self._analysis: + merge(analysis, self._analysis) + out.setdefault("settings", {})["analysis"] = analysis + return out + + def search(self, using=None): + """ + Return a :class:`~opensearchpy.Search` object searching over the + index (or all the indices belonging to this template) and its + ``Document``\\s. + """ + return Search( + using=using or self._using, index=self._name, doc_type=self._doc_types + ) + + def updateByQuery(self, using=None): + """ + Return a :class:`~opensearchpy.UpdateByQuery` object searching over the index + (or all the indices belonging to this template) and updating Documents that match + the search criteria. + + For more information, see here: + https://opensearch.org/docs/latest/opensearch/rest-api/document-apis/update-by-query/ + """ + return UpdateByQuery( + using=using or self._using, + index=self._name, + ) + + def create(self, using=None, **kwargs): + """ + Creates the index in opensearch. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.create`` unchanged. + """ + return self._get_connection(using).indices.create( + index=self._name, body=self.to_dict(), **kwargs + ) + + def is_closed(self, using=None): + state = self._get_connection(using).cluster.state( + index=self._name, metric="metadata" + ) + return state["metadata"]["indices"][self._name]["state"] == "close" + + def save(self, using=None): + """ + Sync the index definition with opensearch, creating the index if it + doesn't exist and updating its settings and mappings if it does. + + Note some settings and mapping changes cannot be done on an open + index (or at all on an existing index) and for those this method will + fail with the underlying exception. + """ + if not self.exists(using=using): + return self.create(using=using) + + body = self.to_dict() + settings = body.pop("settings", {}) + analysis = settings.pop("analysis", None) + current_settings = self.get_settings(using=using)[self._name]["settings"][ + "index" + ] + if analysis: + if self.is_closed(using=using): + # closed index, update away + settings["analysis"] = analysis + else: + # compare analysis definition, if all analysis objects are + # already defined as requested, skip analysis update and + # proceed, otherwise raise IllegalOperation + existing_analysis = current_settings.get("analysis", {}) + if any( + existing_analysis.get(section, {}).get(k, None) + != analysis[section][k] + for section in analysis + for k in analysis[section] + ): + raise IllegalOperation( + "You cannot update analysis configuration on an open index, " + "you need to close index %s first." % self._name + ) + + # try and update the settings + if settings: + settings = settings.copy() + for k, v in list(settings.items()): + if k in current_settings and current_settings[k] == str(v): + del settings[k] + + if settings: + self.put_settings(using=using, body=settings) + + # update the mappings, any conflict in the mappings will result in an + # exception + mappings = body.pop("mappings", {}) + if mappings: + self.put_mapping(using=using, body=mappings) + + def analyze(self, using=None, **kwargs): + """ + Perform the analysis process on a text and return the tokens breakdown + of the text. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.analyze`` unchanged. + """ + return self._get_connection(using).indices.analyze(index=self._name, **kwargs) + + def refresh(self, using=None, **kwargs): + """ + Performs a refresh operation on the index. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.refresh`` unchanged. + """ + return self._get_connection(using).indices.refresh(index=self._name, **kwargs) + + def flush(self, using=None, **kwargs): + """ + Performs a flush operation on the index. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.flush`` unchanged. + """ + return self._get_connection(using).indices.flush(index=self._name, **kwargs) + + def get(self, using=None, **kwargs): + """ + The get index API allows to retrieve information about the index. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.get`` unchanged. + """ + return self._get_connection(using).indices.get(index=self._name, **kwargs) + + def open(self, using=None, **kwargs): + """ + Opens the index in opensearch. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.open`` unchanged. + """ + return self._get_connection(using).indices.open(index=self._name, **kwargs) + + def close(self, using=None, **kwargs): + """ + Closes the index in opensearch. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.close`` unchanged. + """ + return self._get_connection(using).indices.close(index=self._name, **kwargs) + + def delete(self, using=None, **kwargs): + """ + Deletes the index in opensearch. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.delete`` unchanged. + """ + return self._get_connection(using).indices.delete(index=self._name, **kwargs) + + def exists(self, using=None, **kwargs): + """ + Returns ``True`` if the index already exists in opensearch. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.exists`` unchanged. + """ + return self._get_connection(using).indices.exists(index=self._name, **kwargs) + + def put_mapping(self, using=None, **kwargs): + """ + Register specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.put_mapping`` unchanged. + """ + return self._get_connection(using).indices.put_mapping( + index=self._name, **kwargs + ) + + def get_mapping(self, using=None, **kwargs): + """ + Retrieve specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.get_mapping`` unchanged. + """ + return self._get_connection(using).indices.get_mapping( + index=self._name, **kwargs + ) + + def get_field_mapping(self, using=None, **kwargs): + """ + Retrieve mapping definition of a specific field. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.get_field_mapping`` unchanged. + """ + return self._get_connection(using).indices.get_field_mapping( + index=self._name, **kwargs + ) + + def put_alias(self, using=None, **kwargs): + """ + Create an alias for the index. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.put_alias`` unchanged. + """ + return self._get_connection(using).indices.put_alias(index=self._name, **kwargs) + + def exists_alias(self, using=None, **kwargs): + """ + Return a boolean indicating whether given alias exists for this index. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.exists_alias`` unchanged. + """ + return self._get_connection(using).indices.exists_alias( + index=self._name, **kwargs + ) + + def get_alias(self, using=None, **kwargs): + """ + Retrieve a specified alias. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.get_alias`` unchanged. + """ + return self._get_connection(using).indices.get_alias(index=self._name, **kwargs) + + def delete_alias(self, using=None, **kwargs): + """ + Delete specific alias. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.delete_alias`` unchanged. + """ + return self._get_connection(using).indices.delete_alias( + index=self._name, **kwargs + ) + + def get_settings(self, using=None, **kwargs): + """ + Retrieve settings for the index. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.get_settings`` unchanged. + """ + return self._get_connection(using).indices.get_settings( + index=self._name, **kwargs + ) + + def put_settings(self, using=None, **kwargs): + """ + Change specific index level settings in real time. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.put_settings`` unchanged. + """ + return self._get_connection(using).indices.put_settings( + index=self._name, **kwargs + ) + + def stats(self, using=None, **kwargs): + """ + Retrieve statistics on different operations happening on the index. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.stats`` unchanged. + """ + return self._get_connection(using).indices.stats(index=self._name, **kwargs) + + def segments(self, using=None, **kwargs): + """ + Provide low level segments information that a Lucene index (shard + level) is built with. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.segments`` unchanged. + """ + return self._get_connection(using).indices.segments(index=self._name, **kwargs) + + def validate_query(self, using=None, **kwargs): + """ + Validate a potentially expensive query without executing it. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.validate_query`` unchanged. + """ + return self._get_connection(using).indices.validate_query( + index=self._name, **kwargs + ) + + def clear_cache(self, using=None, **kwargs): + """ + Clear all caches or specific cached associated with the index. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.clear_cache`` unchanged. + """ + return self._get_connection(using).indices.clear_cache( + index=self._name, **kwargs + ) + + def recovery(self, using=None, **kwargs): + """ + The indices recovery API provides insight into on-going shard + recoveries for the index. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.recovery`` unchanged. + """ + return self._get_connection(using).indices.recovery(index=self._name, **kwargs) + + def upgrade(self, using=None, **kwargs): + """ + Upgrade the index to the latest format. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.upgrade`` unchanged. + """ + return self._get_connection(using).indices.upgrade(index=self._name, **kwargs) + + def get_upgrade(self, using=None, **kwargs): + """ + Monitor how much of the index is upgraded. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.get_upgrade`` unchanged. + """ + return self._get_connection(using).indices.get_upgrade( + index=self._name, **kwargs + ) + + def shard_stores(self, using=None, **kwargs): + """ + Provides store information for shard copies of the index. Store + information reports on which nodes shard copies exist, the shard copy + version, indicating how recent they are, and any exceptions encountered + while opening the shard index or from earlier engine failure. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.shard_stores`` unchanged. + """ + return self._get_connection(using).indices.shard_stores( + index=self._name, **kwargs + ) + + def forcemerge(self, using=None, **kwargs): + """ + The force merge API allows to force merging of the index through an + API. The merge relates to the number of segments a Lucene index holds + within each shard. The force merge operation allows to reduce the + number of segments by merging them. + + This call will block until the merge is complete. If the http + connection is lost, the request will continue in the background, and + any new requests will block until the previous force merge is complete. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.forcemerge`` unchanged. + """ + return self._get_connection(using).indices.forcemerge( + index=self._name, **kwargs + ) + + def shrink(self, using=None, **kwargs): + """ + The shrink index API allows you to shrink an existing index into a new + index with fewer primary shards. The number of primary shards in the + target index must be a factor of the shards in the source index. For + example an index with 8 primary shards can be shrunk into 4, 2 or 1 + primary shards or an index with 15 primary shards can be shrunk into 5, + 3 or 1. If the number of shards in the index is a prime number it can + only be shrunk into a single primary shard. Before shrinking, a + (primary or replica) copy of every shard in the index must be present + on the same node. + + Any additional keyword arguments will be passed to + ``OpenSearch.indices.shrink`` unchanged. + """ + return self._get_connection(using).indices.shrink(index=self._name, **kwargs) diff --git a/opensearchpy/helpers/index.pyi b/opensearchpy/helpers/index.pyi new file mode 100644 index 00000000..2bf5747e --- /dev/null +++ b/opensearchpy/helpers/index.pyi @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class IndexTemplate(object): ... +class Index(object): ... diff --git a/opensearchpy/helpers/mapping.py b/opensearchpy/helpers/mapping.py new file mode 100644 index 00000000..9270da97 --- /dev/null +++ b/opensearchpy/helpers/mapping.py @@ -0,0 +1,249 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from itertools import chain + +from six import iteritems, itervalues + +from opensearchpy.connection.connections import get_connection +from opensearchpy.helpers.field import Nested, Text, construct_field + +from .utils import DslBase + +META_FIELDS = frozenset( + ( + "dynamic", + "transform", + "dynamic_date_formats", + "date_detection", + "numeric_detection", + "dynamic_templates", + "enabled", + ) +) + + +class Properties(DslBase): + name = "properties" + _param_defs = {"properties": {"type": "field", "hash": True}} + + def __init__(self): + super(Properties, self).__init__() + + def __repr__(self): + return "Properties()" + + def __getitem__(self, name): + return self.properties[name] + + def __contains__(self, name): + return name in self.properties + + def to_dict(self): + return super(Properties, self).to_dict()["properties"] + + def field(self, name, *args, **kwargs): + self.properties[name] = construct_field(*args, **kwargs) + return self + + def _collect_fields(self): + """Iterate over all Field objects within, including multi fields.""" + for f in itervalues(self.properties.to_dict()): + yield f + # multi fields + if hasattr(f, "fields"): + for inner_f in itervalues(f.fields.to_dict()): + yield inner_f + # nested and inner objects + if hasattr(f, "_collect_fields"): + for inner_f in f._collect_fields(): + yield inner_f + + def update(self, other_object): + if not hasattr(other_object, "properties"): + # not an inner/nested object, no merge possible + return + + our, other = self.properties, other_object.properties + for name in other: + if name in our: + if hasattr(our[name], "update"): + our[name].update(other[name]) + continue + our[name] = other[name] + + +class Mapping(object): + def __init__(self): + self.properties = Properties() + self._meta = {} + + def __repr__(self): + return "Mapping()" + + def _clone(self): + m = Mapping() + m.properties._params = self.properties._params.copy() + return m + + @classmethod + def from_opensearch(cls, index, using="default"): + m = cls() + m.update_from_opensearch(index, using) + return m + + def resolve_nested(self, field_path): + field = self + nested = [] + parts = field_path.split(".") + for i, step in enumerate(parts): + try: + field = field[step] + except KeyError: + return (), None + if isinstance(field, Nested): + nested.append(".".join(parts[: i + 1])) + return nested, field + + def resolve_field(self, field_path): + field = self + for step in field_path.split("."): + try: + field = field[step] + except KeyError: + return + return field + + def _collect_analysis(self): + analysis = {} + fields = [] + if "_all" in self._meta: + fields.append(Text(**self._meta["_all"])) + + for f in chain(fields, self.properties._collect_fields()): + for analyzer_name in ( + "analyzer", + "normalizer", + "search_analyzer", + "search_quote_analyzer", + ): + if not hasattr(f, analyzer_name): + continue + analyzer = getattr(f, analyzer_name) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + continue + + # merge the definition + # TODO: conflict detection/resolution + for key in d: + analysis.setdefault(key, {}).update(d[key]) + + return analysis + + def save(self, index, using="default"): + from opensearchpy.helpers.index import Index + + index = Index(index, using=using) + index.mapping(self) + return index.save() + + def update_from_opensearch(self, index, using="default"): + opensearch = get_connection(using) + raw = opensearch.indices.get_mapping(index=index) + _, raw = raw.popitem() + self._update_from_dict(raw["mappings"]) + + def _update_from_dict(self, raw): + for name, definition in iteritems(raw.get("properties", {})): + self.field(name, definition) + + # metadata like _all etc + for name, value in iteritems(raw): + if name != "properties": + if isinstance(value, collections_abc.Mapping): + self.meta(name, **value) + else: + self.meta(name, value) + + def update(self, mapping, update_only=False): + for name in mapping: + if update_only and name in self: + # nested and inner objects, merge recursively + if hasattr(self[name], "update"): + # FIXME only merge subfields, not the settings + self[name].update(mapping[name], update_only) + continue + self.field(name, mapping[name]) + + if update_only: + for name in mapping._meta: + if name not in self._meta: + self._meta[name] = mapping._meta[name] + else: + self._meta.update(mapping._meta) + + def __contains__(self, name): + return name in self.properties.properties + + def __getitem__(self, name): + return self.properties.properties[name] + + def __iter__(self): + return iter(self.properties.properties) + + def field(self, *args, **kwargs): + self.properties.field(*args, **kwargs) + return self + + def meta(self, name, params=None, **kwargs): + if not name.startswith("_") and name not in META_FIELDS: + name = "_" + name + + if params and kwargs: + raise ValueError("Meta configs cannot have both value and a dictionary.") + + self._meta[name] = kwargs if params is None else params + return self + + def to_dict(self): + meta = self._meta + + # hard coded serialization of analyzers in _all + if "_all" in meta: + meta = meta.copy() + _all = meta["_all"] = meta["_all"].copy() + for f in ("analyzer", "search_analyzer", "search_quote_analyzer"): + if hasattr(_all.get(f, None), "to_dict"): + _all[f] = _all[f].to_dict() + meta.update(self.properties.to_dict()) + return meta diff --git a/opensearchpy/helpers/mapping.pyi b/opensearchpy/helpers/mapping.pyi new file mode 100644 index 00000000..8dab731a --- /dev/null +++ b/opensearchpy/helpers/mapping.pyi @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .utils import DslBase + +class Properties(DslBase): ... +class Mapping(object): ... diff --git a/opensearchpy/helpers/query.py b/opensearchpy/helpers/query.py new file mode 100644 index 00000000..e132254b --- /dev/null +++ b/opensearchpy/helpers/query.py @@ -0,0 +1,525 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from itertools import chain + +# 'SF' looks unused but the test suite assumes it's available +# from this module so others are liable to do so as well. +from ..helpers.function import SF # noqa: F401 +from ..helpers.function import ScoreFunction +from .utils import DslBase + + +def Q(name_or_query="match_all", **params): + # {"match": {"title": "python"}} + if isinstance(name_or_query, collections_abc.Mapping): + if params: + raise ValueError("Q() cannot accept parameters when passing in a dict.") + if len(name_or_query) != 1: + raise ValueError( + 'Q() can only accept dict with a single query ({"match": {...}}). ' + "Instead it got (%r)" % name_or_query + ) + name, params = name_or_query.copy().popitem() + return Query.get_dsl_class(name)(_expand__to_dot=False, **params) + + # MatchAll() + if isinstance(name_or_query, Query): + if params: + raise ValueError( + "Q() cannot accept parameters when passing in a Query object." + ) + return name_or_query + + # s.query = Q('filtered', query=s.query) + if hasattr(name_or_query, "_proxied"): + return name_or_query._proxied + + # "match", title="python" + return Query.get_dsl_class(name_or_query)(**params) + + +class Query(DslBase): + _type_name = "query" + _type_shortcut = staticmethod(Q) + name = None + + def __add__(self, other): + # make sure we give queries that know how to combine themselves + # preference + if hasattr(other, "__radd__"): + return other.__radd__(self) + return Bool(must=[self, other]) + + def __invert__(self): + return Bool(must_not=[self]) + + def __or__(self, other): + # make sure we give queries that know how to combine themselves + # preference + if hasattr(other, "__ror__"): + return other.__ror__(self) + return Bool(should=[self, other]) + + def __and__(self, other): + # make sure we give queries that know how to combine themselves + # preference + if hasattr(other, "__rand__"): + return other.__rand__(self) + return Bool(must=[self, other]) + + +class MatchAll(Query): + name = "match_all" + + def __add__(self, other): + return other._clone() + + __and__ = __rand__ = __radd__ = __add__ + + def __or__(self, other): + return self + + __ror__ = __or__ + + def __invert__(self): + return MatchNone() + + +EMPTY_QUERY = MatchAll() + + +class MatchNone(Query): + name = "match_none" + + def __add__(self, other): + return self + + __and__ = __rand__ = __radd__ = __add__ + + def __or__(self, other): + return other._clone() + + __ror__ = __or__ + + def __invert__(self): + return MatchAll() + + +class Bool(Query): + name = "bool" + _param_defs = { + "must": {"type": "query", "multi": True}, + "should": {"type": "query", "multi": True}, + "must_not": {"type": "query", "multi": True}, + "filter": {"type": "query", "multi": True}, + } + + def __add__(self, other): + q = self._clone() + if isinstance(other, Bool): + q.must += other.must + q.should += other.should + q.must_not += other.must_not + q.filter += other.filter + else: + q.must.append(other) + return q + + __radd__ = __add__ + + def __or__(self, other): + for q in (self, other): + if isinstance(q, Bool) and not any( + (q.must, q.must_not, q.filter, getattr(q, "minimum_should_match", None)) + ): + other = self if q is other else other + q = q._clone() + if isinstance(other, Bool) and not any( + ( + other.must, + other.must_not, + other.filter, + getattr(other, "minimum_should_match", None), + ) + ): + q.should.extend(other.should) + else: + q.should.append(other) + return q + + return Bool(should=[self, other]) + + __ror__ = __or__ + + @property + def _min_should_match(self): + return getattr( + self, + "minimum_should_match", + 0 if not self.should or (self.must or self.filter) else 1, + ) + + def __invert__(self): + # Because an empty Bool query is treated like + # MatchAll the inverse should be MatchNone + if not any(chain(self.must, self.filter, self.should, self.must_not)): + return MatchNone() + + negations = [] + for q in chain(self.must, self.filter): + negations.append(~q) + + for q in self.must_not: + negations.append(q) + + if self.should and self._min_should_match: + negations.append(Bool(must_not=self.should[:])) + + if len(negations) == 1: + return negations[0] + return Bool(should=negations) + + def __and__(self, other): + q = self._clone() + if isinstance(other, Bool): + q.must += other.must + q.must_not += other.must_not + q.filter += other.filter + q.should = [] + + # reset minimum_should_match as it will get calculated below + if "minimum_should_match" in q._params: + del q._params["minimum_should_match"] + + for qx in (self, other): + # TODO: percentages will fail here + min_should_match = qx._min_should_match + # all subqueries are required + if len(qx.should) <= min_should_match: + q.must.extend(qx.should) + # not all of them are required, use it and remember min_should_match + elif not q.should: + q.minimum_should_match = min_should_match + q.should = qx.should + # all queries are optional, just extend should + elif q._min_should_match == 0 and min_should_match == 0: + q.should.extend(qx.should) + # not all are required, add a should list to the must with proper min_should_match + else: + q.must.append( + Bool(should=qx.should, minimum_should_match=min_should_match) + ) + else: + if not (q.must or q.filter) and q.should: + q._params.setdefault("minimum_should_match", 1) + q.must.append(other) + return q + + __rand__ = __and__ + + +class FunctionScore(Query): + name = "function_score" + _param_defs = { + "query": {"type": "query"}, + "filter": {"type": "query"}, + "functions": {"type": "score_function", "multi": True}, + } + + def __init__(self, **kwargs): + if "functions" in kwargs: + pass + else: + fns = kwargs["functions"] = [] + for name in ScoreFunction._classes: + if name in kwargs: + fns.append({name: kwargs.pop(name)}) + super(FunctionScore, self).__init__(**kwargs) + + +# compound queries +class Boosting(Query): + name = "boosting" + _param_defs = {"positive": {"type": "query"}, "negative": {"type": "query"}} + + +class ConstantScore(Query): + name = "constant_score" + _param_defs = {"query": {"type": "query"}, "filter": {"type": "query"}} + + +class DisMax(Query): + name = "dis_max" + _param_defs = {"queries": {"type": "query", "multi": True}} + + +class Filtered(Query): + name = "filtered" + _param_defs = {"query": {"type": "query"}, "filter": {"type": "query"}} + + +class Indices(Query): + name = "indices" + _param_defs = {"query": {"type": "query"}, "no_match_query": {"type": "query"}} + + +class Percolate(Query): + name = "percolate" + + +# relationship queries +class Nested(Query): + name = "nested" + _param_defs = {"query": {"type": "query"}} + + +class HasChild(Query): + name = "has_child" + _param_defs = {"query": {"type": "query"}} + + +class HasParent(Query): + name = "has_parent" + _param_defs = {"query": {"type": "query"}} + + +class TopChildren(Query): + name = "top_children" + _param_defs = {"query": {"type": "query"}} + + +# compount span queries +class SpanFirst(Query): + name = "span_first" + _param_defs = {"match": {"type": "query"}} + + +class SpanMulti(Query): + name = "span_multi" + _param_defs = {"match": {"type": "query"}} + + +class SpanNear(Query): + name = "span_near" + _param_defs = {"clauses": {"type": "query", "multi": True}} + + +class SpanNot(Query): + name = "span_not" + _param_defs = {"exclude": {"type": "query"}, "include": {"type": "query"}} + + +class SpanOr(Query): + name = "span_or" + _param_defs = {"clauses": {"type": "query", "multi": True}} + + +class FieldMaskingSpan(Query): + name = "field_masking_span" + _param_defs = {"query": {"type": "query"}} + + +class SpanContaining(Query): + name = "span_containing" + _param_defs = {"little": {"type": "query"}, "big": {"type": "query"}} + + +# Original implementation contained +# a typo: remove in v8.0. +SpanContainining = SpanContaining + + +class SpanWithin(Query): + name = "span_within" + _param_defs = {"little": {"type": "query"}, "big": {"type": "query"}} + + +# core queries +class Common(Query): + name = "common" + + +class Fuzzy(Query): + name = "fuzzy" + + +class FuzzyLikeThis(Query): + name = "fuzzy_like_this" + + +class FuzzyLikeThisField(Query): + name = "fuzzy_like_this_field" + + +class RankFeature(Query): + name = "rank_feature" + + +class DistanceFeature(Query): + name = "distance_feature" + + +class GeoBoundingBox(Query): + name = "geo_bounding_box" + + +class GeoDistance(Query): + name = "geo_distance" + + +class GeoDistanceRange(Query): + name = "geo_distance_range" + + +class GeoPolygon(Query): + name = "geo_polygon" + + +class GeoShape(Query): + name = "geo_shape" + + +class GeohashCell(Query): + name = "geohash_cell" + + +class Ids(Query): + name = "ids" + + +class Intervals(Query): + name = "intervals" + + +class Limit(Query): + name = "limit" + + +class Match(Query): + name = "match" + + +class MatchPhrase(Query): + name = "match_phrase" + + +class MatchPhrasePrefix(Query): + name = "match_phrase_prefix" + + +class MatchBoolPrefix(Query): + name = "match_bool_prefix" + + +class Exists(Query): + name = "exists" + + +class MoreLikeThis(Query): + name = "more_like_this" + + +class MoreLikeThisField(Query): + name = "more_like_this_field" + + +class MultiMatch(Query): + name = "multi_match" + + +class Prefix(Query): + name = "prefix" + + +class QueryString(Query): + name = "query_string" + + +class Range(Query): + name = "range" + + +class Regexp(Query): + name = "regexp" + + +class Shape(Query): + name = "shape" + + +class SimpleQueryString(Query): + name = "simple_query_string" + + +class SpanTerm(Query): + name = "span_term" + + +class Template(Query): + name = "template" + + +class Term(Query): + name = "term" + + +class Terms(Query): + name = "terms" + + +class TermsSet(Query): + name = "terms_set" + + +class Wildcard(Query): + name = "wildcard" + + +class Script(Query): + name = "script" + + +class ScriptScore(Query): + name = "script_score" + _param_defs = {"query": {"type": "query"}} + + +class Type(Query): + name = "type" + + +class ParentId(Query): + name = "parent_id" + + +class Wrapper(Query): + name = "wrapper" diff --git a/opensearchpy/helpers/query.pyi b/opensearchpy/helpers/query.pyi new file mode 100644 index 00000000..a963ef05 --- /dev/null +++ b/opensearchpy/helpers/query.pyi @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +from .utils import DslBase + +class Query(DslBase): ... +class MatchAll(Query): ... +class MatchNone(Query): ... +class Bool(Query): ... +class FunctionScore(Query): ... +class Boosting(Query): ... +class ConstantScore(Query): ... +class DisMax(Query): ... +class Filtered(Query): ... +class Indices(Query): ... +class Percolate(Query): ... +class Nested(Query): ... +class HasChild(Query): ... +class HasParent(Query): ... +class TopChildren(Query): ... +class SpanFirst(Query): ... +class SpanMulti(Query): ... +class SpanNear(Query): ... +class SpanNot(Query): ... +class SpanOr(Query): ... +class FieldMaskingSpan(Query): ... +class SpanContaining(Query): ... +class SpanWithin(Query): ... +class Common(Query): ... +class Fuzzy(Query): ... +class FuzzyLikeThis(Query): ... +class FuzzyLikeThisField(Query): ... +class RankFeature(Query): ... +class DistanceFeature(Query): ... +class GeoBoundingBox(Query): ... +class GeoDistance(Query): ... +class GeoDistanceRange(Query): ... +class GeoPolygon(Query): ... +class GeoShape(Query): ... +class GeohashCell(Query): ... +class Ids(Query): ... +class Intervals(Query): ... +class Limit(Query): ... +class Match(Query): ... +class MatchPhrase(Query): ... +class MatchPhrasePrefix(Query): ... +class MatchBoolPrefix(Query): ... +class Exists(Query): ... +class MoreLikeThis(Query): ... +class MoreLikeThisField(Query): ... +class MultiMatch(Query): ... +class Prefix(Query): ... +class QueryString(Query): ... +class Range(Query): ... +class Regexp(Query): ... +class Shape(Query): ... +class SimpleQueryString(Query): ... +class SpanTerm(Query): ... +class Template(Query): ... +class Term(Query): ... +class Terms(Query): ... +class TermsSet(Query): ... +class Wildcard(Query): ... +class Script(Query): ... +class ScriptScore(Query): ... +class Type(Query): ... +class ParentId(Query): ... +class Wrapper(Query): ... + +def Q(name_or_query: Any, **params: Any) -> Any: ... diff --git a/opensearchpy/helpers/response/__init__.py b/opensearchpy/helpers/response/__init__.py new file mode 100644 index 00000000..91e4c044 --- /dev/null +++ b/opensearchpy/helpers/response/__init__.py @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ..utils import AttrDict, AttrList, _wrap +from .hit import Hit, HitMeta + +__all__ = ["Response", "AggResponse", "UpdateByQueryResponse", "Hit", "HitMeta"] + + +class Response(AttrDict): + def __init__(self, search, response, doc_class=None): + super(AttrDict, self).__setattr__("_search", search) + super(AttrDict, self).__setattr__("_doc_class", doc_class) + super(Response, self).__init__(response) + + def __iter__(self): + return iter(self.hits) + + def __getitem__(self, key): + if isinstance(key, (slice, int)): + # for slicing etc + return self.hits[key] + return super(Response, self).__getitem__(key) + + def __nonzero__(self): + return bool(self.hits) + + __bool__ = __nonzero__ + + def __repr__(self): + return "" % (self.hits or self.aggregations) + + def __len__(self): + return len(self.hits) + + def __getstate__(self): + return self._d_, self._search, self._doc_class + + def __setstate__(self, state): + super(AttrDict, self).__setattr__("_d_", state[0]) + super(AttrDict, self).__setattr__("_search", state[1]) + super(AttrDict, self).__setattr__("_doc_class", state[2]) + + def success(self): + return self._shards.total == self._shards.successful and not self.timed_out + + @property + def hits(self): + if not hasattr(self, "_hits"): + h = self._d_["hits"] + + try: + hits = AttrList(map(self._search._get_result, h["hits"])) + except AttributeError as e: + # avoid raising AttributeError since it will be hidden by the property + raise TypeError("Could not parse hits.", e) + + # avoid assigning _hits into self._d_ + super(AttrDict, self).__setattr__("_hits", hits) + for k in h: + setattr(self._hits, k, _wrap(h[k])) + return self._hits + + @property + def aggregations(self): + return self.aggs + + @property + def aggs(self): + if not hasattr(self, "_aggs"): + aggs = AggResponse( + self._search.aggs, self._search, self._d_.get("aggregations", {}) + ) + + # avoid assigning _aggs into self._d_ + super(AttrDict, self).__setattr__("_aggs", aggs) + return self._aggs + + +class AggResponse(AttrDict): + def __init__(self, aggs, search, data): + super(AttrDict, self).__setattr__("_meta", {"search": search, "aggs": aggs}) + super(AggResponse, self).__init__(data) + + def __getitem__(self, attr_name): + if attr_name in self._meta["aggs"]: + # don't do self._meta['aggs'][attr_name] to avoid copying + agg = self._meta["aggs"].aggs[attr_name] + return agg.result(self._meta["search"], self._d_[attr_name]) + return super(AggResponse, self).__getitem__(attr_name) + + def __iter__(self): + for name in self._meta["aggs"]: + yield self[name] + + +class UpdateByQueryResponse(AttrDict): + def __init__(self, search, response, doc_class=None): + super(AttrDict, self).__setattr__("_search", search) + super(AttrDict, self).__setattr__("_doc_class", doc_class) + super(UpdateByQueryResponse, self).__init__(response) + + def success(self): + return not self.timed_out and not self.failures diff --git a/opensearchpy/helpers/response/__init__.pyi b/opensearchpy/helpers/response/__init__.pyi new file mode 100644 index 00000000..3f3af097 --- /dev/null +++ b/opensearchpy/helpers/response/__init__.pyi @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ..utils import AttrDict + +class Response(AttrDict): ... +class AggResponse(AttrDict): ... +class UpdateByQueryResponse(AttrDict): ... diff --git a/opensearchpy/helpers/response/aggs.py b/opensearchpy/helpers/response/aggs.py new file mode 100644 index 00000000..a5e2e22d --- /dev/null +++ b/opensearchpy/helpers/response/aggs.py @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ..utils import AttrDict, AttrList +from . import AggResponse, Response + + +class Bucket(AggResponse): + def __init__(self, aggs, search, data, field=None): + super(Bucket, self).__init__(aggs, search, data) + + +class FieldBucket(Bucket): + def __init__(self, aggs, search, data, field=None): + if field: + data["key"] = field.deserialize(data["key"]) + super(FieldBucket, self).__init__(aggs, search, data, field) + + +class BucketData(AggResponse): + _bucket_class = Bucket + + def _wrap_bucket(self, data): + return self._bucket_class( + self._meta["aggs"], + self._meta["search"], + data, + field=self._meta.get("field"), + ) + + def __iter__(self): + return iter(self.buckets) + + def __len__(self): + return len(self.buckets) + + def __getitem__(self, key): + if isinstance(key, (int, slice)): + return self.buckets[key] + return super(BucketData, self).__getitem__(key) + + @property + def buckets(self): + if not hasattr(self, "_buckets"): + field = getattr(self._meta["aggs"], "field", None) + if field: + self._meta["field"] = self._meta["search"]._resolve_field(field) + bs = self._d_["buckets"] + if isinstance(bs, list): + bs = AttrList(bs, obj_wrapper=self._wrap_bucket) + else: + bs = AttrDict({k: self._wrap_bucket(bs[k]) for k in bs}) + super(AttrDict, self).__setattr__("_buckets", bs) + return self._buckets + + +class FieldBucketData(BucketData): + _bucket_class = FieldBucket + + +class TopHitsData(Response): + def __init__(self, agg, search, data): + super(AttrDict, self).__setattr__( + "meta", AttrDict({"agg": agg, "search": search}) + ) + super(TopHitsData, self).__init__(search, data) diff --git a/opensearchpy/helpers/response/aggs.pyi b/opensearchpy/helpers/response/aggs.pyi new file mode 100644 index 00000000..ba92e56b --- /dev/null +++ b/opensearchpy/helpers/response/aggs.pyi @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from . import AggResponse as AggResponse +from . import Response as Response + +class Bucket(AggResponse): ... +class FieldBucket(Bucket): ... +class BucketData(AggResponse): ... +class FieldBucketData(BucketData): ... +class TopHitsData(Response): ... diff --git a/opensearchpy/helpers/response/hit.py b/opensearchpy/helpers/response/hit.py new file mode 100644 index 00000000..cf70a821 --- /dev/null +++ b/opensearchpy/helpers/response/hit.py @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ..utils import AttrDict, HitMeta + + +class Hit(AttrDict): + def __init__(self, document): + data = {} + if "_source" in document: + data = document["_source"] + if "fields" in document: + data.update(document["fields"]) + + super(Hit, self).__init__(data) + # assign meta as attribute and not as key in self._d_ + super(AttrDict, self).__setattr__("meta", HitMeta(document)) + + def __getstate__(self): + # add self.meta since it is not in self.__dict__ + return super(Hit, self).__getstate__() + (self.meta,) + + def __setstate__(self, state): + super(AttrDict, self).__setattr__("meta", state[-1]) + super(Hit, self).__setstate__(state[:-1]) + + def __dir__(self): + # be sure to expose meta in dir(self) + return super(Hit, self).__dir__() + ["meta"] + + def __repr__(self): + return "".format( + "/".join( + getattr(self.meta, key) for key in ("index", "id") if key in self.meta + ), + super(Hit, self).__repr__(), + ) diff --git a/opensearchpy/helpers/response/hit.pyi b/opensearchpy/helpers/response/hit.pyi new file mode 100644 index 00000000..ae3cdf00 --- /dev/null +++ b/opensearchpy/helpers/response/hit.pyi @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ..utils import AttrDict + +class Hit(AttrDict): ... diff --git a/opensearchpy/helpers/search.py b/opensearchpy/helpers/search.py new file mode 100644 index 00000000..4fab50aa --- /dev/null +++ b/opensearchpy/helpers/search.py @@ -0,0 +1,827 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import copy + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from six import iteritems, string_types + +from opensearchpy.connection.connections import get_connection +from opensearchpy.exceptions import TransportError +from opensearchpy.helpers import scan + +from ..exceptions import IllegalOperation +from ..helpers.aggs import A, AggBase +from ..helpers.query import Bool, Q +from .response import Hit, Response +from .utils import AttrDict, DslBase, recursive_to_dict + + +class QueryProxy(object): + """ + Simple proxy around DSL objects (queries) that can be called + (to add query/post_filter) and also allows attribute access which is proxied to + the wrapped query. + """ + + def __init__(self, search, attr_name): + self._search = search + self._proxied = None + self._attr_name = attr_name + + def __nonzero__(self): + return self._proxied is not None + + __bool__ = __nonzero__ + + def __call__(self, *args, **kwargs): + s = self._search._clone() + + # we cannot use self._proxied since we just cloned self._search and + # need to access the new self on the clone + proxied = getattr(s, self._attr_name) + if proxied._proxied is None: + proxied._proxied = Q(*args, **kwargs) + else: + proxied._proxied &= Q(*args, **kwargs) + + # always return search to be chainable + return s + + def __getattr__(self, attr_name): + return getattr(self._proxied, attr_name) + + def __setattr__(self, attr_name, value): + if not attr_name.startswith("_"): + self._proxied = Q(self._proxied.to_dict()) + setattr(self._proxied, attr_name, value) + super(QueryProxy, self).__setattr__(attr_name, value) + + def __getstate__(self): + return self._search, self._proxied, self._attr_name + + def __setstate__(self, state): + self._search, self._proxied, self._attr_name = state + + +class ProxyDescriptor(object): + """ + Simple descriptor to enable setting of queries and filters as: + + s = Search() + s.query = Q(...) + + """ + + def __init__(self, name): + self._attr_name = "_%s_proxy" % name + + def __get__(self, instance, owner): + return getattr(instance, self._attr_name) + + def __set__(self, instance, value): + proxy = getattr(instance, self._attr_name) + proxy._proxied = Q(value) + + +class AggsProxy(AggBase, DslBase): + name = "aggs" + + def __init__(self, search): + self._base = self + self._search = search + self._params = {"aggs": {}} + + def to_dict(self): + return super(AggsProxy, self).to_dict().get("aggs", {}) + + +class Request(object): + def __init__(self, using="default", index=None, doc_type=None, extra=None): + self._using = using + + self._index = None + if isinstance(index, (tuple, list)): + self._index = list(index) + elif index: + self._index = [index] + + self._doc_type = [] + self._doc_type_map = {} + if isinstance(doc_type, (tuple, list)): + self._doc_type.extend(doc_type) + elif isinstance(doc_type, collections_abc.Mapping): + self._doc_type.extend(doc_type.keys()) + self._doc_type_map.update(doc_type) + elif doc_type: + self._doc_type.append(doc_type) + + self._params = {} + self._extra = extra or {} + + def __eq__(self, other): + return ( + isinstance(other, Request) + and other._params == self._params + and other._index == self._index + and other._doc_type == self._doc_type + and other.to_dict() == self.to_dict() + ) + + def __copy__(self): + return self._clone() + + def params(self, **kwargs): + """ + Specify query params to be used when executing the search. All the + keyword arguments will override the current values. + + Example:: + + s = Search() + s = s.params(routing='user-1', preference='local') + """ + s = self._clone() + s._params.update(kwargs) + return s + + def index(self, *index): + """ + Set the index for the search. If called empty it will remove all information. + + Example: + + s = Search() + s = s.index('twitter-2015.01.01', 'twitter-2015.01.02') + s = s.index(['twitter-2015.01.01', 'twitter-2015.01.02']) + """ + # .index() resets + s = self._clone() + if not index: + s._index = None + else: + indexes = [] + for i in index: + if isinstance(i, string_types): + indexes.append(i) + elif isinstance(i, list): + indexes += i + elif isinstance(i, tuple): + indexes += list(i) + + s._index = (self._index or []) + indexes + + return s + + def _resolve_field(self, path): + for dt in self._doc_type: + if not hasattr(dt, "_index"): + continue + field = dt._index.resolve_field(path) + if field is not None: + return field + + def _resolve_nested(self, hit, parent_class=None): + doc_class = Hit + + nested_path = [] + nesting = hit["_nested"] + while nesting and "field" in nesting: + nested_path.append(nesting["field"]) + nesting = nesting.get("_nested") + nested_path = ".".join(nested_path) + + if hasattr(parent_class, "_index"): + nested_field = parent_class._index.resolve_field(nested_path) + else: + nested_field = self._resolve_field(nested_path) + + if nested_field is not None: + return nested_field._doc_class + + return doc_class + + def _get_result(self, hit, parent_class=None): + doc_class = Hit + dt = hit.get("_type") + + if "_nested" in hit: + doc_class = self._resolve_nested(hit, parent_class) + + elif dt in self._doc_type_map: + doc_class = self._doc_type_map[dt] + + else: + for doc_type in self._doc_type: + if hasattr(doc_type, "_matches") and doc_type._matches(hit): + doc_class = doc_type + break + + for t in hit.get("inner_hits", ()): + hit["inner_hits"][t] = Response( + self, hit["inner_hits"][t], doc_class=doc_class + ) + + callback = getattr(doc_class, "from_opensearch", doc_class) + return callback(hit) + + def doc_type(self, *doc_type, **kwargs): + """ + Set the type to search through. You can supply a single value or + multiple. Values can be strings or subclasses of ``Document``. + + You can also pass in any keyword arguments, mapping a doc_type to a + callback that should be used instead of the Hit class. + + If no doc_type is supplied any information stored on the instance will + be erased. + + Example: + + s = Search().doc_type('product', 'store', User, custom=my_callback) + """ + # .doc_type() resets + s = self._clone() + if not doc_type and not kwargs: + s._doc_type = [] + s._doc_type_map = {} + else: + s._doc_type.extend(doc_type) + s._doc_type.extend(kwargs.keys()) + s._doc_type_map.update(kwargs) + return s + + def using(self, client): + """ + Associate the search request with an opensearch client. A fresh copy + will be returned with current instance remaining unchanged. + + :arg client: an instance of ``opensearchpy.OpenSearch`` to use or + an alias to look up in ``opensearchpy.connections`` + + """ + s = self._clone() + s._using = client + return s + + def extra(self, **kwargs): + """ + Add extra keys to the request body. Mostly here for backwards + compatibility. + """ + s = self._clone() + if "from_" in kwargs: + kwargs["from"] = kwargs.pop("from_") + s._extra.update(kwargs) + return s + + def _clone(self): + s = self.__class__( + using=self._using, index=self._index, doc_type=self._doc_type + ) + s._doc_type_map = self._doc_type_map.copy() + s._extra = self._extra.copy() + s._params = self._params.copy() + return s + + +class Search(Request): + query = ProxyDescriptor("query") + post_filter = ProxyDescriptor("post_filter") + + def __init__(self, **kwargs): + """ + Search request to opensearch. + + :arg using: `OpenSearch` instance to use + :arg index: limit the search to index + :arg doc_type: only query this type. + + All the parameters supplied (or omitted) at creation type can be later + overridden by methods (`using`, `index` and `doc_type` respectively). + """ + super(Search, self).__init__(**kwargs) + + self.aggs = AggsProxy(self) + self._sort = [] + self._source = None + self._highlight = {} + self._highlight_opts = {} + self._suggest = {} + self._script_fields = {} + self._response_class = Response + + self._query_proxy = QueryProxy(self, "query") + self._post_filter_proxy = QueryProxy(self, "post_filter") + + def filter(self, *args, **kwargs): + return self.query(Bool(filter=[Q(*args, **kwargs)])) + + def exclude(self, *args, **kwargs): + return self.query(Bool(filter=[~Q(*args, **kwargs)])) + + def __iter__(self): + """ + Iterate over the hits. + """ + return iter(self.execute()) + + def __getitem__(self, n): + """ + Support slicing the `Search` instance for pagination. + + Slicing equates to the from/size parameters. E.g.:: + + s = Search().query(...)[0:25] + + is equivalent to:: + + s = Search().query(...).extra(from_=0, size=25) + + """ + s = self._clone() + + if isinstance(n, slice): + # If negative slicing, abort. + if n.start and n.start < 0 or n.stop and n.stop < 0: + raise ValueError("Search does not support negative slicing.") + # OpenSearch won't get all results so we default to size: 10 if + # stop not given. + s._extra["from"] = n.start or 0 + s._extra["size"] = max( + 0, n.stop - (n.start or 0) if n.stop is not None else 10 + ) + return s + else: # This is an index lookup, equivalent to slicing by [n:n+1]. + # If negative index, abort. + if n < 0: + raise ValueError("Search does not support negative indexing.") + s._extra["from"] = n + s._extra["size"] = 1 + return s + + @classmethod + def from_dict(cls, d): + """ + Construct a new `Search` instance from a raw dict containing the search + body. Useful when migrating from raw dictionaries. + + Example:: + + s = Search.from_dict({ + "query": { + "bool": { + "must": [...] + } + }, + "aggs": {...} + }) + s = s.filter('term', published=True) + """ + s = cls() + s.update_from_dict(d) + return s + + def _clone(self): + """ + Return a clone of the current search request. Performs a shallow copy + of all the underlying objects. Used internally by most state modifying + APIs. + """ + s = super(Search, self)._clone() + + s._response_class = self._response_class + s._sort = self._sort[:] + s._source = copy.copy(self._source) if self._source is not None else None + s._highlight = self._highlight.copy() + s._highlight_opts = self._highlight_opts.copy() + s._suggest = self._suggest.copy() + s._script_fields = self._script_fields.copy() + for x in ("query", "post_filter"): + getattr(s, x)._proxied = getattr(self, x)._proxied + + # copy top-level bucket definitions + if self.aggs._params.get("aggs"): + s.aggs._params = {"aggs": self.aggs._params["aggs"].copy()} + return s + + def response_class(self, cls): + """ + Override the default wrapper used for the response. + """ + s = self._clone() + s._response_class = cls + return s + + def update_from_dict(self, d): + """ + Apply options from a serialized body to the current instance. Modifies + the object in-place. Used mostly by ``from_dict``. + """ + d = d.copy() + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "post_filter" in d: + self.post_filter._proxied = Q(d.pop("post_filter")) + + aggs = d.pop("aggs", d.pop("aggregations", {})) + if aggs: + self.aggs._params = { + "aggs": {name: A(value) for (name, value) in iteritems(aggs)} + } + if "sort" in d: + self._sort = d.pop("sort") + if "_source" in d: + self._source = d.pop("_source") + if "highlight" in d: + high = d.pop("highlight").copy() + self._highlight = high.pop("fields") + self._highlight_opts = high + if "suggest" in d: + self._suggest = d.pop("suggest") + if "text" in self._suggest: + text = self._suggest.pop("text") + for s in self._suggest.values(): + s.setdefault("text", text) + if "script_fields" in d: + self._script_fields = d.pop("script_fields") + self._extra.update(d) + return self + + def script_fields(self, **kwargs): + """ + Define script fields to be calculated on hits. + + Example:: + + s = Search() + s = s.script_fields(times_two="doc['field'].value * 2") + s = s.script_fields( + times_three={ + 'script': { + 'lang': 'painless', + 'source': "doc['field'].value * params.n", + 'params': {'n': 3} + } + } + ) + + """ + s = self._clone() + for name in kwargs: + if isinstance(kwargs[name], string_types): + kwargs[name] = {"script": kwargs[name]} + s._script_fields.update(kwargs) + return s + + def source(self, fields=None, **kwargs): + """ + Selectively control how the _source field is returned. + + :arg fields: wildcard string, array of wildcards, or dictionary of includes and excludes + + If ``fields`` is None, the entire document will be returned for + each hit. If fields is a dictionary with keys of 'includes' and/or + 'excludes' the fields will be either included or excluded appropriately. + + Calling this multiple times with the same named parameter will override the + previous values with the new ones. + + Example:: + + s = Search() + s = s.source(includes=['obj1.*'], excludes=["*.description"]) + + s = Search() + s = s.source(includes=['obj1.*']).source(excludes=["*.description"]) + + """ + s = self._clone() + + if fields and kwargs: + raise ValueError("You cannot specify fields and kwargs at the same time.") + + if fields is not None: + s._source = fields + return s + + if kwargs and not isinstance(s._source, dict): + s._source = {} + + for key, value in kwargs.items(): + if value is None: + try: + del s._source[key] + except KeyError: + pass + else: + s._source[key] = value + + return s + + def sort(self, *keys): + """ + Add sorting information to the search request. If called without + arguments it will remove all sort requirements. Otherwise it will + replace them. Acceptable arguments are:: + + 'some.field' + '-some.other.field' + {'different.field': {'any': 'dict'}} + + so for example:: + + s = Search().sort( + 'category', + '-title', + {"price" : {"order" : "asc", "mode" : "avg"}} + ) + + will sort by ``category``, ``title`` (in descending order) and + ``price`` in ascending order using the ``avg`` mode. + + The API returns a copy of the Search object and can thus be chained. + """ + s = self._clone() + s._sort = [] + for k in keys: + if isinstance(k, string_types) and k.startswith("-"): + if k[1:] == "_score": + raise IllegalOperation("Sorting by `-_score` is not allowed.") + k = {k[1:]: {"order": "desc"}} + s._sort.append(k) + return s + + def highlight_options(self, **kwargs): + """ + Update the global highlighting options used for this request. For + example:: + + s = Search() + s = s.highlight_options(order='score') + """ + s = self._clone() + s._highlight_opts.update(kwargs) + return s + + def highlight(self, *fields, **kwargs): + """ + Request highlighting of some fields. All keyword arguments passed in will be + used as parameters for all the fields in the ``fields`` parameter. Example:: + + Search().highlight('title', 'body', fragment_size=50) + + will produce the equivalent of:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 50}, + "title": {"fragment_size": 50} + } + } + } + + If you want to have different options for different fields + you can call ``highlight`` twice:: + + Search().highlight('title', fragment_size=50).highlight('body', fragment_size=100) + + which will produce:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 100}, + "title": {"fragment_size": 50} + } + } + } + + """ + s = self._clone() + for f in fields: + s._highlight[f] = kwargs + return s + + def suggest(self, name, text, **kwargs): + """ + Add a suggestions request to the search. + + :arg name: name of the suggestion + :arg text: text to suggest on + + All keyword arguments will be added to the suggestions body. For example:: + + s = Search() + s = s.suggest('suggestion-1', 'OpenSearch', term={'field': 'body'}) + """ + s = self._clone() + s._suggest[name] = {"text": text} + s._suggest[name].update(kwargs) + return s + + def to_dict(self, count=False, **kwargs): + """ + Serialize the search into the dictionary that will be sent over as the + request's body. + + :arg count: a flag to specify if we are interested in a body for count - + no aggregations, no pagination bounds etc. + + All additional keyword arguments will be included into the dictionary. + """ + d = {} + + if self.query: + d["query"] = self.query.to_dict() + + # count request doesn't care for sorting and other things + if not count: + if self.post_filter: + d["post_filter"] = self.post_filter.to_dict() + + if self.aggs.aggs: + d.update(self.aggs.to_dict()) + + if self._sort: + d["sort"] = self._sort + + d.update(recursive_to_dict(self._extra)) + + if self._source not in (None, {}): + d["_source"] = self._source + + if self._highlight: + d["highlight"] = {"fields": self._highlight} + d["highlight"].update(self._highlight_opts) + + if self._suggest: + d["suggest"] = self._suggest + + if self._script_fields: + d["script_fields"] = self._script_fields + + d.update(recursive_to_dict(kwargs)) + return d + + def count(self): + """ + Return the number of hits matching the query and filters. Note that + only the actual number is returned. + """ + if hasattr(self, "_response") and self._response.hits.total.relation == "eq": + return self._response.hits.total.value + + opensearch = get_connection(self._using) + + d = self.to_dict(count=True) + # TODO: failed shards detection + return opensearch.count(index=self._index, body=d, **self._params)["count"] + + def execute(self, ignore_cache=False): + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. + + :arg ignore_cache: if set to ``True``, consecutive calls will hit + OpenSearch, while cached result will be ignored. Defaults to `False` + """ + if ignore_cache or not hasattr(self, "_response"): + opensearch = get_connection(self._using) + + self._response = self._response_class( + self, + opensearch.search( + index=self._index, body=self.to_dict(), **self._params + ), + ) + return self._response + + def scan(self): + """ + Turn the search into a scan search and return a generator that will + iterate over all the documents matching the query. + + Use ``params`` method to specify any additional arguments you with to + pass to the underlying ``scan`` helper from ``opensearchpy`` + + """ + opensearch = get_connection(self._using) + + for hit in scan( + opensearch, query=self.to_dict(), index=self._index, **self._params + ): + yield self._get_result(hit) + + def delete(self): + """ + delete() executes the query by delegating to delete_by_query() + """ + + opensearch = get_connection(self._using) + + return AttrDict( + opensearch.delete_by_query( + index=self._index, body=self.to_dict(), **self._params + ) + ) + + +class MultiSearch(Request): + """ + Combine multiple :class:`~opensearchpy.Search` objects into a single + request. + """ + + def __init__(self, **kwargs): + super(MultiSearch, self).__init__(**kwargs) + self._searches = [] + + def __getitem__(self, key): + return self._searches[key] + + def __iter__(self): + return iter(self._searches) + + def _clone(self): + ms = super(MultiSearch, self)._clone() + ms._searches = self._searches[:] + return ms + + def add(self, search): + """ + Adds a new :class:`~opensearchpy.Search` object to the request:: + + ms = MultiSearch(index='my-index') + ms = ms.add(Search(doc_type=Category).filter('term', category='python')) + ms = ms.add(Search(doc_type=Blog)) + """ + ms = self._clone() + ms._searches.append(search) + return ms + + def to_dict(self): + out = [] + for s in self._searches: + meta = {} + if s._index: + meta["index"] = s._index + meta.update(s._params) + + out.append(meta) + out.append(s.to_dict()) + + return out + + def execute(self, ignore_cache=False, raise_on_error=True): + """ + Execute the multi search request and return a list of search results. + """ + if ignore_cache or not hasattr(self, "_response"): + opensearch = get_connection(self._using) + + responses = opensearch.msearch( + index=self._index, body=self.to_dict(), **self._params + ) + + out = [] + for s, r in zip(self._searches, responses["responses"]): + if r.get("error", False): + if raise_on_error: + raise TransportError("N/A", r["error"]["type"], r["error"]) + r = None + else: + r = Response(s, r) + out.append(r) + + self._response = out + + return self._response diff --git a/opensearchpy/helpers/search.pyi b/opensearchpy/helpers/search.pyi new file mode 100644 index 00000000..92b46243 --- /dev/null +++ b/opensearchpy/helpers/search.pyi @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .aggs import AggBase +from .utils import DslBase + +class QueryProxy(object): ... +class ProxyDescriptor(object): ... +class AggsProxy(AggBase, DslBase): ... +class Request(object): ... +class Search(Request): ... +class MultiSearch(Request): ... diff --git a/opensearchpy/helpers/update_by_query.py b/opensearchpy/helpers/update_by_query.py new file mode 100644 index 00000000..3be888bf --- /dev/null +++ b/opensearchpy/helpers/update_by_query.py @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from opensearchpy.connection.connections import get_connection + +from ..helpers.query import Bool, Q +from ..helpers.search import ProxyDescriptor, QueryProxy, Request +from .response import UpdateByQueryResponse +from .utils import recursive_to_dict + + +class UpdateByQuery(Request): + query = ProxyDescriptor("query") + + def __init__(self, **kwargs): + """ + Update by query request to opensearch. + + :arg using: `OpenSearch` instance to use + :arg index: limit the search to index + :arg doc_type: only query this type. + + All the parameters supplied (or omitted) at creation type can be later + overridden by methods (`using`, `index` and `doc_type` respectively). + + """ + super(UpdateByQuery, self).__init__(**kwargs) + self._response_class = UpdateByQueryResponse + self._script = {} + self._query_proxy = QueryProxy(self, "query") + + def filter(self, *args, **kwargs): + return self.query(Bool(filter=[Q(*args, **kwargs)])) + + def exclude(self, *args, **kwargs): + return self.query(Bool(filter=[~Q(*args, **kwargs)])) + + @classmethod + def from_dict(cls, d): + """ + Construct a new `UpdateByQuery` instance from a raw dict containing the search + body. Useful when migrating from raw dictionaries. + + Example:: + + ubq = UpdateByQuery.from_dict({ + "query": { + "bool": { + "must": [...] + } + }, + "script": {...} + }) + ubq = ubq.filter('term', published=True) + """ + u = cls() + u.update_from_dict(d) + return u + + def _clone(self): + """ + Return a clone of the current search request. Performs a shallow copy + of all the underlying objects. Used internally by most state modifying + APIs. + """ + ubq = super(UpdateByQuery, self)._clone() + + ubq._response_class = self._response_class + ubq._script = self._script.copy() + ubq.query._proxied = self.query._proxied + return ubq + + def response_class(self, cls): + """ + Override the default wrapper used for the response. + """ + ubq = self._clone() + ubq._response_class = cls + return ubq + + def update_from_dict(self, d): + """ + Apply options from a serialized body to the current instance. Modifies + the object in-place. Used mostly by ``from_dict``. + """ + d = d.copy() + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "script" in d: + self._script = d.pop("script") + self._extra.update(d) + return self + + def script(self, **kwargs): + """ + Define update action to take: + + Note: the API only accepts a single script, so + calling the script multiple times will overwrite. + + Example:: + + ubq = Search() + ubq = ubq.script(source="ctx._source.likes++"") + ubq = ubq.script(source="ctx._source.likes += params.f"", + lang="expression", + params={'f': 3}) + """ + ubq = self._clone() + if ubq._script: + ubq._script = {} + ubq._script.update(kwargs) + return ubq + + def to_dict(self, **kwargs): + """ + Serialize the search into the dictionary that will be sent over as the + request'ubq body. + + All additional keyword arguments will be included into the dictionary. + """ + d = {} + if self.query: + d["query"] = self.query.to_dict() + + if self._script: + d["script"] = self._script + + d.update(recursive_to_dict(self._extra)) + d.update(recursive_to_dict(kwargs)) + return d + + def execute(self): + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. + """ + opensearch = get_connection(self._using) + + self._response = self._response_class( + self, + opensearch.update_by_query( + index=self._index, body=self.to_dict(), **self._params + ), + ) + return self._response diff --git a/opensearchpy/helpers/update_by_query.pyi b/opensearchpy/helpers/update_by_query.pyi new file mode 100644 index 00000000..90597033 --- /dev/null +++ b/opensearchpy/helpers/update_by_query.pyi @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .search import Request + +class UpdateByQuery(Request): ... diff --git a/opensearchpy/helpers/utils.py b/opensearchpy/helpers/utils.py new file mode 100644 index 00000000..3ebea18e --- /dev/null +++ b/opensearchpy/helpers/utils.py @@ -0,0 +1,601 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import unicode_literals + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from copy import copy + +from six import add_metaclass, iteritems +from six.moves import map + +from opensearchpy.exceptions import UnknownDslObject, ValidationException + +SKIP_VALUES = ("", None) +EXPAND__TO_DOT = True + +DOC_META_FIELDS = frozenset( + ( + "id", + "routing", + ) +) + +META_FIELDS = frozenset( + ( + # OpenSearch metadata fields, except 'type' + "index", + "using", + "score", + "version", + "seq_no", + "primary_term", + ) +).union(DOC_META_FIELDS) + + +def _wrap(val, obj_wrapper=None): + if isinstance(val, collections_abc.Mapping): + return AttrDict(val) if obj_wrapper is None else obj_wrapper(val) + if isinstance(val, list): + return AttrList(val) + return val + + +class AttrList(object): + def __init__(self, p, obj_wrapper=None): + # make iterables into lists + if not isinstance(p, list): + p = list(p) + self._l_ = p + self._obj_wrapper = obj_wrapper + + def __repr__(self): + return repr(self._l_) + + def __eq__(self, other): + if isinstance(other, AttrList): + return other._l_ == self._l_ + # make sure we still equal to a dict with the same data + return other == self._l_ + + def __ne__(self, other): + return not self == other + + def __getitem__(self, k): + p = self._l_[k] + if isinstance(k, slice): + return AttrList(p, obj_wrapper=self._obj_wrapper) + return _wrap(p, self._obj_wrapper) + + def __setitem__(self, k, value): + self._l_[k] = value + + def __iter__(self): + return map(lambda i: _wrap(i, self._obj_wrapper), self._l_) + + def __len__(self): + return len(self._l_) + + def __nonzero__(self): + return bool(self._l_) + + __bool__ = __nonzero__ + + def __getattr__(self, name): + return getattr(self._l_, name) + + def __getstate__(self): + return self._l_, self._obj_wrapper + + def __setstate__(self, state): + self._l_, self._obj_wrapper = state + + +class AttrDict(object): + """ + Helper class to provide attribute like access (read and write) to + dictionaries. Used to provide a convenient way to access both results and + nested dsl dicts. + """ + + def __init__(self, d): + # assign the inner dict manually to prevent __setattr__ from firing + super(AttrDict, self).__setattr__("_d_", d) + + def __contains__(self, key): + return key in self._d_ + + def __nonzero__(self): + return bool(self._d_) + + __bool__ = __nonzero__ + + def __dir__(self): + # introspection for auto-complete in IPython etc + return list(self._d_.keys()) + + def __eq__(self, other): + if isinstance(other, AttrDict): + return other._d_ == self._d_ + # make sure we still equal to a dict with the same data + return other == self._d_ + + def __ne__(self, other): + return not self == other + + def __repr__(self): + r = repr(self._d_) + if len(r) > 60: + r = r[:60] + "...}" + return r + + def __getstate__(self): + return (self._d_,) + + def __setstate__(self, state): + super(AttrDict, self).__setattr__("_d_", state[0]) + + def __getattr__(self, attr_name): + try: + return self.__getitem__(attr_name) + except KeyError: + raise AttributeError( + "{!r} object has no attribute {!r}".format( + self.__class__.__name__, attr_name + ) + ) + + def get(self, key, default=None): + try: + return self.__getattr__(key) + except AttributeError: + if default is not None: + return default + raise + + def __delattr__(self, attr_name): + try: + del self._d_[attr_name] + except KeyError: + raise AttributeError( + "{!r} object has no attribute {!r}".format( + self.__class__.__name__, attr_name + ) + ) + + def __getitem__(self, key): + return _wrap(self._d_[key]) + + def __setitem__(self, key, value): + self._d_[key] = value + + def __delitem__(self, key): + del self._d_[key] + + def __setattr__(self, name, value): + if name in self._d_ or not hasattr(self.__class__, name): + self._d_[name] = value + else: + # there is an attribute on the class (could be property, ..) - don't add it as field + super(AttrDict, self).__setattr__(name, value) + + def __iter__(self): + return iter(self._d_) + + def to_dict(self): + return self._d_ + + +class DslMeta(type): + """ + Base Metaclass for DslBase subclasses that builds a registry of all classes + for given DslBase subclass (== all the query types for the Query subclass + of DslBase). + + It then uses the information from that registry (as well as `name` and + `shortcut` attributes from the base class) to construct any subclass based + on it's name. + + For typical use see `QueryMeta` and `Query` in `opensearchpy.query`. + """ + + _types = {} + + def __init__(cls, name, bases, attrs): + super(DslMeta, cls).__init__(name, bases, attrs) + # skip for DslBase + if not hasattr(cls, "_type_shortcut"): + return + if cls.name is None: + # abstract base class, register it's shortcut + cls._types[cls._type_name] = cls._type_shortcut + # and create a registry for subclasses + if not hasattr(cls, "_classes"): + cls._classes = {} + elif cls.name not in cls._classes: + # normal class, register it + cls._classes[cls.name] = cls + + @classmethod + def get_dsl_type(cls, name): + try: + return cls._types[name] + except KeyError: + raise UnknownDslObject("DSL type %s does not exist." % name) + + +@add_metaclass(DslMeta) +class DslBase(object): + """ + Base class for all DSL objects - queries, filters, aggregations etc. Wraps + a dictionary representing the object's json. + + Provides several feature: + - attribute access to the wrapped dictionary (.field instead of ['field']) + - _clone method returning a copy of self + - to_dict method to serialize into dict (to be sent via opensearch-py) + - basic logical operators (&, | and ~) using a Bool(Filter|Query) TODO: + move into a class specific for Query/Filter + - respects the definition of the class and (de)serializes it's + attributes based on the `_param_defs` definition (for example turning + all values in the `must` attribute into Query objects) + """ + + _param_defs = {} + + @classmethod + def get_dsl_class(cls, name, default=None): + try: + return cls._classes[name] + except KeyError: + if default is not None: + return cls._classes[default] + raise UnknownDslObject( + "DSL class `{}` does not exist in {}.".format(name, cls._type_name) + ) + + def __init__(self, _expand__to_dot=EXPAND__TO_DOT, **params): + self._params = {} + for pname, pvalue in iteritems(params): + if "__" in pname and _expand__to_dot: + pname = pname.replace("__", ".") + self._setattr(pname, pvalue) + + def _repr_params(self): + """Produce a repr of all our parameters to be used in __repr__.""" + return ", ".join( + "{}={!r}".format(n.replace(".", "__"), v) + for (n, v) in sorted(iteritems(self._params)) + # make sure we don't include empty typed params + if "type" not in self._param_defs.get(n, {}) or v + ) + + def __repr__(self): + return "{}({})".format(self.__class__.__name__, self._repr_params()) + + def __eq__(self, other): + return isinstance(other, self.__class__) and other.to_dict() == self.to_dict() + + def __ne__(self, other): + return not self == other + + def __setattr__(self, name, value): + if name.startswith("_"): + return super(DslBase, self).__setattr__(name, value) + return self._setattr(name, value) + + def _setattr(self, name, value): + # if this attribute has special type assigned to it... + if name in self._param_defs: + pinfo = self._param_defs[name] + + if "type" in pinfo: + # get the shortcut used to construct this type (query.Q, aggs.A, etc) + shortcut = self.__class__.get_dsl_type(pinfo["type"]) + + # list of dict(name -> DslBase) + if pinfo.get("multi") and pinfo.get("hash"): + if not isinstance(value, (tuple, list)): + value = (value,) + value = list( + {k: shortcut(v) for (k, v) in iteritems(obj)} for obj in value + ) + elif pinfo.get("multi"): + if not isinstance(value, (tuple, list)): + value = (value,) + value = list(map(shortcut, value)) + + # dict(name -> DslBase), make sure we pickup all the objs + elif pinfo.get("hash"): + value = {k: shortcut(v) for (k, v) in iteritems(value)} + + # single value object, just convert + else: + value = shortcut(value) + self._params[name] = value + + def __getattr__(self, name): + if name.startswith("_"): + raise AttributeError( + "{!r} object has no attribute {!r}".format( + self.__class__.__name__, name + ) + ) + + value = None + try: + value = self._params[name] + except KeyError: + # compound types should never throw AttributeError and return empty + # container instead + if name in self._param_defs: + pinfo = self._param_defs[name] + if pinfo.get("multi"): + value = self._params.setdefault(name, []) + elif pinfo.get("hash"): + value = self._params.setdefault(name, {}) + if value is None: + raise AttributeError( + "{!r} object has no attribute {!r}".format( + self.__class__.__name__, name + ) + ) + + # wrap nested dicts in AttrDict for convenient access + if isinstance(value, collections_abc.Mapping): + return AttrDict(value) + return value + + def to_dict(self): + """ + Serialize the DSL object to plain dict + """ + d = {} + for pname, value in iteritems(self._params): + pinfo = self._param_defs.get(pname) + + # typed param + if pinfo and "type" in pinfo: + # don't serialize empty lists and dicts for typed fields + if value in ({}, []): + continue + + # list of dict(name -> DslBase) + if pinfo.get("multi") and pinfo.get("hash"): + value = list( + {k: v.to_dict() for k, v in iteritems(obj)} for obj in value + ) + + # multi-values are serialized as list of dicts + elif pinfo.get("multi"): + value = list(map(lambda x: x.to_dict(), value)) + + # squash all the hash values into one dict + elif pinfo.get("hash"): + value = {k: v.to_dict() for k, v in iteritems(value)} + + # serialize single values + else: + value = value.to_dict() + + # serialize anything with to_dict method + elif hasattr(value, "to_dict"): + value = value.to_dict() + + d[pname] = value + return {self.name: d} + + def _clone(self): + c = self.__class__() + for attr in self._params: + c._params[attr] = copy(self._params[attr]) + return c + + +class HitMeta(AttrDict): + def __init__(self, document, exclude=("_source", "_fields")): + d = { + k[1:] if k.startswith("_") else k: v + for (k, v) in iteritems(document) + if k not in exclude + } + if "type" in d: + # make sure we are consistent everywhere in python + d["doc_type"] = d.pop("type") + super(HitMeta, self).__init__(d) + + +class ObjectBase(AttrDict): + def __init__(self, meta=None, **kwargs): + meta = meta or {} + for k in list(kwargs): + if k.startswith("_") and k[1:] in META_FIELDS: + meta[k] = kwargs.pop(k) + + super(AttrDict, self).__setattr__("meta", HitMeta(meta)) + + super(ObjectBase, self).__init__(kwargs) + + @classmethod + def __list_fields(cls): + """ + Get all the fields defined for our class, if we have an Index, try + looking at the index mappings as well, mark the fields from Index as + optional. + """ + for name in cls._doc_type.mapping: + field = cls._doc_type.mapping[name] + yield name, field, False + + if hasattr(cls.__class__, "_index"): + if not cls._index._mapping: + return + for name in cls._index._mapping: + # don't return fields that are in _doc_type + if name in cls._doc_type.mapping: + continue + field = cls._index._mapping[name] + yield name, field, True + + @classmethod + def __get_field(cls, name): + try: + return cls._doc_type.mapping[name] + except KeyError: + # fallback to fields on the Index + if hasattr(cls, "_index") and cls._index._mapping: + try: + return cls._index._mapping[name] + except KeyError: + pass + + @classmethod + def from_opensearch(cls, hit): + meta = hit.copy() + data = meta.pop("_source", {}) + doc = cls(meta=meta) + doc._from_dict(data) + return doc + + def _from_dict(self, data): + for k, v in iteritems(data): + f = self.__get_field(k) + if f and f._coerce: + v = f.deserialize(v) + setattr(self, k, v) + + def __getstate__(self): + return self.to_dict(), self.meta._d_ + + def __setstate__(self, state): + data, meta = state + super(AttrDict, self).__setattr__("_d_", {}) + super(AttrDict, self).__setattr__("meta", HitMeta(meta)) + self._from_dict(data) + + def __getattr__(self, name): + try: + return super(ObjectBase, self).__getattr__(name) + except AttributeError: + f = self.__get_field(name) + if hasattr(f, "empty"): + value = f.empty() + if value not in SKIP_VALUES: + setattr(self, name, value) + value = getattr(self, name) + return value + raise + + def to_dict(self, skip_empty=True): + out = {} + for k, v in iteritems(self._d_): + # if this is a mapped field, + f = self.__get_field(k) + if f and f._coerce: + v = f.serialize(v) + + # if someone assigned AttrList, unwrap it + if isinstance(v, AttrList): + v = v._l_ + + if skip_empty: + # don't serialize empty values + # careful not to include numeric zeros + if v in ([], {}, None): + continue + + out[k] = v + return out + + def clean_fields(self): + errors = {} + for name, field, optional in self.__list_fields(): + data = self._d_.get(name, None) + if data is None and optional: + continue + try: + # save the cleaned value + data = field.clean(data) + except ValidationException as e: + errors.setdefault(name, []).append(e) + + if name in self._d_ or data not in ([], {}, None): + self._d_[name] = data + + if errors: + raise ValidationException(errors) + + def clean(self): + pass + + def full_clean(self): + self.clean_fields() + self.clean() + + +def merge(data, new_data, raise_on_conflict=False): + if not ( + isinstance(data, (AttrDict, collections_abc.Mapping)) + and isinstance(new_data, (AttrDict, collections_abc.Mapping)) + ): + raise ValueError( + "You can only merge two dicts! Got {!r} and {!r} instead.".format( + data, new_data + ) + ) + + for key, value in iteritems(new_data): + if ( + key in data + and isinstance(data[key], (AttrDict, collections_abc.Mapping)) + and isinstance(value, (AttrDict, collections_abc.Mapping)) + ): + merge(data[key], value, raise_on_conflict) + elif key in data and data[key] != value and raise_on_conflict: + raise ValueError("Incompatible data for key %r, cannot be merged." % key) + else: + data[key] = value + + +def recursive_to_dict(data): + """Recursively transform objects that potentially have .to_dict() + into dictionary literals by traversing AttrList, AttrDict, list, + tuple, and Mapping types. + """ + if isinstance(data, AttrList): + data = list(data._l_) + elif hasattr(data, "to_dict"): + data = data.to_dict() + if isinstance(data, (list, tuple)): + return type(data)(recursive_to_dict(inner) for inner in data) + elif isinstance(data, collections_abc.Mapping): + return {key: recursive_to_dict(val) for key, val in data.items()} + return data diff --git a/opensearchpy/helpers/utils.pyi b/opensearchpy/helpers/utils.pyi new file mode 100644 index 00000000..74783974 --- /dev/null +++ b/opensearchpy/helpers/utils.pyi @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class AttrList(object): ... +class AttrDict(object): ... +class DslMeta(type): ... +class DslBase(object): ... +class HitMeta(AttrDict): ... +class ObjectBase(AttrDict): ... diff --git a/opensearchpy/helpers/wrappers.py b/opensearchpy/helpers/wrappers.py new file mode 100644 index 00000000..19cf3dec --- /dev/null +++ b/opensearchpy/helpers/wrappers.py @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import operator + +from six import iteritems, string_types + +from .utils import AttrDict + +__all__ = ["Range"] + + +class Range(AttrDict): + OPS = { + "lt": operator.lt, + "lte": operator.le, + "gt": operator.gt, + "gte": operator.ge, + } + + def __init__(self, *args, **kwargs): + if args and (len(args) > 1 or kwargs or not isinstance(args[0], dict)): + raise ValueError( + "Range accepts a single dictionary or a set of keyword arguments." + ) + data = args[0] if args else kwargs + + for k in data: + if k not in self.OPS: + raise ValueError("Range received an unknown operator %r" % k) + + if "gt" in data and "gte" in data: + raise ValueError("You cannot specify both gt and gte for Range.") + + if "lt" in data and "lte" in data: + raise ValueError("You cannot specify both lt and lte for Range.") + + super(Range, self).__init__(args[0] if args else kwargs) + + def __repr__(self): + return "Range(%s)" % ", ".join("%s=%r" % op for op in iteritems(self._d_)) + + def __contains__(self, item): + if isinstance(item, string_types): + return super(Range, self).__contains__(item) + + for op in self.OPS: + if op in self._d_ and not self.OPS[op](item, self._d_[op]): + return False + return True + + @property + def upper(self): + if "lt" in self._d_: + return self._d_["lt"], False + if "lte" in self._d_: + return self._d_["lte"], True + return None, False + + @property + def lower(self): + if "gt" in self._d_: + return self._d_["gt"], False + if "gte" in self._d_: + return self._d_["gte"], True + return None, False diff --git a/opensearchpy/helpers/wrappers.pyi b/opensearchpy/helpers/wrappers.pyi new file mode 100644 index 00000000..fc79c384 --- /dev/null +++ b/opensearchpy/helpers/wrappers.pyi @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .utils import AttrDict + +class Range(AttrDict): ... diff --git a/opensearchpy/plugins/alerting.pyi b/opensearchpy/plugins/alerting.pyi index 7912ceef..d712e762 100644 --- a/opensearchpy/plugins/alerting.pyi +++ b/opensearchpy/plugins/alerting.pyi @@ -6,7 +6,6 @@ # # Modifications Copyright OpenSearch Contributors. See # GitHub history for details. -# from typing import Any, Union diff --git a/opensearchpy/serializer.py b/opensearchpy/serializer.py index c6198e31..295c4af0 100644 --- a/opensearchpy/serializer.py +++ b/opensearchpy/serializer.py @@ -36,6 +36,7 @@ from .compat import string_types from .exceptions import ImproperlyConfigured, SerializationError +from .helpers.utils import AttrList INTEGER_TYPES = () FLOAT_TYPES = (Decimal,) @@ -194,3 +195,15 @@ def loads(self, s, mimetype=None): ) return deserializer.loads(s) + + +class AttrJSONSerializer(JSONSerializer): + def default(self, data): + if isinstance(data, AttrList): + return data._l_ + if hasattr(data, "to_dict"): + return data.to_dict() + return super(AttrJSONSerializer, self).default(data) + + +serializer = AttrJSONSerializer() diff --git a/opensearchpy/serializer.pyi b/opensearchpy/serializer.pyi index 3dc1031f..c68f51ca 100644 --- a/opensearchpy/serializer.pyi +++ b/opensearchpy/serializer.pyi @@ -51,3 +51,5 @@ class Deserializer(object): default_mimetype: str = ..., ) -> None: ... def loads(self, s: str, mimetype: Optional[str] = ...) -> Any: ... + +class AttrJSONSerializer(JSONSerializer): ... diff --git a/opensearchpy/transport.py b/opensearchpy/transport.py index 78c559d8..c1d69d2c 100644 --- a/opensearchpy/transport.py +++ b/opensearchpy/transport.py @@ -197,13 +197,14 @@ def set_connections(self, hosts): :arg hosts: same as `__init__` """ + # construct the connections def _create_connection(host): # if this is not the initial setup look at the existing connection # options and identify connections that haven't changed and can be # kept around. if hasattr(self, "connection_pool"): - for (connection, old_host) in self.connection_pool.connection_opts: + for connection, old_host in self.connection_pool.connection_opts: if old_host == host: return connection diff --git a/setup.py b/setup.py index a359cfb6..e504bcf8 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ import re +import sys from os.path import abspath, dirname, join from setuptools import find_packages, setup @@ -48,21 +49,27 @@ for package in find_packages(where=".", exclude=("test_opensearchpy*",)) if package == module_dir or package.startswith(module_dir + ".") ] - install_requires = [ "urllib3>=1.21.1, <2", "certifi", "requests>=2.4.0, <3.0.0", + "six", + "python-dateutil", + # ipaddress is included in stdlib since python 3.3 + 'ipaddress; python_version<"3.3"', ] tests_require = [ "requests>=2.0.0, <3.0.0", - "coverage", + "coverage<7.0.0", "mock", "pyyaml", - "pytest", + "pytest>=3.0.0", "pytest-cov", + "pytz", "botocore;python_version>='3.6'", ] +if sys.version_info >= (3, 6): + tests_require.append("pytest-mock<4.0.0") async_require = ["aiohttp>=3,<4"] docs_require = ["sphinx", "sphinx_rtd_theme", "myst_parser", "sphinx_copybutton"] @@ -70,7 +77,7 @@ setup( name=package_name, - description="Python low-level client for OpenSearch", + description="Python client for OpenSearch", license="Apache-2.0", url="https://github.com/opensearch-project/opensearch-py", long_description=long_description, diff --git a/test_opensearchpy/run_tests.py b/test_opensearchpy/run_tests.py index f7117ab9..aba69650 100755 --- a/test_opensearchpy/run_tests.py +++ b/test_opensearchpy/run_tests.py @@ -85,7 +85,6 @@ def fetch_opensearch_repo(): def run_all(argv=None): sys.exitfunc = lambda: sys.stderr.write("Shutting down....\n") - # fetch yaml tests anywhere that's not GitHub Actions if "GITHUB_ACTION" not in environ: fetch_opensearch_repo() diff --git a/test_opensearchpy/test_async/test_asyncsigner.py b/test_opensearchpy/test_async/test_asyncsigner.py index 3be40142..3ed6b061 100644 --- a/test_opensearchpy/test_async/test_asyncsigner.py +++ b/test_opensearchpy/test_async/test_asyncsigner.py @@ -8,7 +8,6 @@ # Modifications Copyright OpenSearch Contributors. See # GitHub history for details. - import sys import pytest diff --git a/test_opensearchpy/test_async/test_server/test_helpers.py b/test_opensearchpy/test_async/test_server/test_helpers.py index 387d0a96..beac5268 100644 --- a/test_opensearchpy/test_async/test_server/test_helpers.py +++ b/test_opensearchpy/test_async/test_server/test_helpers.py @@ -34,8 +34,9 @@ import pytest from mock import MagicMock, patch -from opensearchpy import TransportError, helpers -from opensearchpy.helpers import ScanError +from opensearchpy import TransportError +from opensearchpy._async import helpers +from opensearchpy.helpers import BulkIndexError, ScanError pytestmark = pytest.mark.asyncio @@ -135,7 +136,7 @@ async def test_all_errors_from_chunk_are_raised_on_failure(self, async_client): async_client, [{"a": "b"}, {"a": "c"}], index="i", raise_on_error=True ): assert ok - except helpers.BulkIndexError as e: + except BulkIndexError as e: assert 2 == len(e.errors) else: assert False, "exception should have been raised" @@ -346,7 +347,7 @@ async def test_error_is_raised(self, async_client): ) await async_client.cluster.health(wait_for_status="yellow") - with pytest.raises(helpers.BulkIndexError): + with pytest.raises(BulkIndexError): await helpers.async_bulk(async_client, [{"a": 42}, {"a": "c"}], index="i") async def test_ignore_error_if_raised(self, async_client): @@ -371,7 +372,7 @@ async def test_ignore_error_if_raised(self, async_client): ) # ignore only the status code in the `ignore_status` argument - with pytest.raises(helpers.BulkIndexError): + with pytest.raises(BulkIndexError): await helpers.async_bulk( async_client, [{"a": 42}, {"a": "c"}], index="i", ignore_status=(444,) ) @@ -530,7 +531,6 @@ async def test_initial_search_error(self, async_client, scan_teardown): ), ): with patch.object(async_client, "scroll", MockScroll()): - data = [ x async for x in helpers.async_scan( @@ -554,7 +554,6 @@ async def test_initial_search_error(self, async_client, scan_teardown): ), ): with patch.object(async_client, "scroll", MockScroll()) as mock_scroll: - with pytest.raises(ScanError): data = [ x @@ -872,7 +871,6 @@ async def test_children_are_reindexed_correctly( self, async_client, parent_reindex_setup ): await helpers.async_reindex(async_client, "test-index", "real-index") - assert {"question_answer": "question"} == ( await async_client.get(index="real-index", id=42) )["_source"] diff --git a/test_opensearchpy/test_connection.py b/test_opensearchpy/test_connection.py index 93b6913b..3ed1cf0b 100644 --- a/test_opensearchpy/test_connection.py +++ b/test_opensearchpy/test_connection.py @@ -67,6 +67,11 @@ except ImportError: # Old version of pytest for 2.7 and 3.5 from _pytest.monkeypatch import MonkeyPatch +from pytest import raises + +from opensearchpy import OpenSearch, serializer +from opensearchpy.connection import connections + def gzip_decompress(data): buf = gzip.GzipFile(fileobj=io.BytesIO(data), mode="rb") @@ -999,3 +1004,75 @@ def test_requests_connection_error(self): conn = RequestsHttpConnection("not.a.host.name") with pytest.raises(ConnectionError): conn.perform_request("GET", "/") + + +def test_default_connection_is_returned_by_default(): + c = connections.Connections() + + con, con2 = object(), object() + c.add_connection("default", con) + + c.add_connection("not-default", con2) + + assert c.get_connection() is con + + +def test_get_connection_created_connection_if_needed(): + c = connections.Connections() + c.configure(default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]}) + + default = c.get_connection() + local = c.get_connection("local") + + assert isinstance(default, OpenSearch) + assert isinstance(local, OpenSearch) + + assert [{"host": "opensearch.com"}] == default.transport.hosts + assert [{"host": "localhost"}] == local.transport.hosts + + +def test_configure_preserves_unchanged_connections(): + c = connections.Connections() + + c.configure(default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]}) + default = c.get_connection() + local = c.get_connection("local") + + c.configure( + default={"hosts": ["not-opensearch.com"]}, local={"hosts": ["localhost"]} + ) + new_default = c.get_connection() + new_local = c.get_connection("local") + + assert new_local is local + assert new_default is not default + + +def test_remove_connection_removes_both_conn_and_conf(): + c = connections.Connections() + + c.configure(default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]}) + c.add_connection("local2", object()) + + c.remove_connection("default") + c.get_connection("local2") + c.remove_connection("local2") + + with raises(Exception): + c.get_connection("local2") + c.get_connection("default") + + +def test_create_connection_constructs_client(): + c = connections.Connections() + c.create_connection("testing", hosts=["opensearch.com"]) + + con = c.get_connection("testing") + assert [{"host": "opensearch.com"}] == con.transport.hosts + + +def test_create_connection_adds_our_serializer(): + c = connections.Connections() + c.create_connection("testing", hosts=["opensearch.com"]) + + assert c.get_connection("testing").transport.serializer is serializer.serializer diff --git a/test_opensearchpy/test_helpers/__init__.py b/test_opensearchpy/test_helpers/__init__.py new file mode 100644 index 00000000..7e52ae22 --- /dev/null +++ b/test_opensearchpy/test_helpers/__init__.py @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_opensearchpy/test_helpers/conftest.py b/test_opensearchpy/test_helpers/conftest.py new file mode 100644 index 00000000..9c93ccd0 --- /dev/null +++ b/test_opensearchpy/test_helpers/conftest.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from mock import Mock +from pytest import fixture + +from opensearchpy.connection.connections import add_connection, connections + + +@fixture +def mock_client(dummy_response): + client = Mock() + client.search.return_value = dummy_response + add_connection("mock", client) + yield client + connections._conn = {} + connections._kwargs = {} + + +@fixture +def dummy_response(): + return { + "_shards": {"failed": 0, "successful": 10, "total": 10}, + "hits": { + "hits": [ + { + "_index": "test-index", + "_id": "opensearch", + "_score": 12.0, + "_source": {"city": "Amsterdam", "name": "OpenSearch"}, + }, + { + "_index": "test-index", + "_id": "42", + "_score": 11.123, + "_routing": "opensearch", + "_source": { + "name": {"first": "Shay", "last": "Bannon"}, + "lang": "java", + "twitter": "kimchy", + }, + }, + { + "_index": "test-index", + "_id": "47", + "_score": 1, + "_routing": "opensearch", + "_source": { + "name": {"first": "Honza", "last": "Král"}, + "lang": "python", + "twitter": "honzakral", + }, + }, + { + "_index": "test-index", + "_id": "53", + "_score": 16.0, + "_routing": "opensearch", + }, + ], + "max_score": 12.0, + "total": 123, + }, + "timed_out": False, + "took": 123, + } + + +@fixture +def aggs_search(): + from opensearchpy import Search + + s = Search(index="flat-git") + s.aggs.bucket("popular_files", "terms", field="files", size=2).metric( + "line_stats", "stats", field="stats.lines" + ).metric("top_commits", "top_hits", size=2, _source=["stats.*", "committed_date"]) + s.aggs.bucket( + "per_month", "date_histogram", interval="month", field="info.committed_date" + ) + s.aggs.metric("sum_lines", "sum", field="stats.lines") + return s + + +@fixture +def aggs_data(): + return { + "took": 4, + "timed_out": False, + "_shards": {"total": 1, "successful": 1, "failed": 0}, + "hits": {"total": 52, "hits": [], "max_score": 0.0}, + "aggregations": { + "sum_lines": {"value": 25052.0}, + "per_month": { + "buckets": [ + { + "doc_count": 38, + "key": 1393632000000, + "key_as_string": "2014-03-01T00:00:00.000Z", + }, + { + "doc_count": 11, + "key": 1396310400000, + "key_as_string": "2014-04-01T00:00:00.000Z", + }, + { + "doc_count": 3, + "key": 1398902400000, + "key_as_string": "2014-05-01T00:00:00.000Z", + }, + ] + }, + "popular_files": { + "buckets": [ + { + "key": "opensearchpy", + "line_stats": { + "count": 40, + "max": 228.0, + "min": 2.0, + "sum": 2151.0, + "avg": 53.775, + }, + "doc_count": 40, + "top_commits": { + "hits": { + "total": 40, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, + "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "_type": "doc", + "_source": { + "stats": { + "files": 1, + "deletions": 0, + "lines": 18, + "insertions": 18, + }, + "committed_date": "2014-05-01T13:32:14", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + { + "key": "test_opensearchpy/test_dsl", + "line_stats": { + "count": 35, + "max": 228.0, + "min": 2.0, + "sum": 1939.0, + "avg": 55.4, + }, + "doc_count": 35, + "top_commits": { + "hits": { + "total": 35, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, + "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "_type": "doc", + "_source": { + "stats": { + "files": 3, + "deletions": 18, + "lines": 62, + "insertions": 44, + }, + "committed_date": "2014-05-01T13:30:44", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 120, + }, + }, + } diff --git a/test_opensearchpy/test_helpers.py b/test_opensearchpy/test_helpers/test_actions.py similarity index 99% rename from test_opensearchpy/test_helpers.py rename to test_opensearchpy/test_helpers/test_actions.py index fcf57e6e..3538ae28 100644 --- a/test_opensearchpy/test_helpers.py +++ b/test_opensearchpy/test_helpers/test_actions.py @@ -35,7 +35,7 @@ from opensearchpy import OpenSearch, helpers from opensearchpy.serializer import JSONSerializer -from .test_cases import TestCase +from ..test_cases import TestCase lock_side_effect = threading.Lock() diff --git a/test_opensearchpy/test_helpers/test_aggs.py b/test_opensearchpy/test_helpers/test_aggs.py new file mode 100644 index 00000000..13059ccc --- /dev/null +++ b/test_opensearchpy/test_helpers/test_aggs.py @@ -0,0 +1,365 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from opensearchpy.helpers import aggs, query + + +def test_repr(): + max_score = aggs.Max(field="score") + a = aggs.A("terms", field="tags", aggs={"max_score": max_score}) + + assert "Terms(aggs={'max_score': Max(field='score')}, field='tags')" == repr(a) + + +def test_meta(): + max_score = aggs.Max(field="score") + a = aggs.A( + "terms", field="tags", aggs={"max_score": max_score}, meta={"some": "metadata"} + ) + + assert { + "terms": {"field": "tags"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + "meta": {"some": "metadata"}, + } == a.to_dict() + + +def test_meta_from_dict(): + max_score = aggs.Max(field="score") + a = aggs.A( + "terms", field="tags", aggs={"max_score": max_score}, meta={"some": "metadata"} + ) + + assert aggs.A(a.to_dict()) == a + + +def test_A_creates_proper_agg(): + a = aggs.A("terms", field="tags") + + assert isinstance(a, aggs.Terms) + assert a._params == {"field": "tags"} + + +def test_A_handles_nested_aggs_properly(): + max_score = aggs.Max(field="score") + a = aggs.A("terms", field="tags", aggs={"max_score": max_score}) + + assert isinstance(a, aggs.Terms) + assert a._params == {"field": "tags", "aggs": {"max_score": max_score}} + + +def test_A_passes_aggs_through(): + a = aggs.A("terms", field="tags") + assert aggs.A(a) is a + + +def test_A_from_dict(): + d = { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } + a = aggs.A(d) + + assert isinstance(a, aggs.Terms) + assert a._params == { + "field": "tags", + "aggs": {"per_author": aggs.A("terms", field="author.raw")}, + } + assert a["per_author"] == aggs.A("terms", field="author.raw") + assert a.aggs.per_author == aggs.A("terms", field="author.raw") + + +def test_A_fails_with_incorrect_dict(): + correct_d = { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } + + with raises(Exception): + aggs.A(correct_d, field="f") + + d = correct_d.copy() + del d["terms"] + with raises(Exception): + aggs.A(d) + + d = correct_d.copy() + d["xx"] = {} + with raises(Exception): + aggs.A(d) + + +def test_A_fails_with_agg_and_params(): + a = aggs.A("terms", field="tags") + + with raises(Exception): + aggs.A(a, field="score") + + +def test_buckets_are_nestable(): + a = aggs.Terms(field="tags") + b = a.bucket("per_author", "terms", field="author.raw") + + assert isinstance(b, aggs.Terms) + assert b._params == {"field": "author.raw"} + assert a.aggs == {"per_author": b} + + +def test_metric_inside_buckets(): + a = aggs.Terms(field="tags") + b = a.metric("max_score", "max", field="score") + + # returns bucket so it's chainable + assert a is b + assert a.aggs["max_score"] == aggs.Max(field="score") + + +def test_buckets_equals_counts_subaggs(): + a = aggs.Terms(field="tags") + a.bucket("per_author", "terms", field="author.raw") + b = aggs.Terms(field="tags") + + assert a != b + + +def test_buckets_to_dict(): + a = aggs.Terms(field="tags") + a.bucket("per_author", "terms", field="author.raw") + + assert { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } == a.to_dict() + + a = aggs.Terms(field="tags") + a.metric("max_score", "max", field="score") + + assert { + "terms": {"field": "tags"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } == a.to_dict() + + +def test_nested_buckets_are_reachable_as_getitem(): + a = aggs.Terms(field="tags") + b = a.bucket("per_author", "terms", field="author.raw") + + assert a["per_author"] is not b + assert a["per_author"] == b + + +def test_nested_buckets_are_settable_as_getitem(): + a = aggs.Terms(field="tags") + b = a["per_author"] = aggs.A("terms", field="author.raw") + + assert a.aggs["per_author"] is b + + +def test_filter_can_be_instantiated_using_positional_args(): + a = aggs.Filter(query.Q("term", f=42)) + + assert {"filter": {"term": {"f": 42}}} == a.to_dict() + + assert a == aggs.A("filter", query.Q("term", f=42)) + + +def test_filter_aggregation_as_nested_agg(): + a = aggs.Terms(field="tags") + a.bucket("filtered", "filter", query.Q("term", f=42)) + + assert { + "terms": {"field": "tags"}, + "aggs": {"filtered": {"filter": {"term": {"f": 42}}}}, + } == a.to_dict() + + +def test_filter_aggregation_with_nested_aggs(): + a = aggs.Filter(query.Q("term", f=42)) + a.bucket("testing", "terms", field="tags") + + assert { + "filter": {"term": {"f": 42}}, + "aggs": {"testing": {"terms": {"field": "tags"}}}, + } == a.to_dict() + + +def test_filters_correctly_identifies_the_hash(): + a = aggs.A( + "filters", + filters={ + "group_a": {"term": {"group": "a"}}, + "group_b": {"term": {"group": "b"}}, + }, + ) + + assert { + "filters": { + "filters": { + "group_a": {"term": {"group": "a"}}, + "group_b": {"term": {"group": "b"}}, + } + } + } == a.to_dict() + assert a.filters.group_a == query.Q("term", group="a") + + +def test_bucket_sort_agg(): + bucket_sort_agg = aggs.BucketSort(sort=[{"total_sales": {"order": "desc"}}], size=3) + assert bucket_sort_agg.to_dict() == { + "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3} + } + + a = aggs.DateHistogram(field="date", interval="month") + a.bucket("total_sales", "sum", field="price") + a.bucket( + "sales_bucket_sort", + "bucket_sort", + sort=[{"total_sales": {"order": "desc"}}], + size=3, + ) + assert { + "date_histogram": {"field": "date", "interval": "month"}, + "aggs": { + "total_sales": {"sum": {"field": "price"}}, + "sales_bucket_sort": { + "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3} + }, + }, + } == a.to_dict() + + +def test_bucket_sort_agg_only_trnunc(): + bucket_sort_agg = aggs.BucketSort(**{"from": 1, "size": 1}) + assert bucket_sort_agg.to_dict() == {"bucket_sort": {"from": 1, "size": 1}} + + a = aggs.DateHistogram(field="date", interval="month") + a.bucket("bucket_truncate", "bucket_sort", **{"from": 1, "size": 1}) + assert { + "date_histogram": {"field": "date", "interval": "month"}, + "aggs": {"bucket_truncate": {"bucket_sort": {"from": 1, "size": 1}}}, + } == a.to_dict() + + +def test_geohash_grid_aggregation(): + a = aggs.GeohashGrid(**{"field": "centroid", "precision": 3}) + + assert {"geohash_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def test_geotile_grid_aggregation(): + a = aggs.GeotileGrid(**{"field": "centroid", "precision": 3}) + + assert {"geotile_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def test_boxplot_aggregation(): + a = aggs.Boxplot(field="load_time") + + assert {"boxplot": {"field": "load_time"}} == a.to_dict() + + +def test_rare_terms_aggregation(): + a = aggs.RareTerms(field="the-field") + a.bucket("total_sales", "sum", field="price") + a.bucket( + "sales_bucket_sort", + "bucket_sort", + sort=[{"total_sales": {"order": "desc"}}], + size=3, + ) + + assert { + "aggs": { + "sales_bucket_sort": { + "bucket_sort": {"size": 3, "sort": [{"total_sales": {"order": "desc"}}]} + }, + "total_sales": {"sum": {"field": "price"}}, + }, + "rare_terms": {"field": "the-field"}, + } == a.to_dict() + + +def test_variable_width_histogram_aggregation(): + a = aggs.VariableWidthHistogram(field="price", buckets=2) + assert {"variable_width_histogram": {"buckets": 2, "field": "price"}} == a.to_dict() + + +def test_median_absolute_deviation_aggregation(): + a = aggs.MedianAbsoluteDeviation(field="rating") + + assert {"median_absolute_deviation": {"field": "rating"}} == a.to_dict() + + +def test_t_test_aggregation(): + a = aggs.TTest( + a={"field": "startup_time_before"}, + b={"field": "startup_time_after"}, + type="paired", + ) + + assert { + "t_test": { + "a": {"field": "startup_time_before"}, + "b": {"field": "startup_time_after"}, + "type": "paired", + } + } == a.to_dict() + + +def test_inference_aggregation(): + a = aggs.Inference(model_id="model-id", buckets_path={"agg_name": "agg_name"}) + assert { + "inference": {"buckets_path": {"agg_name": "agg_name"}, "model_id": "model-id"} + } == a.to_dict() + + +def test_moving_percentiles_aggregation(): + a = aggs.DateHistogram() + a.bucket("the_percentile", "percentiles", field="price", percents=[1.0, 99.0]) + a.pipeline( + "the_movperc", "moving_percentiles", buckets_path="the_percentile", window=10 + ) + + assert { + "aggs": { + "the_movperc": { + "moving_percentiles": {"buckets_path": "the_percentile", "window": 10} + }, + "the_percentile": { + "percentiles": {"field": "price", "percents": [1.0, 99.0]} + }, + }, + "date_histogram": {}, + } == a.to_dict() + + +def test_normalize_aggregation(): + a = aggs.Normalize(buckets_path="normalized", method="percent_of_sum") + assert { + "normalize": {"buckets_path": "normalized", "method": "percent_of_sum"} + } == a.to_dict() diff --git a/test_opensearchpy/test_helpers/test_analysis.py b/test_opensearchpy/test_helpers/test_analysis.py new file mode 100644 index 00000000..49a1d1fd --- /dev/null +++ b/test_opensearchpy/test_helpers/test_analysis.py @@ -0,0 +1,226 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from opensearchpy.helpers import analysis + + +def test_analyzer_serializes_as_name(): + a = analysis.analyzer("my_analyzer") + + assert "my_analyzer" == a.to_dict() + + +def test_analyzer_has_definition(): + a = analysis.CustomAnalyzer( + "my_analyzer", tokenizer="keyword", filter=["lowercase"] + ) + + assert { + "type": "custom", + "tokenizer": "keyword", + "filter": ["lowercase"], + } == a.get_definition() + + +def test_simple_multiplexer_filter(): + a = analysis.analyzer( + "my_analyzer", + tokenizer="keyword", + filter=[ + analysis.token_filter( + "my_multi", "multiplexer", filters=["lowercase", "lowercase, stop"] + ) + ], + ) + + assert { + "analyzer": { + "my_analyzer": { + "filter": ["my_multi"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "my_multi": { + "filters": ["lowercase", "lowercase, stop"], + "type": "multiplexer", + } + }, + } == a.get_analysis_definition() + + +def test_multiplexer_with_custom_filter(): + a = analysis.analyzer( + "my_analyzer", + tokenizer="keyword", + filter=[ + analysis.token_filter( + "my_multi", + "multiplexer", + filters=[ + [analysis.token_filter("en", "snowball", language="English")], + "lowercase, stop", + ], + ) + ], + ) + + assert { + "analyzer": { + "my_analyzer": { + "filter": ["my_multi"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "en": {"type": "snowball", "language": "English"}, + "my_multi": {"filters": ["en", "lowercase, stop"], "type": "multiplexer"}, + }, + } == a.get_analysis_definition() + + +def test_conditional_token_filter(): + a = analysis.analyzer( + "my_cond", + tokenizer=analysis.tokenizer("keyword"), + filter=[ + analysis.token_filter( + "testing", + "condition", + script={"source": "return true"}, + filter=[ + "lowercase", + analysis.token_filter("en", "snowball", language="English"), + ], + ), + "stop", + ], + ) + + assert { + "analyzer": { + "my_cond": { + "filter": ["testing", "stop"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "en": {"language": "English", "type": "snowball"}, + "testing": { + "script": {"source": "return true"}, + "filter": ["lowercase", "en"], + "type": "condition", + }, + }, + } == a.get_analysis_definition() + + +def test_conflicting_nested_filters_cause_error(): + a = analysis.analyzer( + "my_cond", + tokenizer=analysis.tokenizer("keyword"), + filter=[ + analysis.token_filter("en", "stemmer", language="english"), + analysis.token_filter( + "testing", + "condition", + script={"source": "return true"}, + filter=[ + "lowercase", + analysis.token_filter("en", "snowball", language="English"), + ], + ), + ], + ) + + with raises(ValueError): + a.get_analysis_definition() + + +def test_normalizer_serializes_as_name(): + n = analysis.normalizer("my_normalizer") + + assert "my_normalizer" == n.to_dict() + + +def test_normalizer_has_definition(): + n = analysis.CustomNormalizer( + "my_normalizer", filter=["lowercase", "asciifolding"], char_filter=["quote"] + ) + + assert { + "type": "custom", + "filter": ["lowercase", "asciifolding"], + "char_filter": ["quote"], + } == n.get_definition() + + +def test_tokenizer(): + t = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) + + assert t.to_dict() == "trigram" + assert {"type": "nGram", "min_gram": 3, "max_gram": 3} == t.get_definition() + + +def test_custom_analyzer_can_collect_custom_items(): + trigram = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) + my_stop = analysis.token_filter("my_stop", "stop", stopwords=["a", "b"]) + umlauts = analysis.char_filter("umlauts", "pattern_replace", mappings=["ü=>ue"]) + a = analysis.analyzer( + "my_analyzer", + tokenizer=trigram, + filter=["lowercase", my_stop], + char_filter=["html_strip", umlauts], + ) + + assert a.to_dict() == "my_analyzer" + assert { + "analyzer": { + "my_analyzer": { + "type": "custom", + "tokenizer": "trigram", + "filter": ["lowercase", "my_stop"], + "char_filter": ["html_strip", "umlauts"], + } + }, + "tokenizer": {"trigram": trigram.get_definition()}, + "filter": {"my_stop": my_stop.get_definition()}, + "char_filter": {"umlauts": umlauts.get_definition()}, + } == a.get_analysis_definition() + + +def test_stemmer_analyzer_can_pass_name(): + t = analysis.token_filter( + "my_english_filter", name="minimal_english", type="stemmer" + ) + assert t.to_dict() == "my_english_filter" + assert {"type": "stemmer", "name": "minimal_english"} == t.get_definition() diff --git a/test_opensearchpy/test_helpers/test_document.py b/test_opensearchpy/test_helpers/test_document.py new file mode 100644 index 00000000..086bde17 --- /dev/null +++ b/test_opensearchpy/test_helpers/test_document.py @@ -0,0 +1,640 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import unicode_literals + +import codecs +import ipaddress +import pickle +from datetime import datetime +from hashlib import sha256 + +from pytest import raises + +from opensearchpy import Index, InnerDoc, Mapping, Range, analyzer +from opensearchpy.exceptions import IllegalOperation, ValidationException +from opensearchpy.helpers import document, field, utils + + +class MyInner(InnerDoc): + old_field = field.Text() + + +class MyDoc(document.Document): + title = field.Keyword() + name = field.Text() + created_at = field.Date() + inner = field.Object(MyInner) + + +class MySubDoc(MyDoc): + name = field.Keyword() + + class Index: + name = "default-index" + + +class MyDoc2(document.Document): + extra = field.Long() + + +class MyMultiSubDoc(MyDoc2, MySubDoc): + pass + + +class Comment(document.InnerDoc): + title = field.Text() + tags = field.Keyword(multi=True) + + +class DocWithNested(document.Document): + comments = field.Nested(Comment) + + class Index: + name = "test-doc-with-nested" + + +class SimpleCommit(document.Document): + files = field.Text(multi=True) + + class Index: + name = "test-git" + + +class Secret(str): + pass + + +class SecretField(field.CustomField): + builtin_type = "text" + + def _serialize(self, data): + return codecs.encode(data, "rot_13") + + def _deserialize(self, data): + if isinstance(data, Secret): + return data + return Secret(codecs.decode(data, "rot_13")) + + +class SecretDoc(document.Document): + title = SecretField(index="no") + + class Index: + name = "test-secret-doc" + + +class NestedSecret(document.Document): + secrets = field.Nested(SecretDoc) + + class Index: + name = "test-nested-secret" + + +class OptionalObjectWithRequiredField(document.Document): + comments = field.Nested(properties={"title": field.Keyword(required=True)}) + + class Index: + name = "test-required" + + +class Host(document.Document): + ip = field.Ip() + + class Index: + name = "test-host" + + +def test_range_serializes_properly(): + class D(document.Document): + lr = field.LongRange() + + d = D(lr=Range(lt=42)) + assert 40 in d.lr + assert 47 not in d.lr + assert {"lr": {"lt": 42}} == d.to_dict() + + d = D(lr={"lt": 42}) + assert {"lr": {"lt": 42}} == d.to_dict() + + +def test_range_deserializes_properly(): + class D(document.InnerDoc): + lr = field.LongRange() + + d = D.from_opensearch({"lr": {"lt": 42}}, True) + assert isinstance(d.lr, Range) + assert 40 in d.lr + assert 47 not in d.lr + + +def test_resolve_nested(): + nested, field = NestedSecret._index.resolve_nested("secrets.title") + assert nested == ["secrets"] + assert field is NestedSecret._doc_type.mapping["secrets"]["title"] + + +def test_conflicting_mapping_raises_error_in_index_to_dict(): + class A(document.Document): + name = field.Text() + + class B(document.Document): + name = field.Keyword() + + i = Index("i") + i.document(A) + i.document(B) + + with raises(ValueError): + i.to_dict() + + +def test_ip_address_serializes_properly(): + host = Host(ip=ipaddress.IPv4Address("10.0.0.1")) + + assert {"ip": "10.0.0.1"} == host.to_dict() + + +def test_matches_uses_index(): + assert SimpleCommit._matches({"_index": "test-git"}) + assert not SimpleCommit._matches({"_index": "not-test-git"}) + + +def test_matches_with_no_name_always_matches(): + class D(document.Document): + pass + + assert D._matches({}) + assert D._matches({"_index": "whatever"}) + + +def test_matches_accepts_wildcards(): + class MyDoc(document.Document): + class Index: + name = "my-*" + + assert MyDoc._matches({"_index": "my-index"}) + assert not MyDoc._matches({"_index": "not-my-index"}) + + +def test_assigning_attrlist_to_field(): + sc = SimpleCommit() + ls = ["README", "README.rst"] + sc.files = utils.AttrList(ls) + + assert sc.to_dict()["files"] is ls + + +def test_optional_inner_objects_are_not_validated_if_missing(): + d = OptionalObjectWithRequiredField() + + assert d.full_clean() is None + + +def test_custom_field(): + s = SecretDoc(title=Secret("Hello")) + + assert {"title": "Uryyb"} == s.to_dict() + assert s.title == "Hello" + + s = SecretDoc.from_opensearch({"_source": {"title": "Uryyb"}}) + assert s.title == "Hello" + assert isinstance(s.title, Secret) + + +def test_custom_field_mapping(): + assert { + "properties": {"title": {"index": "no", "type": "text"}} + } == SecretDoc._doc_type.mapping.to_dict() + + +def test_custom_field_in_nested(): + s = NestedSecret() + s.secrets.append(SecretDoc(title=Secret("Hello"))) + + assert {"secrets": [{"title": "Uryyb"}]} == s.to_dict() + assert s.secrets[0].title == "Hello" + + +def test_multi_works_after_doc_has_been_saved(): + c = SimpleCommit() + c.full_clean() + c.files.append("setup.py") + + assert c.to_dict() == {"files": ["setup.py"]} + + +def test_multi_works_in_nested_after_doc_has_been_serialized(): + # Issue #359 + c = DocWithNested(comments=[Comment(title="First!")]) + + assert [] == c.comments[0].tags + assert {"comments": [{"title": "First!"}]} == c.to_dict() + assert [] == c.comments[0].tags + + +def test_null_value_for_object(): + d = MyDoc(inner=None) + + assert d.inner is None + + +def test_inherited_doc_types_can_override_index(): + class MyDocDifferentIndex(MySubDoc): + class Index: + name = "not-default-index" + settings = {"number_of_replicas": 0} + aliases = {"a": {}} + analyzers = [analyzer("my_analizer", tokenizer="keyword")] + + assert MyDocDifferentIndex._index._name == "not-default-index" + assert MyDocDifferentIndex()._get_index() == "not-default-index" + assert MyDocDifferentIndex._index.to_dict() == { + "aliases": {"a": {}}, + "mappings": { + "properties": { + "created_at": {"type": "date"}, + "inner": { + "type": "object", + "properties": {"old_field": {"type": "text"}}, + }, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + } + }, + "settings": { + "analysis": { + "analyzer": {"my_analizer": {"tokenizer": "keyword", "type": "custom"}} + }, + "number_of_replicas": 0, + }, + } + + +def test_to_dict_with_meta(): + d = MySubDoc(title="hello") + d.meta.routing = "some-parent" + + assert { + "_index": "default-index", + "_routing": "some-parent", + "_source": {"title": "hello"}, + } == d.to_dict(True) + + +def test_to_dict_with_meta_includes_custom_index(): + d = MySubDoc(title="hello") + d.meta.index = "other-index" + + assert {"_index": "other-index", "_source": {"title": "hello"}} == d.to_dict(True) + + +def test_to_dict_without_skip_empty_will_include_empty_fields(): + d = MySubDoc(tags=[], title=None, inner={}) + + assert {} == d.to_dict() + assert {"tags": [], "title": None, "inner": {}} == d.to_dict(skip_empty=False) + + +def test_attribute_can_be_removed(): + d = MyDoc(title="hello") + + del d.title + assert "title" not in d._d_ + + +def test_doc_type_can_be_correctly_pickled(): + d = DocWithNested( + title="Hello World!", comments=[Comment(title="hellp")], meta={"id": 42} + ) + s = pickle.dumps(d) + + d2 = pickle.loads(s) + + assert d2 == d + assert 42 == d2.meta.id + assert "Hello World!" == d2.title + assert [{"title": "hellp"}] == d2.comments + assert isinstance(d2.comments[0], Comment) + + +def test_meta_is_accessible_even_on_empty_doc(): + d = MyDoc() + d.meta + + d = MyDoc(title="aaa") + d.meta + + +def test_meta_field_mapping(): + class User(document.Document): + username = field.Text() + + class Meta: + all = document.MetaField(enabled=False) + _index = document.MetaField(enabled=True) + dynamic = document.MetaField("strict") + dynamic_templates = document.MetaField([42]) + + assert { + "properties": {"username": {"type": "text"}}, + "_all": {"enabled": False}, + "_index": {"enabled": True}, + "dynamic": "strict", + "dynamic_templates": [42], + } == User._doc_type.mapping.to_dict() + + +def test_multi_value_fields(): + class Blog(document.Document): + tags = field.Keyword(multi=True) + + b = Blog() + assert [] == b.tags + b.tags.append("search") + b.tags.append("python") + assert ["search", "python"] == b.tags + + +def test_docs_with_properties(): + class User(document.Document): + pwd_hash = field.Text() + + def check_password(self, pwd): + return sha256(pwd).hexdigest() == self.pwd_hash + + @property + def password(self): + raise AttributeError("readonly") + + @password.setter + def password(self, pwd): + self.pwd_hash = sha256(pwd).hexdigest() + + u = User(pwd_hash=sha256(b"secret").hexdigest()) + assert u.check_password(b"secret") + assert not u.check_password(b"not-secret") + + u.password = b"not-secret" + assert "password" not in u._d_ + assert not u.check_password(b"secret") + assert u.check_password(b"not-secret") + + with raises(AttributeError): + u.password + + +def test_nested_can_be_assigned_to(): + d1 = DocWithNested(comments=[Comment(title="First!")]) + d2 = DocWithNested() + + d2.comments = d1.comments + assert isinstance(d1.comments[0], Comment) + assert d2.comments == [{"title": "First!"}] + assert {"comments": [{"title": "First!"}]} == d2.to_dict() + assert isinstance(d2.comments[0], Comment) + + +def test_nested_can_be_none(): + d = DocWithNested(comments=None, title="Hello World!") + + assert {"title": "Hello World!"} == d.to_dict() + + +def test_nested_defaults_to_list_and_can_be_updated(): + md = DocWithNested() + + assert [] == md.comments + + md.comments.append({"title": "hello World!"}) + assert {"comments": [{"title": "hello World!"}]} == md.to_dict() + + +def test_to_dict_is_recursive_and_can_cope_with_multi_values(): + md = MyDoc(name=["a", "b", "c"]) + md.inner = [MyInner(old_field="of1"), MyInner(old_field="of2")] + + assert isinstance(md.inner[0], MyInner) + + assert { + "name": ["a", "b", "c"], + "inner": [{"old_field": "of1"}, {"old_field": "of2"}], + } == md.to_dict() + + +def test_to_dict_ignores_empty_collections(): + md = MySubDoc(name="", address={}, count=0, valid=False, tags=[]) + + assert {"name": "", "count": 0, "valid": False} == md.to_dict() + + +def test_declarative_mapping_definition(): + assert issubclass(MyDoc, document.Document) + assert hasattr(MyDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "text"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MyDoc._doc_type.mapping.to_dict() + + +def test_you_can_supply_own_mapping_instance(): + class MyD(document.Document): + title = field.Text() + + class Meta: + mapping = Mapping() + mapping.meta("_all", enabled=False) + + assert { + "_all": {"enabled": False}, + "properties": {"title": {"type": "text"}}, + } == MyD._doc_type.mapping.to_dict() + + +def test_document_can_be_created_dynamically(): + n = datetime.now() + md = MyDoc(title="hello") + md.name = "My Fancy Document!" + md.created_at = n + + inner = md.inner + # consistent returns + assert inner is md.inner + inner.old_field = "Already defined." + + md.inner.new_field = ["undefined", "field"] + + assert { + "title": "hello", + "name": "My Fancy Document!", + "created_at": n, + "inner": {"old_field": "Already defined.", "new_field": ["undefined", "field"]}, + } == md.to_dict() + + +def test_invalid_date_will_raise_exception(): + md = MyDoc() + md.created_at = "not-a-date" + with raises(ValidationException): + md.full_clean() + + +def test_document_inheritance(): + assert issubclass(MySubDoc, MyDoc) + assert issubclass(MySubDoc, document.Document) + assert hasattr(MySubDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MySubDoc._doc_type.mapping.to_dict() + + +def test_child_class_can_override_parent(): + class A(document.Document): + o = field.Object(dynamic=False, properties={"a": field.Text()}) + + class B(A): + o = field.Object(dynamic="strict", properties={"b": field.Text()}) + + assert { + "properties": { + "o": { + "dynamic": "strict", + "properties": {"a": {"type": "text"}, "b": {"type": "text"}}, + "type": "object", + } + } + } == B._doc_type.mapping.to_dict() + + +def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict(): + md = MySubDoc(meta={"id": 42}, name="My First doc!") + + md.meta.index = "my-index" + assert md.meta.index == "my-index" + assert md.meta.id == 42 + assert {"name": "My First doc!"} == md.to_dict() + assert {"id": 42, "index": "my-index"} == md.meta.to_dict() + + +def test_index_inheritance(): + assert issubclass(MyMultiSubDoc, MySubDoc) + assert issubclass(MyMultiSubDoc, MyDoc2) + assert issubclass(MyMultiSubDoc, document.Document) + assert hasattr(MyMultiSubDoc, "_doc_type") + assert hasattr(MyMultiSubDoc, "_index") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + "extra": {"type": "long"}, + } + } == MyMultiSubDoc._doc_type.mapping.to_dict() + + +def test_meta_fields_can_be_set_directly_in_init(): + p = object() + md = MyDoc(_id=p, title="Hello World!") + + assert md.meta.id is p + + +def test_save_no_index(mock_client): + md = MyDoc() + with raises(ValidationException): + md.save(using="mock") + + +def test_delete_no_index(mock_client): + md = MyDoc() + with raises(ValidationException): + md.delete(using="mock") + + +def test_update_no_fields(): + md = MyDoc() + with raises(IllegalOperation): + md.update() + + +def test_search_with_custom_alias_and_index(mock_client): + search_object = MyDoc.search( + using="staging", index=["custom_index1", "custom_index2"] + ) + + assert search_object._using == "staging" + assert search_object._index == ["custom_index1", "custom_index2"] + + +def test_from_opensearch_respects_underscored_non_meta_fields(): + doc = { + "_index": "test-index", + "_id": "opensearch", + "_score": 12.0, + "fields": {"hello": "world", "_routing": "opensearch", "_tags": ["search"]}, + "_source": { + "city": "Amsterdam", + "name": "OpenSearch", + "_tagline": "You know, for search", + }, + } + + class Company(document.Document): + class Index: + name = "test-company" + + c = Company.from_opensearch(doc) + + assert c.meta.fields._tags == ["search"] + assert c.meta.fields._routing == "opensearch" + assert c._tagline == "You know, for search" + + +def test_nested_and_object_inner_doc(): + class MySubDocWithNested(MyDoc): + nested_inner = field.Nested(MyInner) + + props = MySubDocWithNested._doc_type.mapping.to_dict()["properties"] + assert props == { + "created_at": {"type": "date"}, + "inner": {"properties": {"old_field": {"type": "text"}}, "type": "object"}, + "name": {"type": "text"}, + "nested_inner": { + "properties": {"old_field": {"type": "text"}}, + "type": "nested", + }, + "title": {"type": "keyword"}, + } diff --git a/test_opensearchpy/test_helpers/test_faceted_search.py b/test_opensearchpy/test_helpers/test_faceted_search.py new file mode 100644 index 00000000..066fc9d4 --- /dev/null +++ b/test_opensearchpy/test_helpers/test_faceted_search.py @@ -0,0 +1,204 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest + +from opensearchpy.helpers.faceted_search import ( + DateHistogramFacet, + FacetedSearch, + TermsFacet, +) + + +class BlogSearch(FacetedSearch): + doc_types = ["user", "post"] + fields = ( + "title^5", + "body", + ) + + facets = { + "category": TermsFacet(field="category.raw"), + "tags": TermsFacet(field="tags"), + } + + +def test_query_is_created_properly(): + bs = BlogSearch("python search") + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_query_is_created_properly_with_sort_tuple(): + bs = BlogSearch("python search", sort=("category", "-title")) + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + "sort": ["category", {"title": {"order": "desc"}}], + } == s.to_dict() + + +def test_filter_is_applied_to_search_but_not_relevant_facet(): + bs = BlogSearch("python search", filters={"category": "opensearch"}) + s = bs.build_search() + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["opensearch"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "post_filter": {"terms": {"category.raw": ["opensearch"]}}, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_filters_are_applied_to_search_ant_relevant_facets(): + bs = BlogSearch( + "python search", + filters={"category": "opensearch", "tags": ["python", "django"]}, + ) + s = bs.build_search() + + d = s.to_dict() + + # we need to test post_filter without relying on order + f = d["post_filter"]["bool"].pop("must") + assert len(f) == 2 + assert {"terms": {"category.raw": ["opensearch"]}} in f + assert {"terms": {"tags": ["python", "django"]}} in f + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["opensearch"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"terms": {"tags": ["python", "django"]}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "post_filter": {"bool": {}}, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == d + + +def test_date_histogram_facet_with_1970_01_01_date(): + dhf = DateHistogramFacet() + assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0) + assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0) + + +@pytest.mark.parametrize( + ["interval_type", "interval"], + [ + ("interval", "year"), + ("calendar_interval", "year"), + ("interval", "month"), + ("calendar_interval", "month"), + ("interval", "week"), + ("calendar_interval", "week"), + ("interval", "day"), + ("calendar_interval", "day"), + ("fixed_interval", "day"), + ("interval", "hour"), + ("fixed_interval", "hour"), + ("interval", "1Y"), + ("calendar_interval", "1Y"), + ("interval", "1M"), + ("calendar_interval", "1M"), + ("interval", "1w"), + ("calendar_interval", "1w"), + ("interval", "1d"), + ("calendar_interval", "1d"), + ("fixed_interval", "1d"), + ("interval", "1h"), + ("fixed_interval", "1h"), + ], +) +def test_date_histogram_interval_types(interval_type, interval): + dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval}) + assert dhf.get_aggregation().to_dict() == { + "date_histogram": { + "field": "@timestamp", + interval_type: interval, + "min_doc_count": 0, + } + } + dhf.get_value_filter(datetime.now()) + + +def test_date_histogram_no_interval_keyerror(): + dhf = DateHistogramFacet(field="@timestamp") + with pytest.raises(KeyError) as e: + dhf.get_value_filter(datetime.now()) + assert str(e.value) == "'interval'" diff --git a/test_opensearchpy/test_helpers/test_field.py b/test_opensearchpy/test_helpers/test_field.py new file mode 100644 index 00000000..15b51c52 --- /dev/null +++ b/test_opensearchpy/test_helpers/test_field.py @@ -0,0 +1,224 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import base64 +import sys +from datetime import datetime +from ipaddress import ip_address + +import pytest +from dateutil import tz + +from opensearchpy import InnerDoc, Range, ValidationException +from opensearchpy.helpers import field + + +def test_date_range_deserialization(): + data = {"lt": "2018-01-01T00:30:10"} + + r = field.DateRange().deserialize(data) + + assert isinstance(r, Range) + assert r.lt == datetime(2018, 1, 1, 0, 30, 10) + + +def test_boolean_deserialization(): + bf = field.Boolean() + + assert not bf.deserialize("false") + assert not bf.deserialize(False) + assert not bf.deserialize("") + assert not bf.deserialize(0) + + assert bf.deserialize(True) + assert bf.deserialize("true") + assert bf.deserialize(1) + + +def test_date_field_can_have_default_tz(): + f = field.Date(default_timezone="UTC") + now = datetime.now() + + now_with_tz = f._deserialize(now) + + assert now_with_tz.tzinfo == tz.gettz("UTC") + assert now.isoformat() + "+00:00" == now_with_tz.isoformat() + + now_with_tz = f._deserialize(now.isoformat()) + + assert now_with_tz.tzinfo == tz.gettz("UTC") + assert now.isoformat() + "+00:00" == now_with_tz.isoformat() + + +def test_custom_field_car_wrap_other_field(): + class MyField(field.CustomField): + @property + def builtin_type(self): + return field.Text(**self._params) + + assert {"type": "text", "index": "not_analyzed"} == MyField( + index="not_analyzed" + ).to_dict() + + +def test_field_from_dict(): + f = field.construct_field({"type": "text", "index": "not_analyzed"}) + + assert isinstance(f, field.Text) + assert {"type": "text", "index": "not_analyzed"} == f.to_dict() + + +def test_multi_fields_are_accepted_and_parsed(): + f = field.construct_field( + "text", + fields={"raw": {"type": "keyword"}, "eng": field.Text(analyzer="english")}, + ) + + assert isinstance(f, field.Text) + assert { + "type": "text", + "fields": { + "raw": {"type": "keyword"}, + "eng": {"type": "text", "analyzer": "english"}, + }, + } == f.to_dict() + + +def test_nested_provides_direct_access_to_its_fields(): + f = field.Nested(properties={"name": {"type": "text", "index": "not_analyzed"}}) + + assert "name" in f + assert f["name"] == field.Text(index="not_analyzed") + + +def test_field_supports_multiple_analyzers(): + f = field.Text(analyzer="snowball", search_analyzer="keyword") + assert { + "analyzer": "snowball", + "search_analyzer": "keyword", + "type": "text", + } == f.to_dict() + + +def test_multifield_supports_multiple_analyzers(): + f = field.Text( + fields={ + "f1": field.Text(search_analyzer="keyword", analyzer="snowball"), + "f2": field.Text(analyzer="keyword"), + } + ) + assert { + "fields": { + "f1": { + "analyzer": "snowball", + "search_analyzer": "keyword", + "type": "text", + }, + "f2": {"analyzer": "keyword", "type": "text"}, + }, + "type": "text", + } == f.to_dict() + + +def test_scaled_float(): + with pytest.raises(TypeError): + field.ScaledFloat() + f = field.ScaledFloat(123) + assert f.to_dict() == {"scaling_factor": 123, "type": "scaled_float"} + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") +def test_ipaddress(): + f = field.Ip() + assert f.deserialize("127.0.0.1") == ip_address("127.0.0.1") + assert f.deserialize("::1") == ip_address("::1") + assert f.serialize(f.deserialize("::1")) == "::1" + assert f.deserialize(None) is None + with pytest.raises(ValueError): + assert f.deserialize("not_an_ipaddress") + + +def test_float(): + f = field.Float() + assert f.deserialize("42") == 42.0 + assert f.deserialize(None) is None + with pytest.raises(ValueError): + assert f.deserialize("not_a_float") + + +def test_integer(): + f = field.Integer() + assert f.deserialize("42") == 42 + assert f.deserialize(None) is None + with pytest.raises(ValueError): + assert f.deserialize("not_an_integer") + + +def test_binary(): + f = field.Binary() + assert f.deserialize(base64.b64encode(b"42")) == b"42" + assert f.deserialize(f.serialize(b"42")) == b"42" + assert f.deserialize(None) is None + + +def test_constant_keyword(): + f = field.ConstantKeyword() + assert f.to_dict() == {"type": "constant_keyword"} + + +def test_rank_features(): + f = field.RankFeatures() + assert f.to_dict() == {"type": "rank_features"} + + +def test_object_dynamic_values(): + for dynamic in True, False, "strict": + f = field.Object(dynamic=dynamic) + assert f.to_dict()["dynamic"] == dynamic + + +def test_object_disabled(): + f = field.Object(enabled=False) + assert f.to_dict() == {"type": "object", "enabled": False} + + +def test_object_constructor(): + expected = {"type": "object", "properties": {"inner_int": {"type": "integer"}}} + + class Inner(InnerDoc): + inner_int = field.Integer() + + obj_from_doc = field.Object(doc_class=Inner) + assert obj_from_doc.to_dict() == expected + + obj_from_props = field.Object(properties={"inner_int": field.Integer()}) + assert obj_from_props.to_dict() == expected + + with pytest.raises(ValidationException): + field.Object(doc_class=Inner, properties={"inner_int": field.Integer()}) + + with pytest.raises(ValidationException): + field.Object(doc_class=Inner, dynamic=False) diff --git a/test_opensearchpy/test_helpers/test_index.py b/test_opensearchpy/test_helpers/test_index.py new file mode 100644 index 00000000..40048bc6 --- /dev/null +++ b/test_opensearchpy/test_helpers/test_index.py @@ -0,0 +1,196 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import string +from random import choice + +from pytest import raises + +from opensearchpy import Date, Document, Index, IndexTemplate, Text, analyzer + + +class Post(Document): + title = Text() + published_from = Date() + + +def test_multiple_doc_types_will_combine_mappings(): + class User(Document): + username = Text() + + i = Index("i") + i.document(Post) + i.document(User) + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "username": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_search_is_limited_to_index_name(): + i = Index("my-index") + s = i.search() + + assert s._index == ["my-index"] + + +def test_cloned_index_has_copied_settings_and_using(): + client = object() + i = Index("my-index", using=client) + i.settings(number_of_shards=1) + + i2 = i.clone("my-other-index") + + assert "my-other-index" == i2._name + assert client is i2._using + assert i._settings == i2._settings + assert i._settings is not i2._settings + + +def test_cloned_index_has_analysis_attribute(): + """ + Regression test for Issue #582 in which `Index.clone()` was not copying + over the `_analysis` attribute. + """ + client = object() + i = Index("my-index", using=client) + + random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + i.analyzer(random_analyzer) + + i2 = i.clone("my-clone-index") + + assert i.to_dict()["settings"]["analysis"] == i2.to_dict()["settings"]["analysis"] + + +def test_settings_are_saved(): + i = Index("i") + i.settings(number_of_replicas=0) + i.settings(number_of_shards=1) + + assert {"settings": {"number_of_shards": 1, "number_of_replicas": 0}} == i.to_dict() + + +def test_registered_doc_type_included_in_to_dict(): + i = Index("i", using="alias") + i.document(Post) + + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_registered_doc_type_included_in_search(): + i = Index("i", using="alias") + i.document(Post) + + s = i.search() + + assert s._doc_type == [Post] + + +def test_aliases_add_to_object(): + random_alias = "".join((choice(string.ascii_letters) for _ in range(100))) + alias_dict = {random_alias: {}} + + index = Index("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == alias_dict + + +def test_aliases_returned_from_to_dict(): + random_alias = "".join((choice(string.ascii_letters) for _ in range(100))) + alias_dict = {random_alias: {}} + + index = Index("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == index.to_dict()["aliases"] == alias_dict + + +def test_analyzers_added_to_object(): + random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + index = Index("i", using="alias") + index.analyzer(random_analyzer) + + assert index._analysis["analyzer"][random_analyzer_name] == { + "filter": ["standard"], + "type": "custom", + "tokenizer": "standard", + } + + +def test_analyzers_returned_from_to_dict(): + random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + index = Index("i", using="alias") + index.analyzer(random_analyzer) + + assert index.to_dict()["settings"]["analysis"]["analyzer"][ + random_analyzer_name + ] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} + + +def test_conflicting_analyzer_raises_error(): + i = Index("i") + i.analyzer("my_analyzer", tokenizer="whitespace", filter=["lowercase", "stop"]) + + with raises(ValueError): + i.analyzer("my_analyzer", tokenizer="keyword", filter=["lowercase", "stop"]) + + +def test_index_template_can_have_order(): + i = Index("i-*") + it = i.as_template("i", order=2) + + assert {"index_patterns": ["i-*"], "order": 2} == it.to_dict() + + +def test_index_template_save_result(mock_client): + it = IndexTemplate("test-template", "test-*") + + assert it.save(using="mock") == mock_client.indices.put_template() diff --git a/test_opensearchpy/test_helpers/test_mapping.py b/test_opensearchpy/test_helpers/test_mapping.py new file mode 100644 index 00000000..822440a4 --- /dev/null +++ b/test_opensearchpy/test_helpers/test_mapping.py @@ -0,0 +1,232 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json + +from opensearchpy import Keyword, Nested, Text +from opensearchpy.helpers import analysis, mapping + + +def test_mapping_can_has_fields(): + m = mapping.Mapping() + m.field("name", "text").field("tags", "keyword") + + assert { + "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}} + } == m.to_dict() + + +def test_mapping_update_is_recursive(): + m1 = mapping.Mapping() + m1.field("title", "text") + m1.field("author", "object") + m1.field("author", "object", properties={"name": {"type": "text"}}) + m1.meta("_all", enabled=False) + m1.meta("dynamic", False) + + m2 = mapping.Mapping() + m2.field("published_from", "date") + m2.field("author", "object", properties={"email": {"type": "text"}}) + m2.field("title", "text") + m2.field("lang", "keyword") + m2.meta("_analyzer", path="lang") + + m1.update(m2, update_only=True) + + assert { + "_all": {"enabled": False}, + "_analyzer": {"path": "lang"}, + "dynamic": False, + "properties": { + "published_from": {"type": "date"}, + "title": {"type": "text"}, + "lang": {"type": "keyword"}, + "author": { + "type": "object", + "properties": {"name": {"type": "text"}, "email": {"type": "text"}}, + }, + }, + } == m1.to_dict() + + +def test_properties_can_iterate_over_all_the_fields(): + m = mapping.Mapping() + m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")}) + m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")})) + + assert {"f1", "f2", "f3", "f4"} == { + f.test_attr for f in m.properties._collect_fields() + } + + +def test_mapping_can_collect_all_analyzers_and_normalizers(): + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer("english") + a3 = analysis.analyzer("unknown_custom") + a4 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + a5 = analysis.analyzer("my_analyzer3", tokenizer="keyword") + n1 = analysis.normalizer("my_normalizer1", filter=["lowercase"]) + n2 = analysis.normalizer( + "my_normalizer2", + filter=[ + "my_filter1", + "my_filter2", + analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"]), + ], + ) + n3 = analysis.normalizer("unknown_custom") + + m = mapping.Mapping() + m.field( + "title", + "text", + analyzer=a1, + fields={"english": Text(analyzer=a2), "unknown": Keyword(search_analyzer=a3)}, + ) + m.field("comments", Nested(properties={"author": Text(analyzer=a4)})) + m.field("normalized_title", "keyword", normalizer=n1) + m.field("normalized_comment", "keyword", normalizer=n2) + m.field("unknown", "keyword", normalizer=n3) + m.meta("_all", analyzer=a5) + + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + "my_analyzer3": {"tokenizer": "keyword", "type": "custom"}, + }, + "normalizer": { + "my_normalizer1": {"filter": ["lowercase"], "type": "custom"}, + "my_normalizer2": { + "filter": ["my_filter1", "my_filter2", "my_filter3"], + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + "my_filter3": {"stopwords": ["e", "f"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + assert json.loads(json.dumps(m.to_dict())) == m.to_dict() + + +def test_mapping_can_collect_multiple_analyzers(): + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + m = mapping.Mapping() + m.field("title", "text", analyzer=a1, search_analyzer=a2) + m.field( + "text", + "text", + analyzer=a1, + fields={ + "english": Text(analyzer=a1), + "unknown": Keyword(analyzer=a1, search_analyzer=a2), + }, + ) + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + +def test_even_non_custom_analyzers_can_have_params(): + a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+") + m = mapping.Mapping() + m.field("title", "text", analyzer=a1) + + assert { + "analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}} + } == m._collect_analysis() + + +def test_resolve_field_can_resolve_multifields(): + m = mapping.Mapping() + m.field("title", "text", fields={"keyword": Keyword()}) + + assert isinstance(m.resolve_field("title.keyword"), Keyword) + + +def test_resolve_nested(): + m = mapping.Mapping() + m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})}) + m.field("k2", "keyword") + + nested, field = m.resolve_nested("n1.n2.k1") + assert nested == ["n1", "n1.n2"] + assert isinstance(field, Keyword) + + nested, field = m.resolve_nested("k2") + assert nested == [] + assert isinstance(field, Keyword) diff --git a/test_opensearchpy/test_helpers/test_query.py b/test_opensearchpy/test_helpers/test_query.py new file mode 100644 index 00000000..46707f2c --- /dev/null +++ b/test_opensearchpy/test_helpers/test_query.py @@ -0,0 +1,564 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from opensearchpy.helpers import function, query + + +def test_empty_Q_is_match_all(): + q = query.Q() + + assert isinstance(q, query.MatchAll) + assert query.MatchAll() == q + + +def test_match_to_dict(): + assert {"match": {"f": "value"}} == query.Match(f="value").to_dict() + + +def test_match_to_dict_extra(): + assert {"match": {"f": "value", "boost": 2}} == query.Match( + f="value", boost=2 + ).to_dict() + + +def test_fuzzy_to_dict(): + assert {"fuzzy": {"f": "value"}} == query.Fuzzy(f="value").to_dict() + + +def test_prefix_to_dict(): + assert {"prefix": {"f": "value"}} == query.Prefix(f="value").to_dict() + + +def test_term_to_dict(): + assert {"term": {"_type": "article"}} == query.Term(_type="article").to_dict() + + +def test_bool_to_dict(): + bool = query.Bool(must=[query.Match(f="value")], should=[]) + + assert {"bool": {"must": [{"match": {"f": "value"}}]}} == bool.to_dict() + + +def test_dismax_to_dict(): + assert {"dis_max": {"queries": [{"term": {"_type": "article"}}]}} == query.DisMax( + queries=[query.Term(_type="article")] + ).to_dict() + + +def test_bool_from_dict_issue_318(): + d = {"bool": {"must_not": {"match": {"field": "value"}}}} + q = query.Q(d) + + assert q == ~query.Match(field="value") + + +def test_repr(): + bool = query.Bool(must=[query.Match(f="value")], should=[]) + + assert "Bool(must=[Match(f='value')])" == repr(bool) + + +def test_query_clone(): + bool = query.Bool( + must=[query.Match(x=42)], + should=[query.Match(g="v2")], + must_not=[query.Match(title="value")], + ) + bool_clone = bool._clone() + + assert bool == bool_clone + assert bool is not bool_clone + + +def test_bool_converts_its_init_args_to_queries(): + q = query.Bool(must=[{"match": {"f": "value"}}]) + + assert len(q.must) == 1 + assert q.must[0] == query.Match(f="value") + + +def test_two_queries_make_a_bool(): + q1 = query.Match(f="value1") + q2 = query.Match(message={"query": "this is a test", "opeartor": "and"}) + q = q1 & q2 + + assert isinstance(q, query.Bool) + assert [q1, q2] == q.must + + +def test_other_and_bool_appends_other_to_must(): + q1 = query.Match(f="value1") + qb = query.Bool() + + q = q1 & qb + assert q is not qb + assert q.must[0] == q1 + + +def test_bool_and_other_appends_other_to_must(): + q1 = query.Match(f="value1") + qb = query.Bool() + + q = qb & q1 + assert q is not qb + assert q.must[0] == q1 + + +def test_bool_and_other_sets_min_should_match_if_needed(): + q1 = query.Q("term", category=1) + q2 = query.Q( + "bool", should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")] + ) + + q = q1 & q2 + assert q == query.Bool( + must=[q1], + should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")], + minimum_should_match=1, + ) + + +def test_bool_with_different_minimum_should_match_should_not_be_combined(): + q1 = query.Q( + "bool", + minimum_should_match=2, + should=[ + query.Q("term", field="aa1"), + query.Q("term", field="aa2"), + query.Q("term", field="aa3"), + query.Q("term", field="aa4"), + ], + ) + q2 = query.Q( + "bool", + minimum_should_match=3, + should=[ + query.Q("term", field="bb1"), + query.Q("term", field="bb2"), + query.Q("term", field="bb3"), + query.Q("term", field="bb4"), + ], + ) + q3 = query.Q( + "bool", + minimum_should_match=4, + should=[ + query.Q("term", field="cc1"), + query.Q("term", field="cc2"), + query.Q("term", field="cc3"), + query.Q("term", field="cc4"), + ], + ) + + q4 = q1 | q2 + assert q4 == query.Bool(should=[q1, q2]) + + q5 = q1 | q2 | q3 + assert q5 == query.Bool(should=[q1, q2, q3]) + + +def test_empty_bool_has_min_should_match_0(): + assert 0 == query.Bool()._min_should_match + + +def test_query_and_query_creates_bool(): + q1 = query.Match(f=42) + q2 = query.Match(g=47) + + q = q1 & q2 + assert isinstance(q, query.Bool) + assert q.must == [q1, q2] + + +def test_match_all_and_query_equals_other(): + q1 = query.Match(f=42) + q2 = query.MatchAll() + + q = q1 & q2 + assert q1 == q + + +def test_not_match_all_is_match_none(): + q = query.MatchAll() + + assert ~q == query.MatchNone() + + +def test_not_match_none_is_match_all(): + q = query.MatchNone() + + assert ~q == query.MatchAll() + + +def test_invert_empty_bool_is_match_none(): + q = query.Bool() + + assert ~q == query.MatchNone() + + +def test_match_none_or_query_equals_query(): + q1 = query.Match(f=42) + q2 = query.MatchNone() + + assert q1 | q2 == query.Match(f=42) + + +def test_match_none_and_query_equals_match_none(): + q1 = query.Match(f=42) + q2 = query.MatchNone() + + assert q1 & q2 == query.MatchNone() + + +def test_bool_and_bool(): + qt1, qt2, qt3 = query.Match(f=1), query.Match(f=2), query.Match(f=3) + + q1 = query.Bool(must=[qt1], should=[qt2]) + q2 = query.Bool(must_not=[qt3]) + assert q1 & q2 == query.Bool( + must=[qt1], must_not=[qt3], should=[qt2], minimum_should_match=0 + ) + + q1 = query.Bool(must=[qt1], should=[qt1, qt2]) + q2 = query.Bool(should=[qt3]) + assert q1 & q2 == query.Bool( + must=[qt1, qt3], should=[qt1, qt2], minimum_should_match=0 + ) + + +def test_bool_and_bool_with_min_should_match(): + qt1, qt2 = query.Match(f=1), query.Match(f=2) + q1 = query.Q("bool", minimum_should_match=1, should=[qt1]) + q2 = query.Q("bool", minimum_should_match=1, should=[qt2]) + + assert query.Q("bool", must=[qt1, qt2]) == q1 & q2 + + +def test_inverted_query_becomes_bool_with_must_not(): + q = query.Match(f=42) + + assert ~q == query.Bool(must_not=[query.Match(f=42)]) + + +def test_inverted_query_with_must_not_become_should(): + q = query.Q("bool", must_not=[query.Q("match", f=1), query.Q("match", f=2)]) + + assert ~q == query.Q("bool", should=[query.Q("match", f=1), query.Q("match", f=2)]) + + +def test_inverted_query_with_must_and_must_not(): + q = query.Q( + "bool", + must=[query.Q("match", f=3), query.Q("match", f=4)], + must_not=[query.Q("match", f=1), query.Q("match", f=2)], + ) + print((~q).to_dict()) + assert ~q == query.Q( + "bool", + should=[ + # negation of must + query.Q("bool", must_not=[query.Q("match", f=3)]), + query.Q("bool", must_not=[query.Q("match", f=4)]), + # negation of must_not + query.Q("match", f=1), + query.Q("match", f=2), + ], + ) + + +def test_double_invert_returns_original_query(): + q = query.Match(f=42) + + assert q == ~~q + + +def test_bool_query_gets_inverted_internally(): + q = query.Bool(must_not=[query.Match(f=42)], must=[query.Match(g="v")]) + + assert ~q == query.Bool( + should=[ + # negating must + query.Bool(must_not=[query.Match(g="v")]), + # negating must_not + query.Match(f=42), + ] + ) + + +def test_match_all_or_something_is_match_all(): + q1 = query.MatchAll() + q2 = query.Match(f=42) + + assert (q1 | q2) == query.MatchAll() + assert (q2 | q1) == query.MatchAll() + + +def test_or_produces_bool_with_should(): + q1 = query.Match(f=42) + q2 = query.Match(g="v") + + q = q1 | q2 + assert q == query.Bool(should=[q1, q2]) + + +def test_or_bool_doesnt_loop_infinitely_issue_37(): + q = query.Match(f=42) | ~query.Match(f=47) + + assert q == query.Bool( + should=[query.Bool(must_not=[query.Match(f=47)]), query.Match(f=42)] + ) + + +def test_or_bool_doesnt_loop_infinitely_issue_96(): + q = ~query.Match(f=42) | ~query.Match(f=47) + + assert q == query.Bool( + should=[ + query.Bool(must_not=[query.Match(f=42)]), + query.Bool(must_not=[query.Match(f=47)]), + ] + ) + + +def test_bool_will_append_another_query_with_or(): + qb = query.Bool(should=[query.Match(f="v"), query.Match(f="v2")]) + q = query.Match(g=42) + + assert (q | qb) == query.Bool(should=[query.Match(f="v"), query.Match(f="v2"), q]) + + +def test_bool_queries_with_only_should_get_concatenated(): + q1 = query.Bool(should=[query.Match(f=1), query.Match(f=2)]) + q2 = query.Bool(should=[query.Match(f=3), query.Match(f=4)]) + + assert (q1 | q2) == query.Bool( + should=[query.Match(f=1), query.Match(f=2), query.Match(f=3), query.Match(f=4)] + ) + + +def test_two_bool_queries_append_one_to_should_if_possible(): + q1 = query.Bool(should=[query.Match(f="v")]) + q2 = query.Bool(must=[query.Match(f="v")]) + + assert (q1 | q2) == query.Bool( + should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])] + ) + assert (q2 | q1) == query.Bool( + should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])] + ) + + +def test_queries_are_registered(): + assert "match" in query.Query._classes + assert query.Query._classes["match"] is query.Match + + +def test_defining_query_registers_it(): + class MyQuery(query.Query): + name = "my_query" + + assert "my_query" in query.Query._classes + assert query.Query._classes["my_query"] is MyQuery + + +def test_Q_passes_query_through(): + q = query.Match(f="value1") + + assert query.Q(q) is q + + +def test_Q_constructs_query_by_name(): + q = query.Q("match", f="value") + + assert isinstance(q, query.Match) + assert {"f": "value"} == q._params + + +def test_Q_translates_double_underscore_to_dots_in_param_names(): + q = query.Q("match", comment__author="honza") + + assert {"comment.author": "honza"} == q._params + + +def test_Q_doesn_translate_double_underscore_to_dots_in_param_names(): + q = query.Q("match", comment__author="honza", _expand__to_dot=False) + + assert {"comment__author": "honza"} == q._params + + +def test_Q_constructs_simple_query_from_dict(): + q = query.Q({"match": {"f": "value"}}) + + assert isinstance(q, query.Match) + assert {"f": "value"} == q._params + + +def test_Q_constructs_compound_query_from_dict(): + q = query.Q({"bool": {"must": [{"match": {"f": "value"}}]}}) + + assert q == query.Bool(must=[query.Match(f="value")]) + + +def test_Q_raises_error_when_passed_in_dict_and_params(): + with raises(Exception): + query.Q({"match": {"f": "value"}}, f="value") + + +def test_Q_raises_error_when_passed_in_query_and_params(): + q = query.Match(f="value1") + + with raises(Exception): + query.Q(q, f="value") + + +def test_Q_raises_error_on_unknown_query(): + with raises(Exception): + query.Q("not a query", f="value") + + +def test_match_all_and_anything_is_anything(): + q = query.MatchAll() + + s = query.Match(f=42) + assert q & s == s + assert s & q == s + + +def test_function_score_with_functions(): + q = query.Q( + "function_score", + functions=[query.SF("script_score", script="doc['comment_count'] * _score")], + ) + + assert { + "function_score": { + "functions": [{"script_score": {"script": "doc['comment_count'] * _score"}}] + } + } == q.to_dict() + + +def test_function_score_with_no_function_is_boost_factor(): + q = query.Q( + "function_score", + functions=[query.SF({"weight": 20, "filter": query.Q("term", f=42)})], + ) + + assert { + "function_score": {"functions": [{"filter": {"term": {"f": 42}}, "weight": 20}]} + } == q.to_dict() + + +def test_function_score_to_dict(): + q = query.Q( + "function_score", + query=query.Q("match", title="python"), + functions=[ + query.SF("random_score"), + query.SF( + "field_value_factor", + field="comment_count", + filter=query.Q("term", tags="python"), + ), + ], + ) + + d = { + "function_score": { + "query": {"match": {"title": "python"}}, + "functions": [ + {"random_score": {}}, + { + "filter": {"term": {"tags": "python"}}, + "field_value_factor": {"field": "comment_count"}, + }, + ], + } + } + assert d == q.to_dict() + + +def test_function_score_with_single_function(): + d = { + "function_score": { + "filter": {"term": {"tags": "python"}}, + "script_score": {"script": "doc['comment_count'] * _score"}, + } + } + + q = query.Q(d) + assert isinstance(q, query.FunctionScore) + assert isinstance(q.filter, query.Term) + assert len(q.functions) == 1 + + sf = q.functions[0] + assert isinstance(sf, function.ScriptScore) + assert "doc['comment_count'] * _score" == sf.script + + +def test_function_score_from_dict(): + d = { + "function_score": { + "filter": {"term": {"tags": "python"}}, + "functions": [ + { + "filter": {"terms": {"tags": "python"}}, + "script_score": {"script": "doc['comment_count'] * _score"}, + }, + {"boost_factor": 6}, + ], + } + } + + q = query.Q(d) + assert isinstance(q, query.FunctionScore) + assert isinstance(q.filter, query.Term) + assert len(q.functions) == 2 + + sf = q.functions[0] + assert isinstance(sf, function.ScriptScore) + assert isinstance(sf.filter, query.Terms) + + sf = q.functions[1] + assert isinstance(sf, function.BoostFactor) + assert 6 == sf.value + assert {"boost_factor": 6} == sf.to_dict() + + +def test_script_score(): + d = { + "script_score": { + "query": {"match_all": {}}, + "script": {"source": "...", "params": {}}, + } + } + q = query.Q(d) + + assert isinstance(q, query.ScriptScore) + assert isinstance(q.query, query.MatchAll) + assert q.script == {"source": "...", "params": {}} + assert q.to_dict() == d diff --git a/test_opensearchpy/test_helpers/test_result.py b/test_opensearchpy/test_helpers/test_result.py new file mode 100644 index 00000000..f07c633b --- /dev/null +++ b/test_opensearchpy/test_helpers/test_result.py @@ -0,0 +1,204 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pickle +from datetime import date + +from pytest import fixture, raises + +from opensearchpy import Date, Document, Object, Search +from opensearchpy.helpers import response +from opensearchpy.helpers.aggs import Terms +from opensearchpy.helpers.response.aggs import AggResponse, Bucket, BucketData + + +@fixture +def agg_response(aggs_search, aggs_data): + return response.Response(aggs_search, aggs_data) + + +def test_agg_response_is_pickleable(agg_response): + agg_response.hits + r = pickle.loads(pickle.dumps(agg_response)) + + assert r == agg_response + assert r._search == agg_response._search + assert r.hits == agg_response.hits + + +def test_response_is_pickleable(dummy_response): + res = response.Response(Search(), dummy_response) + res.hits + r = pickle.loads(pickle.dumps(res)) + + assert r == res + assert r._search == res._search + assert r.hits == res.hits + + +def test_hit_is_pickleable(dummy_response): + res = response.Response(Search(), dummy_response) + hits = pickle.loads(pickle.dumps(res.hits)) + + assert hits == res.hits + assert hits[0].meta == res.hits[0].meta + + +def test_response_stores_search(dummy_response): + s = Search() + r = response.Response(s, dummy_response) + + assert r._search is s + + +def test_interactive_helpers(dummy_response): + res = response.Response(Search(), dummy_response) + hits = res.hits + h = hits[0] + + rhits = ( + "[, , " + ", ]" + ).format( + repr(dummy_response["hits"]["hits"][0]["_source"]), + repr(dummy_response["hits"]["hits"][1]["_source"])[:60], + repr(dummy_response["hits"]["hits"][2]["_source"])[:60], + ) + + assert res + assert "" % rhits == repr(res) + assert rhits == repr(hits) + assert {"meta", "city", "name"} == set(dir(h)) + assert "" % dummy_response["hits"]["hits"][0][ + "_source" + ] == repr(h) + + +def test_empty_response_is_false(dummy_response): + dummy_response["hits"]["hits"] = [] + res = response.Response(Search(), dummy_response) + + assert not res + + +def test_len_response(dummy_response): + res = response.Response(Search(), dummy_response) + assert len(res) == 4 + + +def test_iterating_over_response_gives_you_hits(dummy_response): + res = response.Response(Search(), dummy_response) + hits = list(h for h in res) + + assert res.success() + assert 123 == res.took + assert 4 == len(hits) + assert all(isinstance(h, response.Hit) for h in hits) + h = hits[0] + + assert "test-index" == h.meta.index + assert "opensearch" == h.meta.id + assert 12 == h.meta.score + + assert hits[1].meta.routing == "opensearch" + + +def test_hits_get_wrapped_to_contain_additional_attrs(dummy_response): + res = response.Response(Search(), dummy_response) + hits = res.hits + + assert 123 == hits.total + assert 12.0 == hits.max_score + + +def test_hits_provide_dot_and_bracket_access_to_attrs(dummy_response): + res = response.Response(Search(), dummy_response) + h = res.hits[0] + + assert "OpenSearch" == h.name + assert "OpenSearch" == h["name"] + + assert "Honza" == res.hits[2].name.first + + with raises(KeyError): + h["not_there"] + + with raises(AttributeError): + h.not_there + + +def test_slicing_on_response_slices_on_hits(dummy_response): + res = response.Response(Search(), dummy_response) + + assert res[0] is res.hits[0] + assert res[::-1] == res.hits[::-1] + + +def test_aggregation_base(agg_response): + assert agg_response.aggs is agg_response.aggregations + assert isinstance(agg_response.aggs, response.AggResponse) + + +def test_metric_agg_works(agg_response): + assert 25052.0 == agg_response.aggs.sum_lines.value + + +def test_aggregations_can_be_iterated_over(agg_response): + aggs = [a for a in agg_response.aggs] + + assert len(aggs) == 3 + assert all(map(lambda a: isinstance(a, AggResponse), aggs)) + + +def test_aggregations_can_be_retrieved_by_name(agg_response, aggs_search): + a = agg_response.aggs["popular_files"] + + assert isinstance(a, BucketData) + assert isinstance(a._meta["aggs"], Terms) + assert a._meta["aggs"] is aggs_search.aggs.aggs["popular_files"] + + +def test_bucket_response_can_be_iterated_over(agg_response): + popular_files = agg_response.aggregations.popular_files + + buckets = [b for b in popular_files] + assert all(isinstance(b, Bucket) for b in buckets) + assert buckets == popular_files.buckets + + +def test_bucket_keys_get_deserialized(aggs_data, aggs_search): + class Commit(Document): + info = Object(properties={"committed_date": Date()}) + + class Index: + name = "test-commit" + + aggs_search = aggs_search.doc_type(Commit) + agg_response = response.Response(aggs_search, aggs_data) + + per_month = agg_response.aggregations.per_month + for b in per_month: + assert isinstance(b.key, date) diff --git a/test_opensearchpy/test_helpers/test_search.py b/test_opensearchpy/test_helpers/test_search.py new file mode 100644 index 00000000..afb032fe --- /dev/null +++ b/test_opensearchpy/test_helpers/test_search.py @@ -0,0 +1,589 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from copy import deepcopy + +from pytest import raises + +from opensearchpy import Document, Q +from opensearchpy.exceptions import IllegalOperation +from opensearchpy.helpers import query, search + + +def test_expand__to_dot_is_respected(): + s = search.Search().query("match", a__b=42, _expand__to_dot=False) + + assert {"query": {"match": {"a__b": 42}}} == s.to_dict() + + +def test_execute_uses_cache(): + s = search.Search() + r = object() + s._response = r + + assert r is s.execute() + + +def test_cache_can_be_ignored(mock_client): + s = search.Search(using="mock") + r = object() + s._response = r + s.execute(ignore_cache=True) + + mock_client.search.assert_called_once_with(index=None, body={}) + + +def test_iter_iterates_over_hits(): + s = search.Search() + s._response = [1, 2, 3] + + assert [1, 2, 3] == list(s) + + +def test_cache_isnt_cloned(): + s = search.Search() + s._response = object() + + assert not hasattr(s._clone(), "_response") + + +def test_search_starts_with_no_query(): + s = search.Search() + + assert s.query._proxied is None + + +def test_search_query_combines_query(): + s = search.Search() + + s2 = s.query("match", f=42) + assert s2.query._proxied == query.Match(f=42) + assert s.query._proxied is None + + s3 = s2.query("match", f=43) + assert s2.query._proxied == query.Match(f=42) + assert s3.query._proxied == query.Bool(must=[query.Match(f=42), query.Match(f=43)]) + + +def test_query_can_be_assigned_to(): + s = search.Search() + + q = Q("match", title="python") + s.query = q + + assert s.query._proxied is q + + +def test_query_can_be_wrapped(): + s = search.Search().query("match", title="python") + + s.query = Q("function_score", query=s.query, field_value_factor={"field": "rating"}) + + assert { + "query": { + "function_score": { + "functions": [{"field_value_factor": {"field": "rating"}}], + "query": {"match": {"title": "python"}}, + } + } + } == s.to_dict() + + +def test_using(): + o = object() + o2 = object() + s = search.Search(using=o) + assert s._using is o + s2 = s.using(o2) + assert s._using is o + assert s2._using is o2 + + +def test_methods_are_proxied_to_the_query(): + s = search.Search().query("match_all") + + assert s.query.to_dict() == {"match_all": {}} + + +def test_query_always_returns_search(): + s = search.Search() + + assert isinstance(s.query("match", f=42), search.Search) + + +def test_source_copied_on_clone(): + s = search.Search().source(False) + assert s._clone()._source == s._source + assert s._clone()._source is False + + s2 = search.Search().source([]) + assert s2._clone()._source == s2._source + assert s2._source == [] + + s3 = search.Search().source(["some", "fields"]) + assert s3._clone()._source == s3._source + assert s3._clone()._source == ["some", "fields"] + + +def test_copy_clones(): + from copy import copy + + s1 = search.Search().source(["some", "fields"]) + s2 = copy(s1) + + assert s1 == s2 + assert s1 is not s2 + + +def test_aggs_allow_two_metric(): + s = search.Search() + + s.aggs.metric("a", "max", field="a").metric("b", "max", field="b") + + assert s.to_dict() == { + "aggs": {"a": {"max": {"field": "a"}}, "b": {"max": {"field": "b"}}} + } + + +def test_aggs_get_copied_on_change(): + s = search.Search().query("match_all") + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + + s2 = s.query("match_all") + s2.aggs.bucket("per_month", "date_histogram", field="date", interval="month") + s3 = s2.query("match_all") + s3.aggs["per_month"].metric("max_score", "max", field="score") + s4 = s3._clone() + s4.aggs.metric("max_score", "max", field="score") + + d = { + "query": {"match_all": {}}, + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + } + + assert d == s.to_dict() + d["aggs"]["per_month"] = {"date_histogram": {"field": "date", "interval": "month"}} + assert d == s2.to_dict() + d["aggs"]["per_month"]["aggs"] = {"max_score": {"max": {"field": "score"}}} + assert d == s3.to_dict() + d["aggs"]["max_score"] = {"max": {"field": "score"}} + assert d == s4.to_dict() + + +def test_search_index(): + s = search.Search(index="i") + assert s._index == ["i"] + s = s.index("i2") + assert s._index == ["i", "i2"] + s = s.index("i3") + assert s._index == ["i", "i2", "i3"] + s = s.index() + assert s._index is None + s = search.Search(index=("i", "i2")) + assert s._index == ["i", "i2"] + s = search.Search(index=["i", "i2"]) + assert s._index == ["i", "i2"] + s = search.Search() + s = s.index("i", "i2") + assert s._index == ["i", "i2"] + s2 = s.index("i3") + assert s._index == ["i", "i2"] + assert s2._index == ["i", "i2", "i3"] + s = search.Search() + s = s.index(["i", "i2"], "i3") + assert s._index == ["i", "i2", "i3"] + s2 = s.index("i4") + assert s._index == ["i", "i2", "i3"] + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(["i4"]) + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(("i4", "i5")) + assert s2._index == ["i", "i2", "i3", "i4", "i5"] + + +def test_doc_type_document_class(): + class MyDocument(Document): + pass + + s = search.Search(doc_type=MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + s = search.Search().doc_type(MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + +def test_sort(): + s = search.Search() + s = s.sort("fielda", "-fieldb") + + assert ["fielda", {"fieldb": {"order": "desc"}}] == s._sort + assert {"sort": ["fielda", {"fieldb": {"order": "desc"}}]} == s.to_dict() + + s = s.sort() + assert [] == s._sort + assert search.Search().to_dict() == s.to_dict() + + +def test_sort_by_score(): + s = search.Search() + s = s.sort("_score") + assert {"sort": ["_score"]} == s.to_dict() + + s = search.Search() + with raises(IllegalOperation): + s.sort("-_score") + + +def test_slice(): + s = search.Search() + assert {"from": 3, "size": 7} == s[3:10].to_dict() + assert {"from": 0, "size": 5} == s[:5].to_dict() + assert {"from": 3, "size": 10} == s[3:].to_dict() + assert {"from": 0, "size": 0} == s[0:0].to_dict() + assert {"from": 20, "size": 0} == s[20:0].to_dict() + + +def test_index(): + s = search.Search() + assert {"from": 3, "size": 1} == s[3].to_dict() + + +def test_search_to_dict(): + s = search.Search() + assert {} == s.to_dict() + + s = s.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == s.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == s.to_dict(size=10) + + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + d = { + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + "query": {"match": {"f": 42}}, + } + assert d == s.to_dict() + + s = search.Search(extra={"size": 5}) + assert {"size": 5} == s.to_dict() + s = s.extra(from_=42) + assert {"size": 5, "from": 42} == s.to_dict() + + +def test_complex_example(): + s = search.Search() + s = ( + s.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .post_filter("terms", tags=["prague", "czech"]) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + + s.aggs.bucket("per_country", "terms", field="country").metric( + "avg_attendees", "avg", field="attendees" + ) + + s.query.minimum_should_match = 2 + + s = s.highlight_options(order="score").highlight("title", "body", fragment_size=50) + + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "highlight": { + "order": "score", + "fields": {"title": {"fragment_size": 50}, "body": {"fragment_size": 50}}, + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_reverse(): + d = { + "query": { + "filtered": { + "filter": { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + }, + "query": { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + } + }, + "post_filter": {"bool": {"must": [{"terms": {"tags": ["prague", "czech"]}}]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "sort": ["title", {"category": {"order": "desc"}}, "_score"], + "size": 5, + "highlight": {"order": "score", "fields": {"title": {"fragment_size": 50}}}, + "suggest": { + "my-title-suggestions-1": { + "text": "devloping distibutd saerch engies", + "term": {"size": 3, "field": "title"}, + } + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } + + d2 = deepcopy(d) + + s = search.Search.from_dict(d) + + # make sure we haven't modified anything in place + assert d == d2 + assert {"size": 5} == s._extra + assert d == s.to_dict() + + +def test_from_dict_doesnt_need_query(): + s = search.Search.from_dict({"size": 5}) + + assert {"size": 5} == s.to_dict() + + +def test_params_being_passed_to_search(mock_client): + s = search.Search(using="mock") + s = s.params(routing="42") + s.execute() + + mock_client.search.assert_called_once_with(index=None, body={}, routing="42") + + +def test_source(): + assert {} == search.Search().source().to_dict() + + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]} + } == search.Search().source(includes=["foo.bar.*"], excludes=["foo.one"]).to_dict() + + assert {"_source": False} == search.Search().source(False).to_dict() + + assert {"_source": ["f1", "f2"]} == search.Search().source( + includes=["foo.bar.*"], excludes=["foo.one"] + ).source(["f1", "f2"]).to_dict() + + +def test_source_on_clone(): + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]}, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == search.Search().source(includes=["foo.bar.*"]).source( + excludes=["foo.one"] + ).filter( + "term", title="python" + ).to_dict() + assert { + "_source": False, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == search.Search().source(False).filter("term", title="python").to_dict() + + +def test_source_on_clear(): + assert ( + {} + == search.Search() + .source(includes=["foo.bar.*"]) + .source(includes=None, excludes=None) + .to_dict() + ) + + +def test_suggest_accepts_global_text(): + s = search.Search.from_dict( + { + "suggest": { + "text": "the amsterdma meetpu", + "my-suggest-1": {"term": {"field": "title"}}, + "my-suggest-2": {"text": "other", "term": {"field": "body"}}, + } + } + ) + + assert { + "suggest": { + "my-suggest-1": { + "term": {"field": "title"}, + "text": "the amsterdma meetpu", + }, + "my-suggest-2": {"term": {"field": "body"}, "text": "other"}, + } + } == s.to_dict() + + +def test_suggest(): + s = search.Search() + s = s.suggest("my_suggestion", "pyhton", term={"field": "title"}) + + assert { + "suggest": {"my_suggestion": {"term": {"field": "title"}, "text": "pyhton"}} + } == s.to_dict() + + +def test_exclude(): + s = search.Search() + s = s.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == s.to_dict() + + +def test_delete_by_query(mock_client): + s = search.Search(using="mock").query("match", lang="java") + s.delete() + + mock_client.delete_by_query.assert_called_once_with( + index=None, body={"query": {"match": {"lang": "java"}}} + ) + + +def test_update_from_dict(): + s = search.Search() + s.update_from_dict({"indices_boost": [{"important-documents": 2}]}) + s.update_from_dict({"_source": ["id", "name"]}) + + assert { + "indices_boost": [{"important-documents": 2}], + "_source": ["id", "name"], + } == s.to_dict() + + +def test_rescore_query_to_dict(): + s = search.Search(index="index-name") + + positive_query = Q( + "function_score", + query=Q("term", tags="a"), + script_score={"script": "_score * 1"}, + ) + + negative_query = Q( + "function_score", + query=Q("term", tags="b"), + script_score={"script": "_score * -100"}, + ) + + s = s.query(positive_query) + s = s.extra( + rescore={"window_size": 100, "query": {"rescore_query": negative_query}} + ) + assert s.to_dict() == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 100, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "b"}}, + "functions": [{"script_score": {"script": "_score * -100"}}], + } + } + }, + }, + } + + assert s.to_dict( + rescore={"window_size": 10, "query": {"rescore_query": positive_query}} + ) == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 10, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + } + }, + }, + } diff --git a/test_opensearchpy/test_helpers/test_update_by_query.py b/test_opensearchpy/test_helpers/test_update_by_query.py new file mode 100644 index 00000000..d298a0a0 --- /dev/null +++ b/test_opensearchpy/test_helpers/test_update_by_query.py @@ -0,0 +1,182 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from copy import deepcopy + +from opensearchpy import Q, UpdateByQuery +from opensearchpy.helpers.response import UpdateByQueryResponse + + +def test_ubq_starts_with_no_query(): + ubq = UpdateByQuery() + + assert ubq.query._proxied is None + + +def test_ubq_to_dict(): + ubq = UpdateByQuery() + assert {} == ubq.to_dict() + + ubq = ubq.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == ubq.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10) + + ubq = UpdateByQuery(extra={"size": 5}) + assert {"size": 5} == ubq.to_dict() + + ubq = UpdateByQuery(extra={"extra_q": Q("term", category="conference")}) + assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict() + + +def test_complex_example(): + ubq = UpdateByQuery() + ubq = ( + ubq.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + ) + + ubq.query.minimum_should_match = 2 + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } == ubq.to_dict() + + +def test_exclude(): + ubq = UpdateByQuery() + ubq = ubq.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == ubq.to_dict() + + +def test_reverse(): + d = { + "query": { + "filtered": { + "filter": { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + }, + "query": { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } + + d2 = deepcopy(d) + + ubq = UpdateByQuery.from_dict(d) + + assert d == d2 + assert d == ubq.to_dict() + + +def test_from_dict_doesnt_need_query(): + ubq = UpdateByQuery.from_dict({"script": {"source": "test"}}) + + assert {"script": {"source": "test"}} == ubq.to_dict() + + +def test_params_being_passed_to_search(mock_client): + ubq = UpdateByQuery(using="mock") + ubq = ubq.params(routing="42") + ubq.execute() + + mock_client.update_by_query.assert_called_once_with( + index=None, body={}, routing="42" + ) + + +def test_overwrite_script(): + ubq = UpdateByQuery() + ubq = ubq.script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + assert { + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + } + } == ubq.to_dict() + ubq = ubq.script(source="ctx._source.likes++") + assert {"script": {"source": "ctx._source.likes++"}} == ubq.to_dict() + + +def test_update_by_query_response_success(): + ubqr = UpdateByQueryResponse({}, {"timed_out": False, "failures": []}) + assert ubqr.success() + + ubqr = UpdateByQueryResponse({}, {"timed_out": True, "failures": []}) + assert not ubqr.success() + + ubqr = UpdateByQueryResponse({}, {"timed_out": False, "failures": [{}]}) + assert not ubqr.success() diff --git a/test_opensearchpy/test_helpers/test_utils.py b/test_opensearchpy/test_helpers/test_utils.py new file mode 100644 index 00000000..7a620736 --- /dev/null +++ b/test_opensearchpy/test_helpers/test_utils.py @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pickle + +from pytest import raises + +from opensearchpy import Q, serializer +from opensearchpy.helpers import utils + + +def test_attrdict_pickle(): + ad = utils.AttrDict({}) + + pickled_ad = pickle.dumps(ad) + assert ad == pickle.loads(pickled_ad) + + +def test_attrlist_pickle(): + al = utils.AttrList([]) + + pickled_al = pickle.dumps(al) + assert al == pickle.loads(pickled_al) + + +def test_attrlist_slice(): + class MyAttrDict(utils.AttrDict): + pass + + ls = utils.AttrList([{}, {}], obj_wrapper=MyAttrDict) + assert isinstance(ls[:][0], MyAttrDict) + + +def test_merge(): + a = utils.AttrDict({"a": {"b": 42, "c": 47}}) + b = {"a": {"b": 123, "d": -12}, "e": [1, 2, 3]} + + utils.merge(a, b) + + assert a == {"a": {"b": 123, "c": 47, "d": -12}, "e": [1, 2, 3]} + + +def test_merge_conflict(): + for d in ( + {"a": 42}, + {"a": {"b": 47}}, + ): + utils.merge({"a": {"b": 42}}, d) + with raises(ValueError): + utils.merge({"a": {"b": 42}}, d, True) + + +def test_attrdict_bool(): + d = utils.AttrDict({}) + + assert not d + d.title = "Title" + assert d + + +def test_attrlist_items_get_wrapped_during_iteration(): + al = utils.AttrList([1, object(), [1], {}]) + + ls = list(iter(al)) + + assert isinstance(ls[2], utils.AttrList) + assert isinstance(ls[3], utils.AttrDict) + + +def test_serializer_deals_with_Attr_versions(): + d = utils.AttrDict({"key": utils.AttrList([1, 2, 3])}) + + assert serializer.serializer.dumps(d) == serializer.serializer.dumps( + {"key": [1, 2, 3]} + ) + + +def test_serializer_deals_with_objects_with_to_dict(): + class MyClass(object): + def to_dict(self): + return 42 + + assert serializer.serializer.dumps(MyClass()) == "42" + + +def test_recursive_to_dict(): + assert utils.recursive_to_dict({"k": [1, (1.0, {"v": Q("match", key="val")})]}) == { + "k": [1, (1.0, {"v": {"match": {"key": "val"}}})] + } + + +def test_attrdict_get(): + a = utils.AttrDict({"a": {"b": 42, "c": 47}}) + assert a.get("a", {}).get("b", 0) == 42 + assert a.get("a", {}).get("e", 0) == 0 + assert a.get("d", {}) == {} + with raises(AttributeError): + assert a.get("d") diff --git a/test_opensearchpy/test_helpers/test_validation.py b/test_opensearchpy/test_helpers/test_validation.py new file mode 100644 index 00000000..b86f8002 --- /dev/null +++ b/test_opensearchpy/test_helpers/test_validation.py @@ -0,0 +1,171 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +from pytest import raises + +from opensearchpy import ( + Boolean, + Date, + Document, + InnerDoc, + Integer, + Nested, + Object, + Text, +) +from opensearchpy.exceptions import ValidationException + + +class Author(InnerDoc): + name = Text(required=True) + email = Text(required=True) + + def clean(self): + print(self, type(self), self.name) + if self.name.lower() not in self.email: + raise ValidationException("Invalid email!") + + +class BlogPost(Document): + authors = Nested(Author, required=True) + created = Date() + inner = Object() + + +class BlogPostWithStatus(Document): + published = Boolean(required=True) + + +class AutoNowDate(Date): + def clean(self, data): + if data is None: + data = datetime.now() + return super(AutoNowDate, self).clean(data) + + +class Log(Document): + timestamp = AutoNowDate(required=True) + data = Text() + + +def test_required_int_can_be_0(): + class DT(Document): + i = Integer(required=True) + + dt = DT(i=0) + assert dt.full_clean() is None + + +def test_required_field_cannot_be_empty_list(): + class DT(Document): + i = Integer(required=True) + + dt = DT(i=[]) + with raises(ValidationException): + dt.full_clean() + + +def test_validation_works_for_lists_of_values(): + class DT(Document): + i = Date(required=True) + + dt = DT(i=[datetime.now(), "not date"]) + with raises(ValidationException): + dt.full_clean() + + dt = DT(i=[datetime.now(), datetime.now()]) + assert None is dt.full_clean() + + +def test_field_with_custom_clean(): + ls = Log() + ls.full_clean() + + assert isinstance(ls.timestamp, datetime) + + +def test_empty_object(): + d = BlogPost(authors=[{"name": "Guian", "email": "guiang@bitquilltech.com"}]) + d.inner = {} + + d.full_clean() + + +def test_missing_required_field_raises_validation_exception(): + d = BlogPost() + with raises(ValidationException): + d.full_clean() + + d = BlogPost() + d.authors.append({"name": "Guian"}) + with raises(ValidationException): + d.full_clean() + + d = BlogPost() + d.authors.append({"name": "Guian", "email": "guiang@bitquilltech.com"}) + d.full_clean() + + +def test_boolean_doesnt_treat_false_as_empty(): + d = BlogPostWithStatus() + with raises(ValidationException): + d.full_clean() + d.published = False + d.full_clean() + d.published = True + d.full_clean() + + +def test_custom_validation_on_nested_gets_run(): + d = BlogPost(authors=[Author(name="Guian", email="king@example.com")], created=None) + + assert isinstance(d.authors[0], Author) + + with raises(ValidationException): + d.full_clean() + + +def test_accessing_known_fields_returns_empty_value(): + d = BlogPost() + + assert [] == d.authors + + d.authors.append({}) + assert None is d.authors[0].name + assert None is d.authors[0].email + + +def test_empty_values_are_not_serialized(): + d = BlogPost( + authors=[{"name": "Guian", "email": "guiang@bitquilltech.com"}], created=None + ) + + d.full_clean() + assert d.to_dict() == { + "authors": [{"name": "Guian", "email": "guiang@bitquilltech.com"}] + } diff --git a/test_opensearchpy/test_helpers/test_wrappers.py b/test_opensearchpy/test_helpers/test_wrappers.py new file mode 100644 index 00000000..c05b9fc3 --- /dev/null +++ b/test_opensearchpy/test_helpers/test_wrappers.py @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime, timedelta + +import pytest + +from opensearchpy import Range + + +@pytest.mark.parametrize( + "kwargs, item", + [ + ({}, 1), + ({}, -1), + ({"gte": -1}, -1), + ({"lte": 4}, 4), + ({"lte": 4, "gte": 2}, 4), + ({"lte": 4, "gte": 2}, 2), + ({"gt": datetime.now() - timedelta(seconds=10)}, datetime.now()), + ], +) +def test_range_contains(kwargs, item): + assert item in Range(**kwargs) + + +@pytest.mark.parametrize( + "kwargs, item", + [ + ({"gt": -1}, -1), + ({"lt": 4}, 4), + ({"lt": 4}, 42), + ({"lte": 4, "gte": 2}, 1), + ({"lte": datetime.now() - timedelta(seconds=10)}, datetime.now()), + ], +) +def test_range_not_contains(kwargs, item): + assert item not in Range(**kwargs) + + +@pytest.mark.parametrize( + "args,kwargs", + [ + (({},), {"lt": 42}), + ((), {"not_lt": 42}), + ((object(),), {}), + ((), {"lt": 1, "lte": 1}), + ((), {"gt": 1, "gte": 1}), + ], +) +def test_range_raises_value_error_on_wrong_params(args, kwargs): + with pytest.raises(ValueError): + Range(*args, **kwargs) + + +@pytest.mark.parametrize( + "range,lower,inclusive", + [ + (Range(gt=1), 1, False), + (Range(gte=1), 1, True), + (Range(), None, False), + (Range(lt=42), None, False), + ], +) +def test_range_lower(range, lower, inclusive): + assert (lower, inclusive) == range.lower + + +@pytest.mark.parametrize( + "range,upper,inclusive", + [ + (Range(lt=1), 1, False), + (Range(lte=1), 1, True), + (Range(), None, False), + (Range(gt=42), None, False), + ], +) +def test_range_upper(range, upper, inclusive): + assert (upper, inclusive) == range.upper diff --git a/test_opensearchpy/test_server/test_helpers/__init__.py b/test_opensearchpy/test_server/test_helpers/__init__.py new file mode 100644 index 00000000..7e52ae22 --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/__init__.py @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_opensearchpy/test_server/test_helpers/conftest.py b/test_opensearchpy/test_server/test_helpers/conftest.py new file mode 100644 index 00000000..4e167d34 --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/conftest.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import re +from datetime import datetime + +from pytest import fixture + +from opensearchpy.connection.connections import add_connection +from opensearchpy.helpers import bulk +from opensearchpy.helpers.test import get_test_client + +from .test_data import ( + DATA, + FLAT_DATA, + TEST_GIT_DATA, + create_flat_git_index, + create_git_index, +) +from .test_document import Comment, History, PullRequest, User + + +@fixture(scope="session") +def client(): + client = get_test_client(verify_certs=False, http_auth=("admin", "admin")) + add_connection("default", client) + return client + + +@fixture(scope="session") +def opensearch_version(client): + info = client.info() + print(info) + yield tuple( + int(x) + for x in re.match(r"^([0-9.]+)", info["version"]["number"]).group(1).split(".") + ) + + +@fixture +def write_client(client): + yield client + client.indices.delete("test-*", ignore=404) + client.indices.delete_template("test-template", ignore=404) + + +@fixture(scope="session") +def data_client(client): + # create mappings + create_git_index(client, "git") + create_flat_git_index(client, "flat-git") + # load data + bulk(client, DATA, raise_on_error=True, refresh=True) + bulk(client, FLAT_DATA, raise_on_error=True, refresh=True) + yield client + client.indices.delete("git", ignore=404) + client.indices.delete("flat-git", ignore=404) + + +@fixture +def pull_request(write_client): + PullRequest.init() + pr = PullRequest( + _id=42, + comments=[ + Comment( + content="Hello World!", + author=User(name="honzakral"), + created_at=datetime(2018, 1, 9, 10, 17, 3, 21184), + history=[ + History( + timestamp=datetime(2012, 1, 1), + diff="-Ahoj Svete!\n+Hello World!", + ) + ], + ), + ], + created_at=datetime(2018, 1, 9, 9, 17, 3, 21184), + ) + pr.save(refresh=True) + return pr + + +@fixture +def setup_ubq_tests(client): + index = "test-git" + create_git_index(client, index) + bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True) + return index diff --git a/test_opensearchpy/test_server/test_helpers.py b/test_opensearchpy/test_server/test_helpers/test_actions.py similarity index 99% rename from test_opensearchpy/test_server/test_helpers.py rename to test_opensearchpy/test_server/test_helpers/test_actions.py index 11c8faaa..2230edb0 100644 --- a/test_opensearchpy/test_server/test_helpers.py +++ b/test_opensearchpy/test_server/test_helpers/test_actions.py @@ -30,8 +30,8 @@ from opensearchpy import TransportError, helpers from opensearchpy.helpers import ScanError -from ..test_cases import SkipTest -from . import OpenSearchTestCase +from ...test_cases import SkipTest +from .. import OpenSearchTestCase class FailingBulkClient(object): @@ -87,7 +87,7 @@ def test_all_errors_from_chunk_are_raised_on_failure(self): self.client, [{"a": "b"}, {"a": "c"}], index="i", raise_on_error=True ): self.assertTrue(ok) - except helpers.BulkIndexError as e: + except helpers.errors.BulkIndexError as e: self.assertEqual(2, len(e.errors)) else: assert False, "exception should have been raised" @@ -303,7 +303,7 @@ def test_error_is_raised(self): self.client.cluster.health(wait_for_status="yellow") self.assertRaises( - helpers.BulkIndexError, + helpers.errors.BulkIndexError, helpers.bulk, self.client, [{"a": 42}, {"a": "c"}], @@ -331,7 +331,7 @@ def test_ignore_error_if_raised(self): # ignore only the status code in the `ignore_status` argument self.assertRaises( - helpers.BulkIndexError, + helpers.errors.BulkIndexError, helpers.bulk, self.client, [{"a": 42}, {"a": "c"}], diff --git a/test_opensearchpy/test_server/test_helpers/test_analysis.py b/test_opensearchpy/test_server/test_helpers/test_analysis.py new file mode 100644 index 00000000..9b4f5849 --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/test_analysis.py @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from opensearchpy import analyzer, token_filter, tokenizer + + +def test_simulate_with_just__builtin_tokenizer(client): + a = analyzer("my-analyzer", tokenizer="keyword") + tokens = a.simulate("Hello World!", using=client).tokens + + assert len(tokens) == 1 + assert tokens[0].token == "Hello World!" + + +def test_simulate_complex(client): + a = analyzer( + "my-analyzer", + tokenizer=tokenizer("split_words", "simple_pattern_split", pattern=":"), + filter=["lowercase", token_filter("no-ifs", "stop", stopwords=["if"])], + ) + + tokens = a.simulate("if:this:works", using=client).tokens + + assert len(tokens) == 2 + assert ["this", "works"] == [t.token for t in tokens] + + +def test_simulate_builtin(client): + a = analyzer("my-analyzer", "english") + tokens = a.simulate("fixes running").tokens + + assert ["fix", "run"] == [t.token for t in tokens] diff --git a/test_opensearchpy/test_server/test_helpers/test_count.py b/test_opensearchpy/test_server/test_helpers/test_count.py new file mode 100644 index 00000000..f8aa612a --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/test_count.py @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from opensearchpy.helpers.search import Q, Search + + +def test_count_all(data_client): + s = Search(using=data_client).index("git") + assert 53 == s.count() + + +def test_count_prefetch(data_client, mocker): + mocker.spy(data_client, "count") + + search = Search(using=data_client).index("git") + search.execute() + assert search.count() == 53 + assert data_client.count.call_count == 0 + + search._response.hits.total.relation = "gte" + assert search.count() == 53 + assert data_client.count.call_count == 1 + + +def test_count_filter(data_client): + s = Search(using=data_client).index("git").filter(~Q("exists", field="parent_shas")) + # initial commit + repo document + assert 2 == s.count() diff --git a/test_opensearchpy/test_server/test_helpers/test_data.py b/test_opensearchpy/test_server/test_helpers/test_data.py new file mode 100644 index 00000000..20b63e39 --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/test_data.py @@ -0,0 +1,1114 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import unicode_literals + + +def create_flat_git_index(client, index): + # we will use user on several places + user_mapping = { + "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} + } + + client.indices.create( + index=index, + body={ + "settings": { + # just one shard, no replicas for testing + "number_of_shards": 1, + "number_of_replicas": 0, + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + "description": {"type": "text", "analyzer": "snowball"}, + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + } + }, + }, + ) + + +def create_git_index(client, index): + # we will use user on several places + user_mapping = { + "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} + } + + client.indices.create( + index=index, + body={ + "settings": { + # just one shard, no replicas for testing + "number_of_shards": 1, + "number_of_replicas": 0, + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + # common fields + "description": {"type": "text", "analyzer": "snowball"}, + "commit_repo": {"type": "join", "relations": {"repo": "commit"}}, + # COMMIT mappings + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + # REPO mappings + "is_public": {"type": "boolean"}, + "owner": user_mapping, + "created_at": {"type": "date"}, + "tags": {"type": "keyword"}, + } + }, + }, + ) + + +DATA = [ + # repository + { + "_id": "opensearch-py", + "_source": { + "commit_repo": "repo", + "organization": "opensearch", + "created_at": "2014-03-03", + "owner": {"name": "opensearch"}, + "is_public": True, + }, + "_index": "git", + }, + # documents + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_aggs.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 7, "insertions": 23, "lines": 30, "files": 4}, + "description": "Make sure buckets aren't modified in-place", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["eb3e543323f189fd7b698e66295427204fff5755"], + "committed_date": "2014-05-02T13:47:19", + "authored_date": "2014-05-02T13:47:19.123+02:00", + }, + "_index": "git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 18, "lines": 18, "files": 1}, + "description": "Add communication with OpenSearch server", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["dd15b6ba17dd9ba16363a51f85b31f66f1fb1157"], + "committed_date": "2014-05-01T13:32:14", + "authored_date": "2014-05-01T13:32:14", + }, + "_index": "git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "test_opensearchpy/test_dsl/test_result.py", + "opensearchpy/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 18, "insertions": 44, "lines": 62, "files": 3}, + "description": "Minor cleanup and adding helpers for interactive python", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ed19caf25abd25300e707fadf3f81b05c5673446"], + "committed_date": "2014-05-01T13:30:44", + "authored_date": "2014-05-01T13:30:44", + }, + "_index": "git", + }, + { + "_id": "ed19caf25abd25300e707fadf3f81b05c5673446", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 28, "lines": 28, "files": 3}, + "description": "Make sure aggs do copy-on-write", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["583e52c71e9a72c1b291ec5843683d8fa8f1ce2d"], + "committed_date": "2014-04-27T16:28:09", + "authored_date": "2014-04-27T16:28:09", + }, + "_index": "git", + }, + { + "_id": "583e52c71e9a72c1b291ec5843683d8fa8f1ce2d", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/aggs.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 1, "lines": 2, "files": 1}, + "description": "Use __setitem__ from DslBase in AggsBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dd19210b5be92b960f7db6f66ae526288edccc3"], + "committed_date": "2014-04-27T15:51:53", + "authored_date": "2014-04-27T15:51:53", + }, + "_index": "git", + }, + { + "_id": "1dd19210b5be92b960f7db6f66ae526288edccc3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_search.py", + "opensearchpy/search.py", + "opensearchpy/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 21, "insertions": 98, "lines": 119, "files": 5}, + "description": "Have Search clone itself on any change besides aggs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b4c9e29376af2e42a4e6dc153f0f293b1a18bac3"], + "committed_date": "2014-04-26T14:49:43", + "authored_date": "2014-04-26T14:49:43", + }, + "_index": "git", + }, + { + "_id": "b4c9e29376af2e42a4e6dc153f0f293b1a18bac3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 5, "lines": 5, "files": 1}, + "description": "Add tests for [] on response", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a64a54181b232bb5943bd16960be9416e402f5f5"], + "committed_date": "2014-04-26T13:56:52", + "authored_date": "2014-04-26T13:56:52", + }, + "_index": "git", + }, + { + "_id": "a64a54181b232bb5943bd16960be9416e402f5f5", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 7, "lines": 8, "files": 1}, + "description": "Test access to missing fields raises appropriate exceptions", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["df3f778a3d37b170bde6979a4ef2d9e3e6400778"], + "committed_date": "2014-04-25T16:01:07", + "authored_date": "2014-04-25T16:01:07", + }, + "_index": "git", + }, + { + "_id": "df3f778a3d37b170bde6979a4ef2d9e3e6400778", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "test_opensearchpy/test_dsl/test_result.py", + "opensearchpy/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 8, "insertions": 31, "lines": 39, "files": 3}, + "description": "Support attribute access even for inner/nested objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925"], + "committed_date": "2014-04-25T15:59:02", + "authored_date": "2014-04-25T15:59:02", + }, + "_index": "git", + }, + { + "_id": "7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "test_opensearchpy/test_dsl/test_result.py", + "opensearchpy/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 149, "lines": 149, "files": 2}, + "description": "Added a prototype of a Respose and Result classes", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e2882d28cb8077eaa3e5d8ae76543482d4d90f7e"], + "committed_date": "2014-04-25T15:12:15", + "authored_date": "2014-04-25T15:12:15", + }, + "_index": "git", + }, + { + "_id": "e2882d28cb8077eaa3e5d8ae76543482d4d90f7e", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["docs/index.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "add warning to the docs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["51f94d83d1c47d3b81207736ca97a1ec6302678f"], + "committed_date": "2014-04-22T19:16:21", + "authored_date": "2014-04-22T19:16:21", + }, + "_index": "git", + }, + { + "_id": "51f94d83d1c47d3b81207736ca97a1ec6302678f", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 29, "lines": 32, "files": 1}, + "description": "Add some comments to the code", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0950f6c600b49e2bf012d03b02250fb71c848555"], + "committed_date": "2014-04-22T19:12:06", + "authored_date": "2014-04-22T19:12:06", + }, + "_index": "git", + }, + { + "_id": "0950f6c600b49e2bf012d03b02250fb71c848555", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "Added a WIP warning", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["54d058f5ac6be8225ef61d5529772aada42ec6c8"], + "committed_date": "2014-04-20T00:19:25", + "authored_date": "2014-04-20T00:19:25", + }, + "_index": "git", + }, + { + "_id": "54d058f5ac6be8225ef61d5529772aada42ec6c8", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/__init__.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 36, "insertions": 7, "lines": 43, "files": 3}, + "description": "Remove the operator kwarg from .query", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4cb07845e45787abc1f850c0b561e487e0034424"], + "committed_date": "2014-04-20T00:17:25", + "authored_date": "2014-04-20T00:17:25", + }, + "_index": "git", + }, + { + "_id": "4cb07845e45787abc1f850c0b561e487e0034424", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 35, "insertions": 49, "lines": 84, "files": 2}, + "description": "Complex example", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["578abe80f76aafd7e81fe46a44403e601733a938"], + "committed_date": "2014-03-24T20:48:45", + "authored_date": "2014-03-24T20:48:45", + }, + "_index": "git", + }, + { + "_id": "578abe80f76aafd7e81fe46a44403e601733a938", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 0, "lines": 2, "files": 1}, + "description": "removing extra whitespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ecb84f03565940c7d294dbc80723420dcfbab340"], + "committed_date": "2014-03-24T20:42:23", + "authored_date": "2014-03-24T20:42:23", + }, + "_index": "git", + }, + { + "_id": "ecb84f03565940c7d294dbc80723420dcfbab340", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 3, "lines": 4, "files": 1}, + "description": "Make sure attribute access works for .query on Search", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["9a247c876ab66e2bca56b25f392d054e613b1b2a"], + "committed_date": "2014-03-24T20:35:02", + "authored_date": "2014-03-24T20:34:46", + }, + "_index": "git", + }, + { + "_id": "9a247c876ab66e2bca56b25f392d054e613b1b2a", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 2, "lines": 2, "files": 1}, + "description": "Make sure .index and .doc_type methods are chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["cee5e46947d510a49edd3609ff91aab7b1f3ac89"], + "committed_date": "2014-03-24T20:27:46", + "authored_date": "2014-03-24T20:27:46", + }, + "_index": "git", + }, + { + "_id": "cee5e46947d510a49edd3609ff91aab7b1f3ac89", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + "opensearchpy/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 13, "insertions": 128, "lines": 141, "files": 3}, + "description": "Added .filter and .post_filter to Search", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1d6857182b09a556d58c6bc5bdcb243092812ba3"], + "committed_date": "2014-03-24T20:26:57", + "authored_date": "2014-03-24T20:26:57", + }, + "_index": "git", + }, + { + "_id": "1d6857182b09a556d58c6bc5bdcb243092812ba3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/utils.py", "opensearchpy/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 29, "lines": 53, "files": 2}, + "description": "Extracted combination logic into DslBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4ad92f15a1955846c01642318303a821e8435b75"], + "committed_date": "2014-03-24T20:03:51", + "authored_date": "2014-03-24T20:03:51", + }, + "_index": "git", + }, + { + "_id": "4ad92f15a1955846c01642318303a821e8435b75", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/utils.py", "opensearchpy/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 43, "insertions": 45, "lines": 88, "files": 2}, + "description": "Extracted bool-related logic to a mixin to be reused by filters", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6eb39dc2825605543ac1ed0b45b9b6baeecc44c2"], + "committed_date": "2014-03-24T19:16:16", + "authored_date": "2014-03-24T19:16:16", + }, + "_index": "git", + }, + { + "_id": "6eb39dc2825605543ac1ed0b45b9b6baeecc44c2", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 32, "lines": 33, "files": 2}, + "description": "Enable otheroperators when querying on Search object", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["be094c7b307332cb6039bf9a7c984d2c7593ddff"], + "committed_date": "2014-03-24T18:25:10", + "authored_date": "2014-03-24T18:25:10", + }, + "_index": "git", + }, + { + "_id": "be094c7b307332cb6039bf9a7c984d2c7593ddff", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 23, "insertions": 35, "lines": 58, "files": 3}, + "description": "make sure query operations always return copies", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b2576e3b6437e2cb9d8971fee4ead60df91fd75b"], + "committed_date": "2014-03-24T18:10:37", + "authored_date": "2014-03-24T18:03:13", + }, + "_index": "git", + }, + { + "_id": "b2576e3b6437e2cb9d8971fee4ead60df91fd75b", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 53, "lines": 54, "files": 2}, + "description": "Adding or operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1be002170ac3cd59d2e97824b83b88bb3c9c60ed"], + "committed_date": "2014-03-24T17:53:38", + "authored_date": "2014-03-24T17:53:38", + }, + "_index": "git", + }, + { + "_id": "1be002170ac3cd59d2e97824b83b88bb3c9c60ed", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 35, "lines": 35, "files": 2}, + "description": "Added inverting of queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["24e1e38b2f704f65440d96c290b7c6cd54c2e00e"], + "committed_date": "2014-03-23T17:44:36", + "authored_date": "2014-03-23T17:44:36", + }, + "_index": "git", + }, + { + "_id": "24e1e38b2f704f65440d96c290b7c6cd54c2e00e", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/aggs.py", "opensearchpy/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 5, "insertions": 1, "lines": 6, "files": 2}, + "description": "Change equality checks to use .to_dict()", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277cfaedbaf3705ed74ad6296227e1172c97a63f"], + "committed_date": "2014-03-23T17:43:01", + "authored_date": "2014-03-23T17:43:01", + }, + "_index": "git", + }, + { + "_id": "277cfaedbaf3705ed74ad6296227e1172c97a63f", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 11, "lines": 12, "files": 2}, + "description": "Test combining of bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6aa3868a6a9f35f71553ce96f9d3d63c74d054fd"], + "committed_date": "2014-03-21T15:15:06", + "authored_date": "2014-03-21T15:15:06", + }, + "_index": "git", + }, + { + "_id": "6aa3868a6a9f35f71553ce96f9d3d63c74d054fd", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 23, "lines": 24, "files": 2}, + "description": "Adding & operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["bb311eb35e7eb53fb5ae01e3f80336866c7e3e37"], + "committed_date": "2014-03-21T15:10:08", + "authored_date": "2014-03-21T15:10:08", + }, + "_index": "git", + }, + { + "_id": "bb311eb35e7eb53fb5ae01e3f80336866c7e3e37", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 4, "lines": 5, "files": 2}, + "description": "Don't serialize empty typed fields into dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["aea8ea9e421bd53a5b058495e68c3fd57bb1dacc"], + "committed_date": "2014-03-15T16:29:37", + "authored_date": "2014-03-15T16:29:37", + }, + "_index": "git", + }, + { + "_id": "aea8ea9e421bd53a5b058495e68c3fd57bb1dacc", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 37, "lines": 40, "files": 3}, + "description": "Bool queries, when combining just adds their params together", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a8819a510b919be43ff3011b904f257798fb8916"], + "committed_date": "2014-03-15T16:16:40", + "authored_date": "2014-03-15T16:16:40", + }, + "_index": "git", + }, + { + "_id": "a8819a510b919be43ff3011b904f257798fb8916", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/run_tests.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 6, "insertions": 2, "lines": 8, "files": 1}, + "description": "Simpler run_tests.py", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e35792a725be2325fc54d3fcb95a7d38d8075a99"], + "committed_date": "2014-03-15T16:02:21", + "authored_date": "2014-03-15T16:02:21", + }, + "_index": "git", + }, + { + "_id": "e35792a725be2325fc54d3fcb95a7d38d8075a99", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/aggs.py", "opensearchpy/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 2}, + "description": "Maku we don't treat shortcuts as methods.", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc"], + "committed_date": "2014-03-15T15:59:21", + "authored_date": "2014-03-15T15:59:21", + }, + "_index": "git", + }, + { + "_id": "3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/query.py", + "opensearchpy/utils.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 9, "insertions": 5, "lines": 14, "files": 3}, + "description": "Centralize == of Dsl objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b5e7d0c4b284211df8f7b464fcece93a27a802fb"], + "committed_date": "2014-03-10T21:37:24", + "authored_date": "2014-03-10T21:37:24", + }, + "_index": "git", + }, + { + "_id": "b5e7d0c4b284211df8f7b464fcece93a27a802fb", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + "opensearchpy/utils.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 75, "insertions": 115, "lines": 190, "files": 6}, + "description": "Experimental draft with more declarative DSL", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0fe741b43adee5ca1424584ddd3f35fa33f8733c"], + "committed_date": "2014-03-10T21:34:39", + "authored_date": "2014-03-10T21:34:39", + }, + "_index": "git", + }, + { + "_id": "0fe741b43adee5ca1424584ddd3f35fa33f8733c", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 1}, + "description": "Make sure .query is chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a22be5933d4b022cbacee867b1aece120208edf3"], + "committed_date": "2014-03-07T17:41:59", + "authored_date": "2014-03-07T17:41:59", + }, + "_index": "git", + }, + { + "_id": "a22be5933d4b022cbacee867b1aece120208edf3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 14, "insertions": 44, "lines": 58, "files": 3}, + "description": "Search now does aggregations", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e823686aacfc4bdcb34ffdab337a26fa09659a9a"], + "committed_date": "2014-03-07T17:29:55", + "authored_date": "2014-03-07T17:29:55", + }, + "_index": "git", + }, + { + "_id": "e823686aacfc4bdcb34ffdab337a26fa09659a9a", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 1, "lines": 1, "files": 1}, + "description": "Ignore html coverage report", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e0aedb3011c71d704deec03a8f32b2b360d6e364"], + "committed_date": "2014-03-07T17:03:23", + "authored_date": "2014-03-07T17:03:23", + }, + "_index": "git", + }, + { + "_id": "e0aedb3011c71d704deec03a8f32b2b360d6e364", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "test_opensearchpy/test_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 228, "lines": 228, "files": 2}, + "description": "Added aggregation DSL objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd"], + "committed_date": "2014-03-07T16:25:55", + "authored_date": "2014-03-07T16:25:55", + }, + "_index": "git", + }, + { + "_id": "61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/utils.py", "opensearchpy/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 12, "insertions": 7, "lines": 19, "files": 2}, + "description": "Only retrieve DslClass, leave the instantiation to the caller", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["647f1017a7b17a913e07af70a3b03202f6adbdfd"], + "committed_date": "2014-03-07T15:27:43", + "authored_date": "2014-03-07T15:27:43", + }, + "_index": "git", + }, + { + "_id": "647f1017a7b17a913e07af70a3b03202f6adbdfd", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "test_opensearchpy/test_dsl/test_search.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 19, "insertions": 19, "lines": 38, "files": 3}, + "description": "No need to replicate Query suffix when in query namespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d"], + "committed_date": "2014-03-07T15:19:01", + "authored_date": "2014-03-07T15:19:01", + }, + "_index": "git", + }, + { + "_id": "7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 3, "lines": 5, "files": 1}, + "description": "Ask forgiveness, not permission", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["c10793c2ca43688195e415b25b674ff34d58eaff"], + "committed_date": "2014-03-07T15:13:22", + "authored_date": "2014-03-07T15:13:22", + }, + "_index": "git", + }, + { + "_id": "c10793c2ca43688195e415b25b674ff34d58eaff", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 27, "lines": 51, "files": 3}, + "description": "Extract DSL object registration to DslMeta", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d8867fdb17fcf4c696657740fa08d29c36adc6ec"], + "committed_date": "2014-03-07T15:12:13", + "authored_date": "2014-03-07T15:10:31", + }, + "_index": "git", + }, + { + "_id": "d8867fdb17fcf4c696657740fa08d29c36adc6ec", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "Search.to_dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2eb7cd980d917ed6f4a4dd8e246804f710ec5082"], + "committed_date": "2014-03-07T02:58:33", + "authored_date": "2014-03-07T02:58:33", + }, + "_index": "git", + }, + { + "_id": "2eb7cd980d917ed6f4a4dd8e246804f710ec5082", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 113, "lines": 113, "files": 2}, + "description": "Basic Search object", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["11708576f9118e0dbf27ae1f8a7b799cf281b511"], + "committed_date": "2014-03-06T21:02:03", + "authored_date": "2014-03-06T21:01:05", + }, + "_index": "git", + }, + { + "_id": "11708576f9118e0dbf27ae1f8a7b799cf281b511", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "/query.popensearchpyy", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "MatchAll query + anything is anything", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dc496e5c7c1b2caf290df477fca2db61ebe37e0"], + "committed_date": "2014-03-06T20:40:39", + "authored_date": "2014-03-06T20:39:52", + }, + "_index": "git", + }, + { + "_id": "1dc496e5c7c1b2caf290df477fca2db61ebe37e0", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 53, "lines": 53, "files": 2}, + "description": "From_dict, Q(dict) and bool query parses it's subqueries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d407f99d1959b7b862a541c066d9fd737ce913f3"], + "committed_date": "2014-03-06T20:24:30", + "authored_date": "2014-03-06T20:24:30", + }, + "_index": "git", + }, + { + "_id": "d407f99d1959b7b862a541c066d9fd737ce913f3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["CONTRIBUTING.md", "README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 6, "insertions": 21, "lines": 27, "files": 2}, + "description": "Housekeeping - licence and updated generic CONTRIBUTING.md", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277e8ecc7395754d1ba1f2411ec32337a3e9d73f"], + "committed_date": "2014-03-05T16:21:44", + "authored_date": "2014-03-05T16:21:44", + }, + "_index": "git", + }, + { + "_id": "277e8ecc7395754d1ba1f2411ec32337a3e9d73f", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "setup.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 59, "lines": 59, "files": 3}, + "description": "Automatic query registration and Q function", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["8f1e34bd8f462fec50bcc10971df2d57e2986604"], + "committed_date": "2014-03-05T16:18:52", + "authored_date": "2014-03-05T16:18:52", + }, + "_index": "git", + }, + { + "_id": "8f1e34bd8f462fec50bcc10971df2d57e2986604", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 54, "lines": 54, "files": 2}, + "description": "Initial implementation of match and bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["fcff47ddcc6d08be5739d03dd30f504fb9db2608"], + "committed_date": "2014-03-05T15:55:06", + "authored_date": "2014-03-05T15:55:06", + }, + "_index": "git", + }, + { + "_id": "fcff47ddcc6d08be5739d03dd30f504fb9db2608", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "docs/Makefile", + "CONTRIBUTING.md", + "docs/conf.py", + "LICENSE", + "Changelog.rst", + "docs/index.rst", + "docs/Changelog.rst", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 692, "lines": 692, "files": 7}, + "description": "Docs template", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["febe8127ae48fcc81778c0fb2d628f1bcc0a0350"], + "committed_date": "2014-03-04T01:42:31", + "authored_date": "2014-03-04T01:42:31", + }, + "_index": "git", + }, + { + "_id": "febe8127ae48fcc81778c0fb2d628f1bcc0a0350", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/__init__.py", + "test_opensearchpy/test_dsl/run_tests.py", + "setup.py", + "README.rst", + "test_opensearchpy/test_dsl/__init__.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 82, "lines": 82, "files": 5}, + "description": "Empty project structure", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2a8f1ce89760bfc72808f3945b539eae650acac9"], + "committed_date": "2014-03-04T01:37:49", + "authored_date": "2014-03-03T18:23:55", + }, + "_index": "git", + }, + { + "_id": "2a8f1ce89760bfc72808f3945b539eae650acac9", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 9, "lines": 9, "files": 1}, + "description": "Initial commit, .gitignore", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": [], + "committed_date": "2014-03-03T18:15:05", + "authored_date": "2014-03-03T18:15:05", + }, + "_index": "git", + }, +] + + +def flatten_doc(d): + src = d["_source"].copy() + del src["commit_repo"] + return {"_index": "flat-git", "_id": d["_id"], "_source": src} + + +FLAT_DATA = [flatten_doc(d) for d in DATA if "routing" in d] + + +def create_test_git_data(d): + src = d["_source"].copy() + return { + "_index": "test-git", + "routing": "opensearch-py", + "_id": d["_id"], + "_source": src, + } + + +TEST_GIT_DATA = [create_test_git_data(d) for d in DATA] diff --git a/test_opensearchpy/test_server/test_helpers/test_document.py b/test_opensearchpy/test_server/test_helpers/test_document.py new file mode 100644 index 00000000..1bb6ce12 --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/test_document.py @@ -0,0 +1,566 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime +from ipaddress import ip_address + +import pytest +from pytest import raises +from pytz import timezone + +from opensearchpy import ( + Binary, + Boolean, + ConflictError, + Date, + Document, + Double, + InnerDoc, + Ip, + Keyword, + Long, + Mapping, + MetaField, + Nested, + NotFoundError, + Object, + Q, + RankFeatures, + Text, + analyzer, +) +from opensearchpy.helpers.utils import AttrList + +snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) + + +class User(InnerDoc): + name = Text(fields={"raw": Keyword()}) + + +class Wiki(Document): + owner = Object(User) + views = Long() + ranked = RankFeatures() + + class Index: + name = "test-wiki" + + +class Repository(Document): + owner = Object(User) + created_at = Date() + description = Text(analyzer=snowball) + tags = Keyword() + + @classmethod + def search(cls): + return super(Repository, cls).search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(Document): + committed_date = Date() + authored_date = Date() + description = Text(analyzer=snowball) + + class Index: + name = "flat-git" + + class Meta: + mapping = Mapping() + + +class History(InnerDoc): + timestamp = Date() + diff = Text() + + +class Comment(InnerDoc): + content = Text() + created_at = Date() + author = Object(User) + history = Nested(History) + + class Meta: + dynamic = MetaField(False) + + +class PullRequest(Document): + comments = Nested(Comment) + created_at = Date() + + class Index: + name = "test-prs" + + +class SerializationDoc(Document): + i = Long() + b = Boolean() + d = Double() + bin = Binary() + ip = Ip() + + class Index: + name = "test-serialization" + + +def test_serialization(write_client): + SerializationDoc.init() + write_client.index( + index="test-serialization", + id=42, + body={ + "i": [1, 2, "3", None], + "b": [True, False, "true", "false", None], + "d": [0.1, "-0.1", None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "ip": ["::1", "127.0.0.1", None], + }, + ) + sd = SerializationDoc.get(id=42) + + assert sd.i == [1, 2, 3, None] + assert sd.b == [True, False, True, False, None] + assert sd.d == [0.1, -0.1, None] + assert sd.bin == [b"Hello World", None] + assert sd.ip == [ip_address("::1"), ip_address("127.0.0.1"), None] + + assert sd.to_dict() == { + "b": [True, False, True, False, None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "d": [0.1, -0.1, None], + "i": [1, 2, 3, None], + "ip": ["::1", "127.0.0.1", None], + } + + +def test_nested_inner_hits_are_wrapped_properly(pull_request): + history_query = Q( + "nested", + path="comments.history", + inner_hits={}, + query=Q("match", comments__history__diff="ahoj"), + ) + s = PullRequest.search().query( + "nested", inner_hits={}, path="comments", query=history_query + ) + + response = s.execute() + pr = response.hits[0] + assert isinstance(pr, PullRequest) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].history[0], History) + + comment = pr.meta.inner_hits.comments.hits[0] + assert isinstance(comment, Comment) + assert comment.author.name == "honzakral" + assert isinstance(comment.history[0], History) + + history = comment.meta.inner_hits["comments.history"].hits[0] + assert isinstance(history, History) + assert history.timestamp == datetime(2012, 1, 1) + assert "score" in history.meta + + +def test_nested_inner_hits_are_deserialized_properly(pull_request): + s = PullRequest.search().query( + "nested", + inner_hits={}, + path="comments", + query=Q("match", comments__content="hello"), + ) + + response = s.execute() + pr = response.hits[0] + assert isinstance(pr.created_at, datetime) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].created_at, datetime) + + +def test_nested_top_hits_are_wrapped_properly(pull_request): + s = PullRequest.search() + s.aggs.bucket("comments", "nested", path="comments").metric( + "hits", "top_hits", size=1 + ) + + r = s.execute() + + print(r._d_) + assert isinstance(r.aggregations.comments.hits.hits[0], Comment) + + +def test_update_object_field(write_client): + Wiki.init() + w = Wiki( + owner=User(name="Honza Kral"), + _id="opensearch-py", + ranked={"test1": 0.1, "topic2": 0.2}, + ) + w.save() + + assert "updated" == w.update(owner=[{"name": "Honza"}, {"name": "Nick"}]) + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + w = Wiki.get(id="opensearch-py") + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + assert w.ranked == {"test1": 0.1, "topic2": 0.2} + + +def test_update_script(write_client): + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42) + w.save() + + w.update(script="ctx._source.views += params.inc", inc=5) + w = Wiki.get(id="opensearch-py") + assert w.views == 47 + + +def test_update_retry_on_conflict(write_client): + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42) + w.save() + + w1 = Wiki.get(id="opensearch-py") + w2 = Wiki.get(id="opensearch-py") + w1.update(script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1) + w2.update(script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1) + + w = Wiki.get(id="opensearch-py") + assert w.views == 52 + + +@pytest.mark.parametrize("retry_on_conflict", [None, 0]) +def test_update_conflicting_version(write_client, retry_on_conflict): + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42) + w.save() + + w1 = Wiki.get(id="opensearch-py") + w2 = Wiki.get(id="opensearch-py") + w1.update(script="ctx._source.views += params.inc", inc=5) + + with raises(ConflictError): + w2.update( + script="ctx._source.views += params.inc", + inc=5, + retry_on_conflict=retry_on_conflict, + ) + + +def test_save_and_update_return_doc_meta(write_client): + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42) + resp = w.save(return_doc_meta=True) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "created" + assert resp.keys().__contains__("_id") + assert resp.keys().__contains__("_primary_term") + assert resp.keys().__contains__("_seq_no") + assert resp.keys().__contains__("_shards") + assert resp.keys().__contains__("_version") + + resp = w.update( + script="ctx._source.views += params.inc", inc=5, return_doc_meta=True + ) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "updated" + assert resp.keys().__contains__("_id") + assert resp.keys().__contains__("_primary_term") + assert resp.keys().__contains__("_seq_no") + assert resp.keys().__contains__("_shards") + assert resp.keys().__contains__("_version") + + +def test_init(write_client): + Repository.init(index="test-git") + + assert write_client.indices.exists(index="test-git") + + +def test_get_raises_404_on_index_missing(data_client): + with raises(NotFoundError): + Repository.get("opensearch-dsl-php", index="not-there") + + +def test_get_raises_404_on_non_existent_id(data_client): + with raises(NotFoundError): + Repository.get("opensearch-dsl-php") + + +def test_get_returns_none_if_404_ignored(data_client): + assert None is Repository.get("opensearch-dsl-php", ignore=404) + + +def test_get_returns_none_if_404_ignored_and_index_doesnt_exist(data_client): + assert None is Repository.get("42", index="not-there", ignore=404) + + +def test_get(data_client): + opensearch_repo = Repository.get("opensearch-py") + + assert isinstance(opensearch_repo, Repository) + assert opensearch_repo.owner.name == "opensearch" + assert datetime(2014, 3, 3) == opensearch_repo.created_at + + +def test_exists_return_true(data_client): + assert Repository.exists("opensearch-py") + + +def test_exists_false(data_client): + assert not Repository.exists("opensearch-dsl-php") + + +def test_get_with_tz_date(data_client): + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="opensearch-py" + ) + + tzinfo = timezone("Europe/Prague") + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000)) + == first_commit.authored_date + ) + + +def test_save_with_tz_date(data_client): + tzinfo = timezone("Europe/Prague") + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="opensearch-py" + ) + first_commit.committed_date = tzinfo.localize( + datetime(2014, 5, 2, 13, 47, 19, 123456) + ) + first_commit.save() + + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="opensearch-py" + ) + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) + == first_commit.committed_date + ) + + +COMMIT_DOCS_WITH_MISSING = [ + {"_id": "0"}, # Missing + {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"}, # Existing + {"_id": "f"}, # Missing + {"_id": "eb3e543323f189fd7b698e66295427204fff5755"}, # Existing +] + + +def test_mget(data_client): + commits = Commit.mget(COMMIT_DOCS_WITH_MISSING) + assert commits[0] is None + assert commits[1].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[2] is None + assert commits[3].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +def test_mget_raises_exception_when_missing_param_is_invalid(data_client): + with raises(ValueError): + Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj") + + +def test_mget_raises_404_when_missing_param_is_raise(data_client): + with raises(NotFoundError): + Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise") + + +def test_mget_ignores_missing_docs_when_missing_param_is_skip(data_client): + commits = Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip") + assert commits[0].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[1].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +def test_update_works_from_search_response(data_client): + opensearch_repo = Repository.search().execute()[0] + + opensearch_repo.update(owner={"other_name": "opensearchpy"}) + assert "opensearchpy" == opensearch_repo.owner.other_name + + new_version = Repository.get("opensearch-py") + assert "opensearchpy" == new_version.owner.other_name + assert "opensearch" == new_version.owner.name + + +def test_update(data_client): + opensearch_repo = Repository.get("opensearch-py") + v = opensearch_repo.meta.version + + old_seq_no = opensearch_repo.meta.seq_no + opensearch_repo.update( + owner={"new_name": "opensearchpy"}, new_field="testing-update" + ) + + assert "opensearchpy" == opensearch_repo.owner.new_name + assert "testing-update" == opensearch_repo.new_field + + # assert version has been updated + assert opensearch_repo.meta.version == v + 1 + + new_version = Repository.get("opensearch-py") + assert "testing-update" == new_version.new_field + assert "opensearchpy" == new_version.owner.new_name + assert "opensearch" == new_version.owner.name + assert "seq_no" in new_version.meta + assert new_version.meta.seq_no != old_seq_no + assert "primary_term" in new_version.meta + + +def test_save_updates_existing_doc(data_client): + opensearch_repo = Repository.get("opensearch-py") + + opensearch_repo.new_field = "testing-save" + old_seq_no = opensearch_repo.meta.seq_no + assert "updated" == opensearch_repo.save() + + new_repo = data_client.get(index="git", id="opensearch-py") + assert "testing-save" == new_repo["_source"]["new_field"] + assert new_repo["_seq_no"] != old_seq_no + assert new_repo["_seq_no"] == opensearch_repo.meta.seq_no + + +def test_save_automatically_uses_seq_no_and_primary_term(data_client): + opensearch_repo = Repository.get("opensearch-py") + opensearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + opensearch_repo.save() + + +def test_delete_automatically_uses_seq_no_and_primary_term(data_client): + opensearch_repo = Repository.get("opensearch-py") + opensearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + opensearch_repo.delete() + + +def assert_doc_equals(expected, actual): + for f in expected: + assert f in actual + assert actual[f] == expected[f] + + +def test_can_save_to_different_index(write_client): + test_repo = Repository(description="testing", meta={"id": 42}) + assert test_repo.save(index="test-document") + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"description": "testing"}, + }, + write_client.get(index="test-document", id=42), + ) + + +def test_save_without_skip_empty_will_include_empty_fields(write_client): + test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42}) + assert test_repo.save(index="test-document", skip_empty=False) + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"field_1": [], "field_2": None, "field_3": {}}, + }, + write_client.get(index="test-document", id=42), + ) + + +def test_delete(write_client): + write_client.create( + index="test-document", + id="opensearch-py", + body={ + "organization": "opensearch", + "created_at": "2014-03-03", + "owner": {"name": "opensearch"}, + }, + ) + + test_repo = Repository(meta={"id": "opensearch-py"}) + test_repo.meta.index = "test-document" + test_repo.delete() + + assert not write_client.exists( + index="test-document", + id="opensearch-py", + ) + + +def test_search(data_client): + assert Repository.search().count() == 1 + + +def test_search_returns_proper_doc_classes(data_client): + result = Repository.search().execute() + + opensearch_repo = result.hits[0] + + assert isinstance(opensearch_repo, Repository) + assert opensearch_repo.owner.name == "opensearch" + + +def test_refresh_mapping(data_client): + class Commit(Document): + class Index: + name = "git" + + Commit._index.load_mappings() + + assert "stats" in Commit._index._mapping + assert "committer" in Commit._index._mapping + assert "description" in Commit._index._mapping + assert "committed_date" in Commit._index._mapping + assert isinstance(Commit._index._mapping["committed_date"], Date) + + +def test_highlight_in_meta(data_client): + commit = ( + Commit.search() + .query("match", description="inverting") + .highlight("description") + .execute()[0] + ) + + assert isinstance(commit, Commit) + assert "description" in commit.meta.highlight + assert isinstance(commit.meta.highlight["description"], AttrList) + assert len(commit.meta.highlight["description"]) > 0 diff --git a/test_opensearchpy/test_server/test_helpers/test_faceted_search.py b/test_opensearchpy/test_server/test_helpers/test_faceted_search.py new file mode 100644 index 00000000..6b9ee50c --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/test_faceted_search.py @@ -0,0 +1,287 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest + +from opensearchpy import A, Boolean, Date, Document, Keyword +from opensearchpy.helpers.faceted_search import ( + DateHistogramFacet, + FacetedSearch, + NestedFacet, + RangeFacet, + TermsFacet, +) + +from .test_document import PullRequest + + +class Repos(Document): + is_public = Boolean() + created_at = Date() + + class Index: + name = "git" + + +class Commit(Document): + files = Keyword() + committed_date = Date() + + class Index: + name = "git" + + +class MetricSearch(FacetedSearch): + index = "git" + doc_types = [Commit] + + facets = { + "files": TermsFacet(field="files", metric=A("max", field="committed_date")), + } + + +@pytest.fixture(scope="session") +def commit_search_cls(opensearch_version): + interval_kwargs = {"fixed_interval": "1d"} + + class CommitSearch(FacetedSearch): + index = "flat-git" + fields = ( + "description", + "files", + ) + + facets = { + "files": TermsFacet(field="files"), + "frequency": DateHistogramFacet( + field="authored_date", min_doc_count=1, **interval_kwargs + ), + "deletions": RangeFacet( + field="stats.deletions", + ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], + ), + } + + return CommitSearch + + +@pytest.fixture(scope="session") +def repo_search_cls(opensearch_version): + interval_type = "calendar_interval" + + class RepoSearch(FacetedSearch): + index = "git" + doc_types = [Repos] + facets = { + "public": TermsFacet(field="is_public"), + "created": DateHistogramFacet( + field="created_at", **{interval_type: "month"} + ), + } + + def search(self): + s = super(RepoSearch, self).search() + return s.filter("term", commit_repo="repo") + + return RepoSearch + + +@pytest.fixture(scope="session") +def pr_search_cls(opensearch_version): + interval_type = "calendar_interval" + + class PRSearch(FacetedSearch): + index = "test-prs" + doc_types = [PullRequest] + facets = { + "comments": NestedFacet( + "comments", + DateHistogramFacet( + field="comments.created_at", **{interval_type: "month"} + ), + ) + } + + return PRSearch + + +def test_facet_with_custom_metric(data_client): + ms = MetricSearch() + r = ms.execute() + + dates = [f[1] for f in r.facets.files] + assert dates == list(sorted(dates, reverse=True)) + assert dates[0] == 1399038439000 + + +def test_nested_facet(pull_request, pr_search_cls): + prs = pr_search_cls() + r = prs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments + + +def test_nested_facet_with_filter(pull_request, pr_search_cls): + prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) + r = prs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments + + prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) + r = prs.execute() + assert not r.hits + + +def test_datehistogram_facet(data_client, repo_search_cls): + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created + + +def test_boolean_facet(data_client, repo_search_cls): + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 + assert [(True, 1, False)] == r.facets.public + value, count, selected = r.facets.public[0] + assert value is True + + +def test_empty_search_finds_everything( + data_client, opensearch_version, commit_search_cls +): + cs = commit_search_cls() + r = cs.execute() + assert r.hits.total.value == 52 + assert [ + ("opensearchpy", 39, False), + ("test_opensearchpy", 35, False), + ("test_opensearchpy/test_dsl", 35, False), + ("opensearchpy/query.py", 18, False), + ("test_opensearchpy/test_dsl/test_search.py", 15, False), + ("opensearchpy/utils.py", 14, False), + ("test_opensearchpy/test_dsl/test_query.py", 13, False), + ("opensearchpy/search.py", 12, False), + ("opensearchpy/aggs.py", 11, False), + ("test_opensearchpy/test_dsl/test_result.py", 5, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 2, False), + (datetime(2014, 3, 4, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 3, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 9, False), + (datetime(2014, 3, 10, 0, 0), 2, False), + (datetime(2014, 3, 15, 0, 0), 4, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 2, False), + (datetime(2014, 3, 24, 0, 0), 10, False), + (datetime(2014, 4, 20, 0, 0), 2, False), + (datetime(2014, 4, 22, 0, 0), 2, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 2, False), + (datetime(2014, 5, 1, 0, 0), 2, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 19, False), + ("good", 14, False), + ("better", 19, False), + ] == r.facets.deletions + + +def test_term_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"files": "test_opensearchpy/test_dsl"}) + + r = cs.execute() + + assert 35 == r.hits.total.value + assert [ + ("opensearchpy", 39, False), + ("test_opensearchpy", 35, False), + ("test_opensearchpy/test_dsl", 35, True), + ("opensearchpy/query.py", 18, False), + ("test_opensearchpy/test_dsl/test_search.py", 15, False), + ("opensearchpy/utils.py", 14, False), + ("test_opensearchpy/test_dsl/test_query.py", 13, False), + ("opensearchpy/search.py", 12, False), + ("opensearchpy/aggs.py", 11, False), + ("test_opensearchpy/test_dsl/test_result.py", 5, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 2, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 6, False), + (datetime(2014, 3, 10, 0, 0), 1, False), + (datetime(2014, 3, 15, 0, 0), 3, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 1, False), + (datetime(2014, 3, 24, 0, 0), 7, False), + (datetime(2014, 4, 20, 0, 0), 1, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 1, False), + (datetime(2014, 5, 1, 0, 0), 1, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 12, False), + ("good", 10, False), + ("better", 13, False), + ] == r.facets.deletions + + +def test_range_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"deletions": "better"}) + + r = cs.execute() + + assert 19 == r.hits.total.value + + +def test_pagination(data_client, commit_search_cls): + cs = commit_search_cls() + cs = cs[0:20] + + assert 52 == cs.count() + assert 20 == len(cs.execute()) diff --git a/test_opensearchpy/test_server/test_helpers/test_index.py b/test_opensearchpy/test_server/test_helpers/test_index.py new file mode 100644 index 00000000..7df4a737 --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/test_index.py @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from opensearchpy import Date, Document, Index, IndexTemplate, Text +from opensearchpy.helpers import analysis + + +class Post(Document): + title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")) + published_from = Date() + + +def test_index_template_works(write_client): + it = IndexTemplate("test-template", "test-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + it.save() + + i = Index("test-blog") + i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-blog") + + +def test_index_can_be_saved_even_with_settings(write_client): + i = Index("test-blog", using=write_client) + i.settings(number_of_shards=3, number_of_replicas=0) + i.save() + i.settings(number_of_replicas=1) + i.save() + + assert ( + "1" == i.get_settings()["test-blog"]["settings"]["index"]["number_of_replicas"] + ) + + +def test_index_exists(data_client): + assert Index("git").exists() + assert not Index("not-there").exists() + + +def test_index_can_be_created_with_settings_and_mappings(write_client): + i = Index("test-blog", using=write_client) + i.document(Post) + i.settings(number_of_replicas=0, number_of_shards=1) + i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-blog") + + settings = write_client.indices.get_settings(index="test-blog") + assert settings["test-blog"]["settings"]["index"]["number_of_replicas"] == "0" + assert settings["test-blog"]["settings"]["index"]["number_of_shards"] == "1" + assert settings["test-blog"]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } + + +def test_delete(write_client): + write_client.indices.create( + index="test-index", + body={"settings": {"number_of_replicas": 0, "number_of_shards": 1}}, + ) + + i = Index("test-index", using=write_client) + i.delete() + assert not write_client.indices.exists(index="test-index") + + +def test_multiple_indices_with_same_doc_type_work(write_client): + i1 = Index("test-index-1", using=write_client) + i2 = Index("test-index-2", using=write_client) + + for i in (i1, i2): + i.document(Post) + i.create() + + for i in ("test-index-1", "test-index-2"): + settings = write_client.indices.get_settings(index=i) + assert settings[i]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } diff --git a/test_opensearchpy/test_server/test_helpers/test_mapping.py b/test_opensearchpy/test_server/test_helpers/test_mapping.py new file mode 100644 index 00000000..d5d84469 --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/test_mapping.py @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from opensearchpy import exceptions +from opensearchpy.helpers import analysis, mapping + + +def test_mapping_saved_into_opensearch(write_client): + m = mapping.Mapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + m.field("tags", "keyword") + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "tags": {"type": "keyword"}, + } + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +def test_mapping_saved_into_opensearch_when_index_already_exists_closed(write_client): + m = mapping.Mapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + write_client.indices.create(index="test-mapping") + + with raises(exceptions.IllegalOperation): + m.save("test-mapping", using=write_client) + + write_client.cluster.health(index="test-mapping", wait_for_status="yellow") + write_client.indices.close(index="test-mapping") + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}} + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +def test_mapping_saved_into_opensearch_when_index_already_exists_with_analysis( + write_client, +): + m = mapping.Mapping() + analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword") + m.field("name", "text", analyzer=analyzer) + + new_analysis = analyzer.get_analysis_definition() + new_analysis["analyzer"]["other_analyzer"] = { + "type": "custom", + "tokenizer": "whitespace", + } + write_client.indices.create( + index="test-mapping", body={"settings": {"analysis": new_analysis}} + ) + + m.field("title", "text", analyzer=analyzer) + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "title": {"type": "text", "analyzer": "my_analyzer"}, + } + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +def test_mapping_gets_updated_from_opensearch(write_client): + write_client.indices.create( + index="test-mapping", + body={ + "settings": {"number_of_shards": 1, "number_of_replicas": 0}, + "mappings": { + "date_detection": False, + "properties": { + "title": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + "created_at": {"type": "date"}, + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + }, + }, + }, + }, + }, + ) + + m = mapping.Mapping.from_opensearch("test-mapping", using=write_client) + + assert ["comments", "created_at", "title"] == list( + sorted(m.properties.properties._d_.keys()) + ) + assert { + "date_detection": False, + "properties": { + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + }, + "created_at": {"type": "date"}, + "title": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + } == m.to_dict() + + # test same with alias + write_client.indices.put_alias(index="test-mapping", name="test-alias") + + m2 = mapping.Mapping.from_opensearch("test-alias", using=write_client) + assert m2.to_dict() == m.to_dict() diff --git a/test_opensearchpy/test_server/test_helpers/test_search.py b/test_opensearchpy/test_server/test_helpers/test_search.py new file mode 100644 index 00000000..90aabbc0 --- /dev/null +++ b/test_opensearchpy/test_server/test_helpers/test_search.py @@ -0,0 +1,186 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. +# +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import unicode_literals + +from pytest import raises + +from opensearchpy import ( + Date, + Document, + Keyword, + MultiSearch, + Q, + Search, + Text, + TransportError, +) +from opensearchpy.helpers.response import aggs + +from .test_data import FLAT_DATA + + +class Repository(Document): + created_at = Date() + description = Text(analyzer="snowball") + tags = Keyword() + + @classmethod + def search(cls): + return super(Repository, cls).search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(Document): + class Index: + name = "flat-git" + + +def test_filters_aggregation_buckets_are_accessible(data_client): + has_tests_query = Q("term", files="test_opensearchpy/test_dsl") + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").bucket( + "has_tests", "filters", filters={"yes": has_tests_query, "no": ~has_tests_query} + ).metric("lines", "stats", field="stats.lines") + response = s.execute() + + assert isinstance( + response.aggregations.top_authors.buckets[0].has_tests.buckets.yes, aggs.Bucket + ) + assert ( + 35 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.doc_count + ) + assert ( + 228 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.lines.max + ) + + +def test_top_hits_are_wrapped_in_response(data_client): + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").metric( + "top_commits", "top_hits", size=5 + ) + response = s.execute() + + top_commits = response.aggregations.top_authors.buckets[0].top_commits + assert isinstance(top_commits, aggs.TopHitsData) + assert 5 == len(top_commits) + + hits = [h for h in top_commits] + assert 5 == len(hits) + assert isinstance(hits[0], Commit) + + +def test_inner_hits_are_wrapped_in_response(data_client): + s = Search(index="git")[0:1].query( + "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all") + ) + response = s.execute() + + commit = response.hits[0] + assert isinstance(commit.meta.inner_hits.repo, response.__class__) + assert repr(commit.meta.inner_hits.repo[0]).startswith(" 0 + assert not response.timed_out + assert response.updated == 52 + assert response.deleted == 0 + assert response.took > 0 + assert response.success() + + +def test_update_by_query_with_script(write_client, setup_ubq_tests): + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(~Q("exists", field="parent_shas")) + .script(source="ctx._source.is_public = false") + ) + ubq = ubq.params(conflicts="proceed") + + response = ubq.execute() + assert response.total == 2 + assert response.updated == 2 + assert response.version_conflicts == 0 + + +def test_delete_by_query_with_script(write_client, setup_ubq_tests): + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(Q("match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3")) + .script(source='ctx.op = "delete"') + ) + ubq = ubq.params(conflicts="proceed") + + response = ubq.execute() + + assert response.total == 1 + assert response.deleted == 1 + assert response.success() diff --git a/utils/build-dists.py b/utils/build-dists.py index 276b4385..096505b4 100644 --- a/utils/build-dists.py +++ b/utils/build-dists.py @@ -85,7 +85,7 @@ def test_dist(dist): run(venv_python, "-m", "pip", "install", dist) # Test the sync namespaces - run(venv_python, "-c", f"from {dist_name} import OpenSearch") + run(venv_python, "-c", f"from {dist_name} import OpenSearch, Q") run( venv_python, "-c", @@ -173,7 +173,7 @@ def test_dist(dist): run( venv_python, "-c", - f"from {dist_name} import OpenSearch", + f"from {dist_name} import OpenSearch,Q", expect_exit_code=256, ) @@ -181,6 +181,7 @@ def test_dist(dist): def main(): run("git", "checkout", "--", "setup.py", "opensearchpy/") run("rm", "-rf", "build/", "dist/*", "*.egg-info", ".eggs") + run("python", "setup.py", "sdist", "bdist_wheel") # Grab the major version to be used as a suffix. version_path = os.path.join(base_dir, "opensearchpy/_version.py")