From a867bdac158d5275c7d296c1cd794df6e2f64f17 Mon Sep 17 00:00:00 2001 From: saimedhi Date: Thu, 16 Mar 2023 12:59:57 -0700 Subject: [PATCH] Added async support for helpers that are merged from opensearch-dsl-py Signed-off-by: saimedhi --- CHANGELOG.md | 1 + opensearchpy/_async/helpers/__init__.py | 8 + .../_async/{helpers.py => helpers/actions.py} | 8 +- .../{helpers.pyi => helpers/actions.pyi} | 4 +- opensearchpy/_async/helpers/document.py | 437 +++++++ opensearchpy/_async/helpers/document.pyi | 14 + opensearchpy/_async/helpers/faceted_search.py | 200 +++ .../_async/helpers/faceted_search.pyi | 10 + opensearchpy/_async/helpers/index.py | 652 ++++++++++ opensearchpy/_async/helpers/index.pyi | 11 + opensearchpy/_async/helpers/mapping.py | 169 +++ opensearchpy/_async/helpers/mapping.pyi | 10 + opensearchpy/_async/helpers/search.py | 534 ++++++++ opensearchpy/_async/helpers/search.pyi | 13 + opensearchpy/_async/helpers/test.py | 45 + opensearchpy/_async/helpers/test.pyi | 19 + .../_async/helpers/update_by_query.py | 151 +++ .../_async/helpers/update_by_query.pyi | 12 + opensearchpy/connection/async_connections.py | 113 ++ opensearchpy/connection/async_connections.pyi | 10 + opensearchpy/helpers/__init__.py | 2 +- opensearchpy/helpers/__init__.pyi | 8 +- opensearchpy/helpers/analysis.py | 3 +- .../test_async/test_connection.py | 80 +- .../test_async/test_helpers/conftest.py | 227 ++++ .../test_async/test_helpers/test_document.py | 629 ++++++++++ .../test_helpers/test_faceted_search.py | 186 +++ .../test_async/test_helpers/test_index.py | 178 +++ .../test_async/test_helpers/test_mapping.py | 216 ++++ .../test_async/test_helpers/test_search.py | 546 ++++++++ .../test_helpers/test_update_by_query.py | 162 +++ .../test_server/test_helpers/conftest.py | 110 ++ .../test_actions.py} | 93 +- .../test_server/test_helpers/test_data.py | 1097 +++++++++++++++++ .../test_server/test_helpers/test_document.py | 555 +++++++++ .../test_helpers/test_faceted_search.py | 274 ++++ .../test_server/test_helpers/test_index.py | 114 ++ .../test_server/test_helpers/test_mapping.py | 158 +++ .../test_server/test_helpers/test_search.py | 161 +++ .../test_helpers/test_update_by_query.py | 69 ++ 40 files changed, 7226 insertions(+), 63 deletions(-) create mode 100644 opensearchpy/_async/helpers/__init__.py rename opensearchpy/_async/{helpers.py => helpers/actions.py} (99%) rename opensearchpy/_async/{helpers.pyi => helpers/actions.pyi} (98%) create mode 100644 opensearchpy/_async/helpers/document.py create mode 100644 opensearchpy/_async/helpers/document.pyi create mode 100644 opensearchpy/_async/helpers/faceted_search.py create mode 100644 opensearchpy/_async/helpers/faceted_search.pyi create mode 100644 opensearchpy/_async/helpers/index.py create mode 100644 opensearchpy/_async/helpers/index.pyi create mode 100644 opensearchpy/_async/helpers/mapping.py create mode 100644 opensearchpy/_async/helpers/mapping.pyi create mode 100644 opensearchpy/_async/helpers/search.py create mode 100644 opensearchpy/_async/helpers/search.pyi create mode 100644 opensearchpy/_async/helpers/test.py create mode 100644 opensearchpy/_async/helpers/test.pyi create mode 100644 opensearchpy/_async/helpers/update_by_query.py create mode 100644 opensearchpy/_async/helpers/update_by_query.pyi create mode 100644 opensearchpy/connection/async_connections.py create mode 100644 opensearchpy/connection/async_connections.pyi create mode 100644 test_opensearchpy/test_async/test_helpers/conftest.py create mode 100644 
test_opensearchpy/test_async/test_helpers/test_document.py create mode 100644 test_opensearchpy/test_async/test_helpers/test_faceted_search.py create mode 100644 test_opensearchpy/test_async/test_helpers/test_index.py create mode 100644 test_opensearchpy/test_async/test_helpers/test_mapping.py create mode 100644 test_opensearchpy/test_async/test_helpers/test_search.py create mode 100644 test_opensearchpy/test_async/test_helpers/test_update_by_query.py create mode 100644 test_opensearchpy/test_async/test_server/test_helpers/conftest.py rename test_opensearchpy/test_async/test_server/{test_helpers.py => test_helpers/test_actions.py} (91%) create mode 100644 test_opensearchpy/test_async/test_server/test_helpers/test_data.py create mode 100644 test_opensearchpy/test_async/test_server/test_helpers/test_document.py create mode 100644 test_opensearchpy/test_async/test_server/test_helpers/test_faceted_search.py create mode 100644 test_opensearchpy/test_async/test_server/test_helpers/test_index.py create mode 100644 test_opensearchpy/test_async/test_server/test_helpers/test_mapping.py create mode 100644 test_opensearchpy/test_async/test_server/test_helpers/test_search.py create mode 100644 test_opensearchpy/test_async/test_server/test_helpers/test_update_by_query.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9635adc6..648e5d6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [Unreleased] ### Added +- Added async support for helpers that are merged from opensearch-dsl-py ([#329](https://github.com/opensearch-project/opensearch-py/pull/329)) ### Changed ### Deprecated ### Removed diff --git a/opensearchpy/_async/helpers/__init__.py b/opensearchpy/_async/helpers/__init__.py new file mode 100644 index 00000000..6c0097cd --- /dev/null +++ b/opensearchpy/_async/helpers/__init__.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
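The rename that follows moves the existing async action helpers (``async_scan`` and friends) from ``opensearchpy/_async/helpers.py`` into the new ``opensearchpy/_async/helpers/actions.py`` module, so the DSL-style helpers added by this patch can live alongside them; only the relative imports change. A minimal consumer of the relocated module (a sketch assuming a reachable local cluster and an existing ``blogs`` index) could look like::

    import asyncio

    from opensearchpy import AsyncOpenSearch
    from opensearchpy._async.helpers.actions import async_scan


    async def dump_titles():
        client = AsyncOpenSearch(hosts=[{"host": "localhost", "port": 9200}])
        try:
            # async_scan drives the scroll API and yields raw hits one at a time
            async for hit in async_scan(
                client, index="blogs", query={"query": {"match_all": {}}}
            ):
                print(hit["_source"].get("title"))
        finally:
            await client.close()


    asyncio.run(dump_titles())

The helper signatures themselves are untouched by the move, as the 99% similarity index of the rename below indicates.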
diff --git a/opensearchpy/_async/helpers.py b/opensearchpy/_async/helpers/actions.py similarity index 99% rename from opensearchpy/_async/helpers.py rename to opensearchpy/_async/helpers/actions.py index 28527781..323a6668 100644 --- a/opensearchpy/_async/helpers.py +++ b/opensearchpy/_async/helpers/actions.py @@ -32,15 +32,15 @@ import asyncio import logging -from ..compat import map -from ..exceptions import TransportError -from ..helpers.actions import ( +from ...compat import map +from ...exceptions import TransportError +from ...helpers.actions import ( _ActionChunker, _process_bulk_chunk_error, _process_bulk_chunk_success, expand_action, ) -from ..helpers.errors import ScanError +from ...helpers.errors import ScanError logger = logging.getLogger("opensearchpy.helpers") diff --git a/opensearchpy/_async/helpers.pyi b/opensearchpy/_async/helpers/actions.pyi similarity index 98% rename from opensearchpy/_async/helpers.pyi rename to opensearchpy/_async/helpers/actions.pyi index 366ad172..be000ae8 100644 --- a/opensearchpy/_async/helpers.pyi +++ b/opensearchpy/_async/helpers/actions.pyi @@ -41,8 +41,8 @@ from typing import ( Union, ) -from ..serializer import Serializer -from .client import AsyncOpenSearch +from ...serializer import Serializer +from ..client import AsyncOpenSearch logger: logging.Logger diff --git a/opensearchpy/_async/helpers/document.py b/opensearchpy/_async/helpers/document.py new file mode 100644 index 00000000..7f796a86 --- /dev/null +++ b/opensearchpy/_async/helpers/document.py @@ -0,0 +1,437 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
+ +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from fnmatch import fnmatch + +from six import add_metaclass + +from opensearchpy._async.helpers.index import AsyncIndex +from opensearchpy._async.helpers.search import AsyncSearch +from opensearchpy.connection.async_connections import get_connection +from opensearchpy.exceptions import ( + IllegalOperation, + NotFoundError, + RequestError, + ValidationException, +) +from opensearchpy.helpers.document import DocumentMeta +from opensearchpy.helpers.utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge + + +class AsyncIndexMeta(DocumentMeta): + # global flag to guard us from associating an Index with the base Document + # class, only user defined subclasses should have an _index attr + _document_initialized = False + + def __new__(cls, name, bases, attrs): + new_cls = super(AsyncIndexMeta, cls).__new__(cls, name, bases, attrs) + if cls._document_initialized: + index_opts = attrs.pop("Index", None) + index = cls.construct_index(index_opts, bases) + new_cls._index = index + index.document(new_cls) + cls._document_initialized = True + return new_cls + + @classmethod + def construct_index(cls, opts, bases): + if opts is None: + for b in bases: + if hasattr(b, "_index"): + return b._index + + # Set None as Index name so it will set _all while making the query + return AsyncIndex(name=None) + + i = AsyncIndex( + getattr(opts, "name", "*"), using=getattr(opts, "using", "default") + ) + i.settings(**getattr(opts, "settings", {})) + i.aliases(**getattr(opts, "aliases", {})) + for a in getattr(opts, "analyzers", ()): + i.analyzer(a) + return i + + +@add_metaclass(AsyncIndexMeta) +class AsyncDocument(ObjectBase): + """ + Model-like class for persisting documents in opensearch. + """ + + @classmethod + def _matches(cls, hit): + if cls._index._name is None: + return True + return fnmatch(hit.get("_index", ""), cls._index._name) + + @classmethod + def _get_using(cls, using=None): + return using or cls._index._using + + @classmethod + async def _get_connection(cls, using=None): + return await get_connection(cls._get_using(using)) + + @classmethod + def _default_index(cls, index=None): + return index or cls._index._name + + @classmethod + async def init(cls, index=None, using=None): + """ + Create the index and populate the mappings in opensearch. + """ + i = cls._index + if index: + i = i.clone(name=index) + await i.save(using=using) + + def _get_index(self, index=None, required=True): + if index is None: + index = getattr(self.meta, "index", None) + if index is None: + index = getattr(self._index, "_name", None) + if index is None and required: + raise ValidationException("No index") + if index and "*" in index: + raise ValidationException("You cannot write to a wildcard index.") + return index + + def __repr__(self): + return "{}({})".format( + self.__class__.__name__, + ", ".join( + "{}={!r}".format(key, getattr(self.meta, key)) + for key in ("index", "id") + if key in self.meta + ), + ) + + @classmethod + def search(cls, using=None, index=None): + """ + Create an :class:`~opensearchpy.AsyncSearch` instance that will search + over this ``Document``. + """ + return AsyncSearch( + using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls] + ) + + @classmethod + async def get(cls, id, using=None, index=None, **kwargs): + """ + Retrieve a single document from opensearch using its ``id``. 
+ + :arg id: ``id`` of the document to be retrieved + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.get`` unchanged. + """ + opensearch = await cls._get_connection(using) + doc = await opensearch.get(index=cls._default_index(index), id=id, **kwargs) + if not doc.get("found", False): + return None + return cls.from_opensearch(doc) + + @classmethod + async def exists(cls, id, using=None, index=None, **kwargs): + """ + check if exists a single document from opensearch using its ``id``. + + :arg id: ``id`` of the document to check if exists + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.exists`` unchanged. + """ + opensearch = await cls._get_connection(using) + return await opensearch.exists(index=cls._default_index(index), id=id, **kwargs) + + @classmethod + async def mget( + cls, docs, using=None, index=None, raise_on_error=True, missing="none", **kwargs + ): + r""" + Retrieve multiple document by their ``id``\s. Returns a list of instances + in the same order as requested. + + :arg docs: list of ``id``\s of the documents to be retrieved or a list + of document specifications as per + https://opensearch.org/docs/latest/opensearch/rest-api/document-apis/multi-get/ + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg missing: what to do when one of the documents requested is not + found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise + ``NotFoundError``) or ``'skip'`` (ignore the missing document). + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.mget`` unchanged. + """ + if missing not in ("raise", "skip", "none"): + raise ValueError("'missing' must be 'raise', 'skip', or 'none'.") + opensearch = await cls._get_connection(using) + body = { + "docs": [ + doc if isinstance(doc, collections_abc.Mapping) else {"_id": doc} + for doc in docs + ] + } + results = await opensearch.mget(body, index=cls._default_index(index), **kwargs) + + objs, error_docs, missing_docs = [], [], [] + for doc in results["docs"]: + if doc.get("found"): + if error_docs or missing_docs: + # We're going to raise an exception anyway, so avoid an + # expensive call to cls.from_opensearch(). + continue + + objs.append(cls.from_opensearch(doc)) + + elif doc.get("error"): + if raise_on_error: + error_docs.append(doc) + if missing == "none": + objs.append(None) + + # The doc didn't cause an error, but the doc also wasn't found. + elif missing == "raise": + missing_docs.append(doc) + elif missing == "none": + objs.append(None) + + if error_docs: + error_ids = [doc["_id"] for doc in error_docs] + message = "Required routing not provided for documents %s." + message %= ", ".join(error_ids) + raise RequestError(400, message, error_docs) + if missing_docs: + missing_ids = [doc["_id"] for doc in missing_docs] + message = "Documents %s not found." % ", ".join(missing_ids) + raise NotFoundError(404, message, {"docs": missing_docs}) + return objs + + async def delete(self, using=None, index=None, **kwargs): + """ + Delete the instance in opensearch. 
+ + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.delete`` unchanged. + """ + opensearch = await self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + await opensearch.delete(index=self._get_index(index), **doc_meta) + + def to_dict(self, include_meta=False, skip_empty=True): + """ + Serialize the instance into a dictionary so that it can be saved in opensearch. + + :arg include_meta: if set to ``True`` will include all the metadata + (``_index``, ``_id`` etc). Otherwise just the document's + data is serialized. + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in opensearch. + """ + d = super(AsyncDocument, self).to_dict(skip_empty=skip_empty) + if not include_meta: + return d + + meta = {"_" + k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # in case of to_dict include the index unlike save/update/delete + index = self._get_index(required=False) + if index is not None: + meta["_index"] = index + + meta["_source"] = d + return meta + + async def update( + self, + using=None, + index=None, + detect_noop=True, + doc_as_upsert=False, + refresh=False, + retry_on_conflict=None, + script=None, + script_id=None, + scripted_upsert=False, + upsert=None, + return_doc_meta=False, + **fields + ): + """ + Partial update of the document, specify fields you wish to update and + both the instance and the document in opensearch will be updated:: + + doc = MyDocument(title='Document Title!') + doc.save() + doc.update(title='New Document Title!') + + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg detect_noop: Set to ``False`` to disable noop detection. + :arg refresh: Control when the changes made by this request are visible + to search. Set to ``True`` for immediate effect. + :arg retry_on_conflict: In between the get and indexing phases of the + update, it is possible that another process might have already + updated the same document. By default, the update will fail with a + version conflict exception. The retry_on_conflict parameter + controls how many times to retry the update before finally throwing + an exception. 
+ :arg doc_as_upsert: Instead of sending a partial doc plus an upsert + doc, setting doc_as_upsert to true will use the contents of doc as + the upsert value + :arg return_doc_meta: set to ``True`` to return all metadata from the + index API call instead of only the operation result + + :return operation result noop/updated + """ + body = { + "doc_as_upsert": doc_as_upsert, + "detect_noop": detect_noop, + } + + # scripted update + if script or script_id: + if upsert is not None: + body["upsert"] = upsert + + if script: + script = {"source": script} + else: + script = {"id": script_id} + + script["params"] = fields + + body["script"] = script + body["scripted_upsert"] = scripted_upsert + + # partial document update + else: + if not fields: + raise IllegalOperation( + "You cannot call update() without updating individual fields or a script. " + "If you wish to update the entire object use save()." + ) + + # update given fields locally + merge(self, fields) + + # prepare data for OpenSearch + values = self.to_dict() + + # if fields were given: partial update + body["doc"] = {k: values.get(k) for k in fields.keys()} + + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + if retry_on_conflict is not None: + doc_meta["retry_on_conflict"] = retry_on_conflict + + # Optimistic concurrency control + if ( + retry_on_conflict in (None, 0) + and "seq_no" in self.meta + and "primary_term" in self.meta + ): + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + meta = await (await self._get_connection(using)).update( + index=self._get_index(index), body=body, refresh=refresh, **doc_meta + ) + # update meta information from OpenSearch + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + async def save( + self, + using=None, + index=None, + validate=True, + skip_empty=True, + return_doc_meta=False, + **kwargs + ): + """ + Save the document into opensearch. If the document doesn't exist it + is created, it is overwritten otherwise. Returns ``True`` if this + operations resulted in new document being created. + + :arg index: opensearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg validate: set to ``False`` to skip validating the document + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in opensearch. + :arg return_doc_meta: set to ``True`` to return all metadata from the + update API call instead of only the operation result + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.index`` unchanged. 
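Together, the class methods above (``init``, ``get``, ``exists``, ``mget``) and the instance methods (``save``, ``update``, ``delete``) give ``AsyncDocument`` a model-like persistence API. A minimal end-to-end sketch, assuming ``async_connections`` exposes a ``create_connection()`` registry call that mirrors the synchronous ``connections`` module::

    import asyncio

    from opensearchpy._async.helpers.document import AsyncDocument
    from opensearchpy.connection import async_connections
    from opensearchpy.helpers.field import Keyword, Text


    class Post(AsyncDocument):
        title = Text()
        category = Keyword()

        class Index:
            name = "posts"


    async def main():
        # assumed registry call, mirroring the sync ``connections`` module
        async_connections.create_connection(hosts=[{"host": "localhost", "port": 9200}])

        await Post.init()  # create the index and push the mappings
        post = Post(title="Async DSL", category="opensearch")
        post.meta.id = 1
        print(await post.save())  # 'created' or 'updated'

        fetched = await Post.get(id=1)
        print(await fetched.update(title="Async DSL helpers"))  # 'updated' or 'noop'


    asyncio.run(main())

Because ``save``, ``update`` and ``delete`` copy ``seq_no``/``primary_term`` from ``meta`` into ``if_seq_no``/``if_primary_term``, optimistic concurrency control is applied automatically for documents loaded through the API; the sketch never passes those values by hand.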
+ + :return operation result created/updated + """ + if validate: + self.full_clean() + + opensearch = await self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + meta = await opensearch.index( + index=self._get_index(index), + body=self.to_dict(skip_empty=skip_empty), + **doc_meta + ) + # update meta information from OpenSearch + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] diff --git a/opensearchpy/_async/helpers/document.pyi b/opensearchpy/_async/helpers/document.pyi new file mode 100644 index 00000000..71eb4ef4 --- /dev/null +++ b/opensearchpy/_async/helpers/document.pyi @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from opensearchpy.helpers.document import DocumentMeta +from opensearchpy.helpers.utils import ObjectBase + +class AsyncIndexMeta(DocumentMeta): ... +class AsyncDocument(ObjectBase): ... diff --git a/opensearchpy/_async/helpers/faceted_search.py b/opensearchpy/_async/helpers/faceted_search.py new file mode 100644 index 00000000..c6ca4385 --- /dev/null +++ b/opensearchpy/_async/helpers/faceted_search.py @@ -0,0 +1,200 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + + +from six import iteritems, itervalues + +from opensearchpy._async.helpers.search import AsyncSearch +from opensearchpy.helpers.faceted_search import FacetedResponse +from opensearchpy.helpers.query import MatchAll + + +class AsyncFacetedSearch(object): + """ + Abstraction for creating faceted navigation searches that takes care of + composing the queries, aggregations and filters as needed as well as + presenting the results in an easy-to-consume fashion:: + + class BlogSearch(AsyncFacetedSearch): + index = 'blogs' + doc_types = [Blog, Post] + fields = ['title^5', 'category', 'description', 'body'] + + facets = { + 'type': TermsFacet(field='_type'), + 'category': TermsFacet(field='category'), + 'weekly_posts': DateHistogramFacet(field='published_from', interval='week') + } + + def search(self): + ' Override search to add your own filters ' + s = super(BlogSearch, self).search() + return s.filter('term', published=True) + + # when using: + blog_search = BlogSearch("web framework", filters={"category": "python"}) + + # supports pagination + blog_search[10:20] + + response = await blog_search.execute() + + # easy access to aggregation results: + for category, hit_count, is_selected in response.facets.category: + print( + "Category %s has %d hits%s." 
% ( + category, + hit_count, + ' and is chosen' if is_selected else '' + ) + ) + + """ + + index = None + doc_types = None + fields = None + facets = {} + using = "default" + + def __init__(self, query=None, filters={}, sort=()): + """ + :arg query: the text to search for + :arg filters: facet values to filter + :arg sort: sort information to be passed to :class:`~opensearchpy.AsyncSearch` + """ + self._query = query + self._filters = {} + self._sort = sort + self.filter_values = {} + for name, value in iteritems(filters): + self.add_filter(name, value) + + self._s = self.build_search() + + async def count(self): + return await self._s.count() + + def __getitem__(self, k): + self._s = self._s[k] + return self + + def __iter__(self): + return iter(self._s) + + def add_filter(self, name, filter_values): + """ + Add a filter for a facet. + """ + # normalize the value into a list + if not isinstance(filter_values, (tuple, list)): + if filter_values is None: + return + filter_values = [ + filter_values, + ] + + # remember the filter values for use in FacetedResponse + self.filter_values[name] = filter_values + + # get the filter from the facet + f = self.facets[name].add_filter(filter_values) + if f is None: + return + + self._filters[name] = f + + def search(self): + """ + Returns the base Search object to which the facets are added. + + You can customize the query by overriding this method and returning a + modified search object. + """ + s = AsyncSearch(doc_type=self.doc_types, index=self.index, using=self.using) + return s.response_class(FacetedResponse) + + def query(self, search, query): + """ + Add query part to ``search``. + + Override this if you wish to customize the query used. + """ + if query: + if self.fields: + return search.query("multi_match", fields=self.fields, query=query) + else: + return search.query("multi_match", query=query) + return search + + def aggregate(self, search): + """ + Add aggregations representing the facets selected, including potential + filters. + """ + for f, facet in iteritems(self.facets): + agg = facet.get_aggregation() + agg_filter = MatchAll() + for field, filter in iteritems(self._filters): + if f == field: + continue + agg_filter &= filter + search.aggs.bucket("_filter_" + f, "filter", filter=agg_filter).bucket( + f, agg + ) + + def filter(self, search): + """ + Add a ``post_filter`` to the search request narrowing the results based + on the facet filters. + """ + if not self._filters: + return search + + post_filter = MatchAll() + for f in itervalues(self._filters): + post_filter &= f + return search.post_filter(post_filter) + + def highlight(self, search): + """ + Add highlighting for all the fields + """ + return search.highlight( + *(f if "^" not in f else f.split("^", 1)[0] for f in self.fields) + ) + + def sort(self, search): + """ + Add sorting information to the request. + """ + if self._sort: + search = search.sort(*self._sort) + return search + + def build_search(self): + """ + Construct the ``AsyncSearch`` object. + """ + s = self.search() + s = self.query(s, self._query) + s = self.filter(s) + if self.fields: + s = self.highlight(s) + s = self.sort(s) + self.aggregate(s) + return s + + async def execute(self): + """ + Execute the search and return the response. 
+ """ + r = await self._s.execute() + r._faceted_search = self + return r diff --git a/opensearchpy/_async/helpers/faceted_search.pyi b/opensearchpy/_async/helpers/faceted_search.pyi new file mode 100644 index 00000000..443e87c5 --- /dev/null +++ b/opensearchpy/_async/helpers/faceted_search.pyi @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +class AsyncFacetedSearch(object): ... diff --git a/opensearchpy/_async/helpers/index.py b/opensearchpy/_async/helpers/index.py new file mode 100644 index 00000000..c3e5a371 --- /dev/null +++ b/opensearchpy/_async/helpers/index.py @@ -0,0 +1,652 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from opensearchpy._async.helpers.mapping import AsyncMapping +from opensearchpy._async.helpers.search import AsyncSearch +from opensearchpy._async.helpers.update_by_query import AsyncUpdateByQuery +from opensearchpy.connection.async_connections import get_connection +from opensearchpy.exceptions import IllegalOperation +from opensearchpy.helpers import analysis +from opensearchpy.helpers.utils import merge + + +class AsyncIndexTemplate(object): + def __init__(self, name, template, index=None, order=None, **kwargs): + if index is None: + self._index = AsyncIndex(template, **kwargs) + else: + if kwargs: + raise ValueError( + "You cannot specify options for Index when" + " passing an Index instance." + ) + self._index = index.clone() + self._index._name = template + self._template_name = name + self.order = order + + def __getattr__(self, attr_name): + return getattr(self._index, attr_name) + + def to_dict(self): + d = self._index.to_dict() + d["index_patterns"] = [self._index._name] + if self.order is not None: + d["order"] = self.order + return d + + async def save(self, using=None): + opensearch = await get_connection(using or self._index._using) + return await opensearch.indices.put_template( + name=self._template_name, body=self.to_dict() + ) + + +class AsyncIndex(object): + def __init__(self, name, using="default"): + """ + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + self._name = name + self._doc_types = [] + self._using = using + self._settings = {} + self._aliases = {} + self._analysis = {} + self._mapping = None + + def get_or_create_mapping(self): + if self._mapping is None: + self._mapping = AsyncMapping() + return self._mapping + + def as_template(self, template_name, pattern=None, order=None): + # TODO: should we allow pattern to be a top-level arg? + # or maybe have an IndexPattern that allows for it and have + # AsyncDocument._index be that? 
+ return AsyncIndexTemplate( + template_name, pattern or self._name, index=self, order=order + ) + + def resolve_nested(self, field_path): + for doc in self._doc_types: + nested, field = doc._doc_type.mapping.resolve_nested(field_path) + if field is not None: + return nested, field + if self._mapping: + return self._mapping.resolve_nested(field_path) + return (), None + + def resolve_field(self, field_path): + for doc in self._doc_types: + field = doc._doc_type.mapping.resolve_field(field_path) + if field is not None: + return field + if self._mapping: + return self._mapping.resolve_field(field_path) + return None + + async def load_mappings(self, using=None): + await self.get_or_create_mapping().update_from_opensearch( + self._name, using=using or self._using + ) + + def clone(self, name=None, using=None): + """ + Create a copy of the instance with another name or connection alias. + Useful for creating multiple indices with shared configuration:: + + i = AsyncIndex('base-index') + i.settings(number_of_shards=1) + await i.create() + + i2 = i.clone('other-index') + await i2.create() + + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + i = AsyncIndex(name or self._name, using=using or self._using) + i._settings = self._settings.copy() + i._aliases = self._aliases.copy() + i._analysis = self._analysis.copy() + i._doc_types = self._doc_types[:] + if self._mapping is not None: + i._mapping = self._mapping._clone() + return i + + async def _get_connection(self, using=None): + if self._name is None: + raise ValueError("You cannot perform API calls on the default index.") + return await get_connection(using or self._using) + + connection = property(_get_connection) + + def mapping(self, mapping): + """ + Associate a mapping (an instance of + :class:`~opensearchpy.AsyncMapping`) with this index. + This means that, when this index is created, it will contain the + mappings for the document type defined by those mappings. + """ + self.get_or_create_mapping().update(mapping) + + def document(self, document): + """ + Associate a :class:`~opensearchpy.AsyncDocument` subclass with an index. + This means that, when this index is created, it will contain the + mappings for the ``AsyncDocument``. If the ``AsyncDocument`` class doesn't have a + default index yet (by defining ``class AsyncIndex``), this instance will be + used. Can be used as a decorator:: + + i = AsyncIndex('blog') + + @i.document + class Post(AsyncDocument): + title = Text() + + # create the index, including Post mappings + await i.create() + + # .search() will now return a AsyncSearch object that will return + # properly deserialized Post instances + s = i.search() + """ + self._doc_types.append(document) + + # If the document index does not have any name, that means the user + # did not set any index already to the document. + # So set this index as document index + if document._index._name is None: + document._index = self + + return document + + def settings(self, **kwargs): + """ + Add settings to the index:: + + i = AsyncIndex('i') + i.settings(number_of_shards=1, number_of_replicas=0) + + Multiple calls to ``settings`` will merge the keys, later overriding + the earlier. 
+ """ + self._settings.update(kwargs) + return self + + def aliases(self, **kwargs): + """ + Add aliases to the index definition:: + + i = AsyncIndex('blog-v2') + i.aliases(blog={}, published={'filter': Q('term', published=True)}) + """ + self._aliases.update(kwargs) + return self + + def analyzer(self, *args, **kwargs): + """ + Explicitly add an analyzer to an index. Note that all custom analyzers + defined in mappings will also be created. This is useful for search analyzers. + + Example:: + + from opensearchpy import analyzer, tokenizer + + my_analyzer = analyzer('my_analyzer', + tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3), + filter=['lowercase'] + ) + + i = AsyncIndex('blog') + i.analyzer(my_analyzer) + + """ + analyzer = analysis.analyzer(*args, **kwargs) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + return + + # merge the definition + merge(self._analysis, d, True) + + def to_dict(self): + out = {} + if self._settings: + out["settings"] = self._settings + if self._aliases: + out["aliases"] = self._aliases + mappings = self._mapping.to_dict() if self._mapping else {} + analysis = self._mapping._collect_analysis() if self._mapping else {} + for d in self._doc_types: + mapping = d._doc_type.mapping + merge(mappings, mapping.to_dict(), True) + merge(analysis, mapping._collect_analysis(), True) + if mappings: + out["mappings"] = mappings + if analysis or self._analysis: + merge(analysis, self._analysis) + out.setdefault("settings", {})["analysis"] = analysis + return out + + def search(self, using=None): + """ + Return a :class:`~opensearchpy.AsyncSearch` object searching over the + index (or all the indices belonging to this template) and its + ``Document``\\s. + """ + return AsyncSearch( + using=using or self._using, index=self._name, doc_type=self._doc_types + ) + + def updateByQuery(self, using=None): + """ + Return a :class:`~opensearchpy.AsyncUpdateByQuery` object searching over the index + (or all the indices belonging to this template) and updating Documents that match + the search criteria. + + For more information, see here: + https://opensearch.org/docs/latest/opensearch/rest-api/document-apis/update-by-query/ + """ + return AsyncUpdateByQuery( + using=using or self._using, + index=self._name, + ) + + async def create(self, using=None, **kwargs): + """ + Creates the index in opensearch. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.create`` unchanged. + """ + return await (await self._get_connection(using)).indices.create( + index=self._name, body=self.to_dict(), **kwargs + ) + + async def is_closed(self, using=None): + state = await (await self._get_connection(using)).cluster.state( + index=self._name, metric="metadata" + ) + return state["metadata"]["indices"][self._name]["state"] == "close" + + async def save(self, using=None): + """ + Sync the index definition with opensearch, creating the index if it + doesn't exist and updating its settings and mappings if it does. + + Note some settings and mapping changes cannot be done on an open + index (or at all on an existing index) and for those this method will + fail with the underlying exception. 
+ """ + if not await self.exists(using=using): + return await self.create(using=using) + + body = self.to_dict() + settings = body.pop("settings", {}) + analysis = settings.pop("analysis", None) + current_settings = (await self.get_settings(using=using))[self._name][ + "settings" + ]["index"] + if analysis: + if await self.is_closed(using=using): + # closed index, update away + settings["analysis"] = analysis + else: + # compare analysis definition, if all analysis objects are + # already defined as requested, skip analysis update and + # proceed, otherwise raise IllegalOperation + existing_analysis = current_settings.get("analysis", {}) + if any( + existing_analysis.get(section, {}).get(k, None) + != analysis[section][k] + for section in analysis + for k in analysis[section] + ): + raise IllegalOperation( + "You cannot update analysis configuration on an open index, " + "you need to close index %s first." % self._name + ) + + # try and update the settings + if settings: + settings = settings.copy() + for k, v in list(settings.items()): + if k in current_settings and current_settings[k] == str(v): + del settings[k] + + if settings: + await self.put_settings(using=using, body=settings) + + # update the mappings, any conflict in the mappings will result in an + # exception + mappings = body.pop("mappings", {}) + if mappings: + await self.put_mapping(using=using, body=mappings) + + async def analyze(self, using=None, **kwargs): + """ + Perform the analysis process on a text and return the tokens breakdown + of the text. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.analyze`` unchanged. + """ + return await (await self._get_connection(using)).indices.analyze( + index=self._name, **kwargs + ) + + async def refresh(self, using=None, **kwargs): + """ + Performs a refresh operation on the index. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.refresh`` unchanged. + """ + return await (await self._get_connection(using)).indices.refresh( + index=self._name, **kwargs + ) + + async def flush(self, using=None, **kwargs): + """ + Performs a flush operation on the index. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.flush`` unchanged. + """ + return await (await self._get_connection(using)).indices.flush( + index=self._name, **kwargs + ) + + async def get(self, using=None, **kwargs): + """ + The get index API allows to retrieve information about the index. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.get`` unchanged. + """ + return await (await self._get_connection(using)).indices.get( + index=self._name, **kwargs + ) + + async def open(self, using=None, **kwargs): + """ + Opens the index in opensearch. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.open`` unchanged. + """ + return await (await self._get_connection(using)).indices.open( + index=self._name, **kwargs + ) + + async def close(self, using=None, **kwargs): + """ + Closes the index in opensearch. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.close`` unchanged. + """ + return await (await self._get_connection(using)).indices.close( + index=self._name, **kwargs + ) + + async def delete(self, using=None, **kwargs): + """ + Deletes the index in opensearch. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.delete`` unchanged. 
+ """ + return await (await self._get_connection(using)).indices.delete( + index=self._name, **kwargs + ) + + async def exists(self, using=None, **kwargs): + """ + Returns ``True`` if the index already exists in opensearch. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.exists`` unchanged. + """ + return await (await self._get_connection(using)).indices.exists( + index=self._name, **kwargs + ) + + async def put_mapping(self, using=None, **kwargs): + """ + Register specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.put_mapping`` unchanged. + """ + return await (await self._get_connection(using)).indices.put_mapping( + index=self._name, **kwargs + ) + + async def get_mapping(self, using=None, **kwargs): + """ + Retrieve specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.get_mapping`` unchanged. + """ + return await (await self._get_connection(using)).indices.get_mapping( + index=self._name, **kwargs + ) + + async def get_field_mapping(self, using=None, **kwargs): + """ + Retrieve mapping definition of a specific field. + + Any additional keyword arguments will be passed to + ``Async OpenSearch.indices.get_field_mapping`` unchanged. + """ + return await (await self._get_connection(using)).indices.get_field_mapping( + index=self._name, **kwargs + ) + + async def put_alias(self, using=None, **kwargs): + """ + Create an alias for the index. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.put_alias`` unchanged. + """ + return await (await self._get_connection(using)).indices.put_alias( + index=self._name, **kwargs + ) + + async def exists_alias(self, using=None, **kwargs): + """ + Return a boolean indicating whether given alias exists for this index. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.exists_alias`` unchanged. + """ + return await (await self._get_connection(using)).indices.exists_alias( + index=self._name, **kwargs + ) + + async def get_alias(self, using=None, **kwargs): + """ + Retrieve a specified alias. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.get_alias`` unchanged. + """ + return await (await self._get_connection(using)).indices.get_alias( + index=self._name, **kwargs + ) + + async def delete_alias(self, using=None, **kwargs): + """ + Delete specific alias. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.delete_alias`` unchanged. + """ + return await (await self._get_connection(using)).indices.delete_alias( + index=self._name, **kwargs + ) + + async def get_settings(self, using=None, **kwargs): + """ + Retrieve settings for the index. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.get_settings`` unchanged. + """ + return await (await self._get_connection(using)).indices.get_settings( + index=self._name, **kwargs + ) + + async def put_settings(self, using=None, **kwargs): + """ + Change specific index level settings in real time. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.put_settings`` unchanged. + """ + return await (await self._get_connection(using)).indices.put_settings( + index=self._name, **kwargs + ) + + async def stats(self, using=None, **kwargs): + """ + Retrieve statistics on different operations happening on the index. 
+ + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.stats`` unchanged. + """ + return await (await self._get_connection(using)).indices.stats( + index=self._name, **kwargs + ) + + async def segments(self, using=None, **kwargs): + """ + Provide low level segments information that a Lucene index (shard + level) is built with. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.segments`` unchanged. + """ + return await (await self._get_connection(using)).indices.segments( + index=self._name, **kwargs + ) + + async def validate_query(self, using=None, **kwargs): + """ + Validate a potentially expensive query without executing it. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.validate_query`` unchanged. + """ + return await (await self._get_connection(using)).indices.validate_query( + index=self._name, **kwargs + ) + + async def clear_cache(self, using=None, **kwargs): + """ + Clear all caches or specific cached associated with the index. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.clear_cache`` unchanged. + """ + return await (await self._get_connection(using)).indices.clear_cache( + index=self._name, **kwargs + ) + + async def recovery(self, using=None, **kwargs): + """ + The indices recovery API provides insight into on-going shard + recoveries for the index. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.recovery`` unchanged. + """ + return await (await self._get_connection(using)).indices.recovery( + index=self._name, **kwargs + ) + + async def upgrade(self, using=None, **kwargs): + """ + Upgrade the index to the latest format. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.upgrade`` unchanged. + """ + return await (await self._get_connection(using)).indices.upgrade( + index=self._name, **kwargs + ) + + async def get_upgrade(self, using=None, **kwargs): + """ + Monitor how much of the index is upgraded. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.get_upgrade`` unchanged. + """ + return await (await self._get_connection(using)).indices.get_upgrade( + index=self._name, **kwargs + ) + + async def shard_stores(self, using=None, **kwargs): + """ + Provides store information for shard copies of the index. Store + information reports on which nodes shard copies exist, the shard copy + version, indicating how recent they are, and any exceptions encountered + while opening the shard index or from earlier engine failure. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.shard_stores`` unchanged. + """ + return await (await self._get_connection(using)).indices.shard_stores( + index=self._name, **kwargs + ) + + async def forcemerge(self, using=None, **kwargs): + """ + The force merge API allows to force merging of the index through an + API. The merge relates to the number of segments a Lucene index holds + within each shard. The force merge operation allows to reduce the + number of segments by merging them. + + This call will block until the merge is complete. If the http + connection is lost, the request will continue in the background, and + any new requests will block until the previous force merge is complete. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.forcemerge`` unchanged. 
+ """ + return await (await self._get_connection(using)).indices.forcemerge( + index=self._name, **kwargs + ) + + async def shrink(self, using=None, **kwargs): + """ + The shrink index API allows you to shrink an existing index into a new + index with fewer primary shards. The number of primary shards in the + target index must be a factor of the shards in the source index. For + example an index with 8 primary shards can be shrunk into 4, 2 or 1 + primary shards or an index with 15 primary shards can be shrunk into 5, + 3 or 1. If the number of shards in the index is a prime number it can + only be shrunk into a single primary shard. Before shrinking, a + (primary or replica) copy of every shard in the index must be present + on the same node. + + Any additional keyword arguments will be passed to + ``AsyncOpenSearch.indices.shrink`` unchanged. + """ + return await (await self._get_connection(using)).indices.shrink( + index=self._name, **kwargs + ) diff --git a/opensearchpy/_async/helpers/index.pyi b/opensearchpy/_async/helpers/index.pyi new file mode 100644 index 00000000..5b9d8720 --- /dev/null +++ b/opensearchpy/_async/helpers/index.pyi @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +class AsyncIndexTemplate(object): ... +class AsyncIndex(object): ... diff --git a/opensearchpy/_async/helpers/mapping.py b/opensearchpy/_async/helpers/mapping.py new file mode 100644 index 00000000..1ccec472 --- /dev/null +++ b/opensearchpy/_async/helpers/mapping.py @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
+ +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from itertools import chain + +from six import iteritems + +from opensearchpy.connection.async_connections import get_connection +from opensearchpy.helpers.field import Nested, Text +from opensearchpy.helpers.mapping import META_FIELDS, Properties + + +class AsyncMapping(object): + def __init__(self): + self.properties = Properties() + self._meta = {} + + def __repr__(self): + return "Mapping()" + + def _clone(self): + m = AsyncMapping() + m.properties._params = self.properties._params.copy() + return m + + @classmethod + async def from_opensearch(cls, index, using="default"): + m = cls() + await m.update_from_opensearch(index, using) + return m + + def resolve_nested(self, field_path): + field = self + nested = [] + parts = field_path.split(".") + for i, step in enumerate(parts): + try: + field = field[step] + except KeyError: + return (), None + if isinstance(field, Nested): + nested.append(".".join(parts[: i + 1])) + return nested, field + + def resolve_field(self, field_path): + field = self + for step in field_path.split("."): + try: + field = field[step] + except KeyError: + return + return field + + def _collect_analysis(self): + analysis = {} + fields = [] + if "_all" in self._meta: + fields.append(Text(**self._meta["_all"])) + + for f in chain(fields, self.properties._collect_fields()): + for analyzer_name in ( + "analyzer", + "normalizer", + "search_analyzer", + "search_quote_analyzer", + ): + if not hasattr(f, analyzer_name): + continue + analyzer = getattr(f, analyzer_name) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + continue + + # merge the definition + # TODO: conflict detection/resolution + for key in d: + analysis.setdefault(key, {}).update(d[key]) + + return analysis + + async def save(self, index, using="default"): + from opensearchpy._async.helpers.index import AsyncIndex + + index = AsyncIndex(index, using=using) + index.mapping(self) + return await index.save() + + async def update_from_opensearch(self, index, using="default"): + opensearch = await get_connection(using) + raw = await opensearch.indices.get_mapping(index=index) + _, raw = raw.popitem() + self._update_from_dict(raw["mappings"]) + + def _update_from_dict(self, raw): + for name, definition in iteritems(raw.get("properties", {})): + self.field(name, definition) + + # metadata like _all etc + for name, value in iteritems(raw): + if name != "properties": + if isinstance(value, collections_abc.Mapping): + self.meta(name, **value) + else: + self.meta(name, value) + + def update(self, mapping, update_only=False): + for name in mapping: + if update_only and name in self: + # nested and inner objects, merge recursively + if hasattr(self[name], "update"): + # FIXME only merge subfields, not the settings + self[name].update(mapping[name], update_only) + continue + self.field(name, mapping[name]) + + if update_only: + for name in mapping._meta: + if name not in self._meta: + self._meta[name] = mapping._meta[name] + else: + self._meta.update(mapping._meta) + + def __contains__(self, name): + return name in self.properties.properties + + def __getitem__(self, name): + return self.properties.properties[name] + + def __iter__(self): + return iter(self.properties.properties) + + def field(self, *args, **kwargs): + self.properties.field(*args, **kwargs) + return self + + def meta(self, name, 
params=None, **kwargs): + if not name.startswith("_") and name not in META_FIELDS: + name = "_" + name + + if params and kwargs: + raise ValueError("Meta configs cannot have both value and a dictionary.") + + self._meta[name] = kwargs if params is None else params + return self + + def to_dict(self): + meta = self._meta + + # hard coded serialization of analyzers in _all + if "_all" in meta: + meta = meta.copy() + _all = meta["_all"] = meta["_all"].copy() + for f in ("analyzer", "search_analyzer", "search_quote_analyzer"): + if hasattr(_all.get(f, None), "to_dict"): + _all[f] = _all[f].to_dict() + meta.update(self.properties.to_dict()) + return meta diff --git a/opensearchpy/_async/helpers/mapping.pyi b/opensearchpy/_async/helpers/mapping.pyi new file mode 100644 index 00000000..61505f42 --- /dev/null +++ b/opensearchpy/_async/helpers/mapping.pyi @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +class AsyncMapping(object): ... diff --git a/opensearchpy/_async/helpers/search.py b/opensearchpy/_async/helpers/search.py new file mode 100644 index 00000000..bd6884cf --- /dev/null +++ b/opensearchpy/_async/helpers/search.py @@ -0,0 +1,534 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +import copy + +from six import iteritems, string_types + +from opensearchpy._async.helpers.actions import aiter, async_scan +from opensearchpy.connection.async_connections import get_connection +from opensearchpy.exceptions import IllegalOperation, TransportError +from opensearchpy.helpers.aggs import A +from opensearchpy.helpers.query import Bool, Q +from opensearchpy.helpers.response import Response +from opensearchpy.helpers.search import AggsProxy, ProxyDescriptor, QueryProxy, Request +from opensearchpy.helpers.utils import AttrDict, recursive_to_dict + + +class AsyncSearch(Request): + query = ProxyDescriptor("query") + post_filter = ProxyDescriptor("post_filter") + + def __init__(self, **kwargs): + """ + Search request to opensearch. + + :arg using: `AsyncOpenSearch` instance to use + :arg index: limit the search to index + :arg doc_type: only query this type. + + All the parameters supplied (or omitted) at creation type can be later + overridden by methods (`using`, `index` and `doc_type` respectively). + """ + super(AsyncSearch, self).__init__(**kwargs) + + self.aggs = AggsProxy(self) + self._sort = [] + self._source = None + self._highlight = {} + self._highlight_opts = {} + self._suggest = {} + self._script_fields = {} + self._response_class = Response + + self._query_proxy = QueryProxy(self, "query") + self._post_filter_proxy = QueryProxy(self, "post_filter") + + def filter(self, *args, **kwargs): + return self.query(Bool(filter=[Q(*args, **kwargs)])) + + def exclude(self, *args, **kwargs): + return self.query(Bool(filter=[~Q(*args, **kwargs)])) + + def __getitem__(self, n): + """ + Support slicing the `AsyncSearch` instance for pagination. + + Slicing equates to the from/size parameters. 
E.g.:: + + s = AsyncSearch().query(...)[0:25] + + is equivalent to:: + + s = AsyncSearch().query(...).extra(from_=0, size=25) + + """ + s = self._clone() + + if isinstance(n, slice): + # If negative slicing, abort. + if n.start and n.start < 0 or n.stop and n.stop < 0: + raise ValueError("AsyncSearch does not support negative slicing.") + # OpenSearch won't get all results so we default to size: 10 if + # stop not given. + s._extra["from"] = n.start or 0 + s._extra["size"] = max( + 0, n.stop - (n.start or 0) if n.stop is not None else 10 + ) + return s + else: # This is an index lookup, equivalent to slicing by [n:n+1]. + # If negative index, abort. + if n < 0: + raise ValueError("AsyncSearch does not support negative indexing.") + s._extra["from"] = n + s._extra["size"] = 1 + return s + + @classmethod + def from_dict(cls, d): + """ + Construct a new `AsyncSearch` instance from a raw dict containing the search + body. Useful when migrating from raw dictionaries. + + Example:: + + s = AsyncSearch.from_dict({ + "query": { + "bool": { + "must": [...] + } + }, + "aggs": {...} + }) + s = s.filter('term', published=True) + """ + s = cls() + s.update_from_dict(d) + return s + + def _clone(self): + """ + Return a clone of the current search request. Performs a shallow copy + of all the underlying objects. Used internally by most state modifying + APIs. + """ + s = super(AsyncSearch, self)._clone() + + s._response_class = self._response_class + s._sort = self._sort[:] + s._source = copy.copy(self._source) if self._source is not None else None + s._highlight = self._highlight.copy() + s._highlight_opts = self._highlight_opts.copy() + s._suggest = self._suggest.copy() + s._script_fields = self._script_fields.copy() + for x in ("query", "post_filter"): + getattr(s, x)._proxied = getattr(self, x)._proxied + + # copy top-level bucket definitions + if self.aggs._params.get("aggs"): + s.aggs._params = {"aggs": self.aggs._params["aggs"].copy()} + return s + + def response_class(self, cls): + """ + Override the default wrapper used for the response. + """ + s = self._clone() + s._response_class = cls + return s + + def update_from_dict(self, d): + """ + Apply options from a serialized body to the current instance. Modifies + the object in-place. Used mostly by ``from_dict``. + """ + d = d.copy() + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "post_filter" in d: + self.post_filter._proxied = Q(d.pop("post_filter")) + + aggs = d.pop("aggs", d.pop("aggregations", {})) + if aggs: + self.aggs._params = { + "aggs": {name: A(value) for (name, value) in iteritems(aggs)} + } + if "sort" in d: + self._sort = d.pop("sort") + if "_source" in d: + self._source = d.pop("_source") + if "highlight" in d: + high = d.pop("highlight").copy() + self._highlight = high.pop("fields") + self._highlight_opts = high + if "suggest" in d: + self._suggest = d.pop("suggest") + if "text" in self._suggest: + text = self._suggest.pop("text") + for s in self._suggest.values(): + s.setdefault("text", text) + if "script_fields" in d: + self._script_fields = d.pop("script_fields") + self._extra.update(d) + return self + + def script_fields(self, **kwargs): + """ + Define script fields to be calculated on hits. 
+ + Example:: + + s = AsyncSearch() + s = s.script_fields(times_two="doc['field'].value * 2") + s = s.script_fields( + times_three={ + 'script': { + 'lang': 'painless', + 'source': "doc['field'].value * params.n", + 'params': {'n': 3} + } + } + ) + + """ + s = self._clone() + for name in kwargs: + if isinstance(kwargs[name], string_types): + kwargs[name] = {"script": kwargs[name]} + s._script_fields.update(kwargs) + return s + + def source(self, fields=None, **kwargs): + """ + Selectively control how the _source field is returned. + + :arg fields: wildcard string, array of wildcards, or dictionary of includes and excludes + + If ``fields`` is None, the entire document will be returned for + each hit. If fields is a dictionary with keys of 'includes' and/or + 'excludes' the fields will be either included or excluded appropriately. + + Calling this multiple times with the same named parameter will override the + previous values with the new ones. + + Example:: + + s = AsyncSearch() + s = s.source(includes=['obj1.*'], excludes=["*.description"]) + + s = AsyncSearch() + s = s.source(includes=['obj1.*']).source(excludes=["*.description"]) + + """ + s = self._clone() + + if fields and kwargs: + raise ValueError("You cannot specify fields and kwargs at the same time.") + + if fields is not None: + s._source = fields + return s + + if kwargs and not isinstance(s._source, dict): + s._source = {} + + for key, value in kwargs.items(): + if value is None: + try: + del s._source[key] + except KeyError: + pass + else: + s._source[key] = value + + return s + + def sort(self, *keys): + """ + Add sorting information to the search request. If called without + arguments it will remove all sort requirements. Otherwise it will + replace them. Acceptable arguments are:: + + 'some.field' + '-some.other.field' + {'different.field': {'any': 'dict'}} + + so for example:: + + s = AsyncSearch().sort( + 'category', + '-title', + {"price" : {"order" : "asc", "mode" : "avg"}} + ) + + will sort by ``category``, ``title`` (in descending order) and + ``price`` in ascending order using the ``avg`` mode. + + The API returns a copy of the AsyncSearch object and can thus be chained. + """ + s = self._clone() + s._sort = [] + for k in keys: + if isinstance(k, string_types) and k.startswith("-"): + if k[1:] == "_score": + raise IllegalOperation("Sorting by `-_score` is not allowed.") + k = {k[1:]: {"order": "desc"}} + s._sort.append(k) + return s + + def highlight_options(self, **kwargs): + """ + Update the global highlighting options used for this request. For + example:: + + s = AsyncSearch() + s = s.highlight_options(order='score') + """ + s = self._clone() + s._highlight_opts.update(kwargs) + return s + + def highlight(self, *fields, **kwargs): + """ + Request highlighting of some fields. All keyword arguments passed in will be + used as parameters for all the fields in the ``fields`` parameter. 
Example:: + + AsyncSearch().highlight('title', 'body', fragment_size=50) + + will produce the equivalent of:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 50}, + "title": {"fragment_size": 50} + } + } + } + + If you want to have different options for different fields + you can call ``highlight`` twice:: + + AsyncSearch().highlight('title', fragment_size=50).highlight('body', fragment_size=100) + + which will produce:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 100}, + "title": {"fragment_size": 50} + } + } + } + + """ + s = self._clone() + for f in fields: + s._highlight[f] = kwargs + return s + + def suggest(self, name, text, **kwargs): + """ + Add a suggestions request to the search. + + :arg name: name of the suggestion + :arg text: text to suggest on + + All keyword arguments will be added to the suggestions body. For example:: + + s = AsyncSearch() + s = s.suggest('suggestion-1', 'AsyncOpenSearch', term={'field': 'body'}) + """ + s = self._clone() + s._suggest[name] = {"text": text} + s._suggest[name].update(kwargs) + return s + + def to_dict(self, count=False, **kwargs): + """ + Serialize the search into the dictionary that will be sent over as the + request's body. + + :arg count: a flag to specify if we are interested in a body for count - + no aggregations, no pagination bounds etc. + + All additional keyword arguments will be included into the dictionary. + """ + d = {} + + if self.query: + d["query"] = self.query.to_dict() + + # count request doesn't care for sorting and other things + if not count: + if self.post_filter: + d["post_filter"] = self.post_filter.to_dict() + + if self.aggs.aggs: + d.update(self.aggs.to_dict()) + + if self._sort: + d["sort"] = self._sort + + d.update(recursive_to_dict(self._extra)) + + if self._source not in (None, {}): + d["_source"] = self._source + + if self._highlight: + d["highlight"] = {"fields": self._highlight} + d["highlight"].update(self._highlight_opts) + + if self._suggest: + d["suggest"] = self._suggest + + if self._script_fields: + d["script_fields"] = self._script_fields + + d.update(recursive_to_dict(kwargs)) + return d + + async def count(self): + """ + Return the number of hits matching the query and filters. Note that + only the actual number is returned. + """ + if hasattr(self, "_response") and self._response.hits.total.relation == "eq": + return self._response.hits.total.value + + opensearch = await get_connection(self._using) + + d = self.to_dict(count=True) + # TODO: failed shards detection + return (await opensearch.count(index=self._index, body=d, **self._params))[ + "count" + ] + + async def execute(self, ignore_cache=False): + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. + + :arg ignore_cache: if set to ``True``, consecutive calls will hit + AsyncOpenSearch, while cached result will be ignored. Defaults to `False` + """ + if ignore_cache or not hasattr(self, "_response"): + opensearch = await get_connection(self._using) + + self._response = self._response_class( + self, + await opensearch.search( + index=self._index, body=self.to_dict(), **self._params + ), + ) + return self._response + + async def scan(self): + """ + Turn the search into a scan search and return a generator that will + iterate over all the documents matching the query. 
+ + Use ``params`` method to specify any additional arguments you with to + pass to the underlying ``async_scan`` helper from ``opensearchpy`` + + """ + opensearch = await get_connection(self._using) + + async for hit in aiter( + async_scan( + opensearch, query=self.to_dict(), index=self._index, **self._params + ) + ): + yield self._get_result(hit) + + async def delete(self): + """ + delete() executes the query by delegating to delete_by_query() + """ + + opensearch = await get_connection(self._using) + + return AttrDict( + await opensearch.delete_by_query( + index=self._index, body=self.to_dict(), **self._params + ) + ) + + +class AsyncMultiSearch(Request): + """ + Combine multiple :class:`~opensearchpy.AsyncSearch` objects into a single + request. + """ + + def __init__(self, **kwargs): + super(AsyncMultiSearch, self).__init__(**kwargs) + self._searches = [] + + def __getitem__(self, key): + return self._searches[key] + + def __iter__(self): + return iter(self._searches) + + def _clone(self): + ms = super(AsyncMultiSearch, self)._clone() + ms._searches = self._searches[:] + return ms + + def add(self, search): + """ + Adds a new :class:`~opensearchpy.AsyncSearch` object to the request:: + + ms = AsyncMultiSearch(index='my-index') + ms = ms.add(AsyncSearch(doc_type=Category).filter('term', category='python')) + ms = ms.add(AsyncSearch(doc_type=Blog)) + """ + ms = self._clone() + ms._searches.append(search) + return ms + + def to_dict(self): + out = [] + for s in self._searches: + meta = {} + if s._index: + meta["index"] = s._index + meta.update(s._params) + + out.append(meta) + out.append(s.to_dict()) + + return out + + async def execute(self, ignore_cache=False, raise_on_error=True): + """ + Execute the multi search request and return a list of search results. + """ + if ignore_cache or not hasattr(self, "_response"): + opensearch = await get_connection(self._using) + + responses = await opensearch.msearch( + index=self._index, body=self.to_dict(), **self._params + ) + + out = [] + for s, r in zip(self._searches, responses["responses"]): + if r.get("error", False): + if raise_on_error: + raise TransportError("N/A", r["error"]["type"], r["error"]) + r = None + else: + r = Response(s, r) + out.append(r) + + self._response = out + + return self._response diff --git a/opensearchpy/_async/helpers/search.pyi b/opensearchpy/_async/helpers/search.pyi new file mode 100644 index 00000000..4fb1cd3c --- /dev/null +++ b/opensearchpy/_async/helpers/search.pyi @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from opensearchpy.helpers.search import Request + +class AsyncSearch(Request): ... +class AsyncMultiSearch(Request): ... diff --git a/opensearchpy/_async/helpers/test.py b/opensearchpy/_async/helpers/test.py new file mode 100644 index 00000000..c8e43273 --- /dev/null +++ b/opensearchpy/_async/helpers/test.py @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
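+
+# A minimal usage sketch for this helper (illustrative only; it assumes a
+# cluster is reachable at OPENSEARCH_URL and that an asyncio event loop is
+# already running, e.g. inside a pytest-asyncio test):
+#
+#     client = await get_test_client()
+#     await client.cluster.health()
+#     await client.close()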
+ +import os +import time +from unittest import SkipTest + +from opensearchpy import AsyncOpenSearch +from opensearchpy.exceptions import ConnectionError + +if "OPENSEARCH_URL" in os.environ: + OPENSEARCH_URL = os.environ["OPENSEARCH_URL"] +else: + OPENSEARCH_URL = "https://admin:admin@localhost:9200" + + +async def get_test_client(nowait=False, **kwargs): + # construct kwargs from the environment + kw = {"timeout": 30} + + from opensearchpy import AsyncConnection + + async_connection = AsyncConnection() + if hasattr(async_connection, "AIOHttpConnection"): + kw["connection_class"] = getattr(async_connection, "AIOHttpConnection") + + kw.update(kwargs) + client = AsyncOpenSearch(OPENSEARCH_URL, **kw) + + # wait for yellow status + for _ in range(1 if nowait else 100): + try: + await client.cluster.health(wait_for_status="yellow") + return client + except ConnectionError: + time.sleep(0.1) + else: + # timeout + raise SkipTest("OpenSearch failed to start.") diff --git a/opensearchpy/_async/helpers/test.pyi b/opensearchpy/_async/helpers/test.pyi new file mode 100644 index 00000000..451bfc14 --- /dev/null +++ b/opensearchpy/_async/helpers/test.pyi @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from typing import Any + +from _typeshed import Incomplete + +from opensearchpy import AsyncOpenSearch as AsyncOpenSearch +from opensearchpy.exceptions import ConnectionError as ConnectionError + +OPENSEARCH_URL: Incomplete + +async def get_test_client(nowait: bool = ..., **kwargs: Any) -> Any: ... diff --git a/opensearchpy/_async/helpers/update_by_query.py b/opensearchpy/_async/helpers/update_by_query.py new file mode 100644 index 00000000..322b1488 --- /dev/null +++ b/opensearchpy/_async/helpers/update_by_query.py @@ -0,0 +1,151 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from opensearchpy.connection.async_connections import get_connection +from opensearchpy.helpers.query import Bool, Q +from opensearchpy.helpers.response import UpdateByQueryResponse +from opensearchpy.helpers.search import ProxyDescriptor, QueryProxy, Request +from opensearchpy.helpers.utils import recursive_to_dict + + +class AsyncUpdateByQuery(Request): + query = ProxyDescriptor("query") + + def __init__(self, **kwargs): + """ + Update by query request to opensearch. + + :arg using: `AsyncOpenSearch` instance to use + :arg index: limit the search to index + :arg doc_type: only query this type. + + All the parameters supplied (or omitted) at creation type can be later + overridden by methods (`using`, `index` and `doc_type` respectively). + + """ + super(AsyncUpdateByQuery, self).__init__(**kwargs) + self._response_class = UpdateByQueryResponse + self._script = {} + self._query_proxy = QueryProxy(self, "query") + + def filter(self, *args, **kwargs): + return self.query(Bool(filter=[Q(*args, **kwargs)])) + + def exclude(self, *args, **kwargs): + return self.query(Bool(filter=[~Q(*args, **kwargs)])) + + @classmethod + def from_dict(cls, d): + """ + Construct a new `AsyncUpdateByQuery` instance from a raw dict containing the search + body. 
Useful when migrating from raw dictionaries.
+
+        Example::
+
+            ubq = AsyncUpdateByQuery.from_dict({
+                "query": {
+                    "bool": {
+                        "must": [...]
+                    }
+                },
+                "script": {...}
+            })
+            ubq = ubq.filter('term', published=True)
+        """
+        u = cls()
+        u.update_from_dict(d)
+        return u
+
+    def _clone(self):
+        """
+        Return a clone of the current search request. Performs a shallow copy
+        of all the underlying objects. Used internally by most state modifying
+        APIs.
+        """
+        ubq = super(AsyncUpdateByQuery, self)._clone()
+
+        ubq._response_class = self._response_class
+        ubq._script = self._script.copy()
+        ubq.query._proxied = self.query._proxied
+        return ubq
+
+    def response_class(self, cls):
+        """
+        Override the default wrapper used for the response.
+        """
+        ubq = self._clone()
+        ubq._response_class = cls
+        return ubq
+
+    def update_from_dict(self, d):
+        """
+        Apply options from a serialized body to the current instance. Modifies
+        the object in-place. Used mostly by ``from_dict``.
+        """
+        d = d.copy()
+        if "query" in d:
+            self.query._proxied = Q(d.pop("query"))
+        if "script" in d:
+            self._script = d.pop("script")
+        self._extra.update(d)
+        return self
+
+    def script(self, **kwargs):
+        """
+        Define the update action to take.
+
+        Note: the API only accepts a single script, so calling ``script``
+        multiple times will overwrite the previous value.
+
+        Example::
+
+            ubq = AsyncUpdateByQuery()
+            ubq = ubq.script(source="ctx._source.likes++")
+            ubq = ubq.script(source="ctx._source.likes += params.f",
+                             lang="expression",
+                             params={'f': 3})
+        """
+        ubq = self._clone()
+        if ubq._script:
+            ubq._script = {}
+        ubq._script.update(kwargs)
+        return ubq
+
+    def to_dict(self, **kwargs):
+        """
+        Serialize the search into the dictionary that will be sent over as the
+        request's body.
+
+        All additional keyword arguments will be included into the dictionary.
+        """
+        d = {}
+        if self.query:
+            d["query"] = self.query.to_dict()
+
+        if self._script:
+            d["script"] = self._script
+
+        d.update(recursive_to_dict(self._extra))
+        d.update(recursive_to_dict(kwargs))
+        return d
+
+    async def execute(self):
+        """
+        Execute the search and return an instance of ``Response`` wrapping all
+        the data.
+        """
+        opensearch = await get_connection(self._using)
+
+        self._response = self._response_class(
+            self,
+            await opensearch.update_by_query(
+                index=self._index, body=self.to_dict(), **self._params
+            ),
+        )
+        return self._response
diff --git a/opensearchpy/_async/helpers/update_by_query.pyi b/opensearchpy/_async/helpers/update_by_query.pyi
new file mode 100644
index 00000000..3c5a9ed7
--- /dev/null
+++ b/opensearchpy/_async/helpers/update_by_query.pyi
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+#
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+
+from opensearchpy.helpers.search import Request
+
+class AsyncUpdateByQuery(Request): ...
diff --git a/opensearchpy/connection/async_connections.py b/opensearchpy/connection/async_connections.py
new file mode 100644
index 00000000..acaa0b68
--- /dev/null
+++ b/opensearchpy/connection/async_connections.py
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+#
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
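+
+# A minimal usage sketch (illustrative only; the alias and host values below
+# are made up):
+#
+#     from opensearchpy.connection import async_connections
+#
+#     await async_connections.configure(default={"hosts": ["localhost"]})
+#     client = await async_connections.get_connection("default")
+#     info = await client.info()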
+ +from six import string_types + +import opensearchpy +from opensearchpy._async.helpers.actions import aiter +from opensearchpy.serializer import serializer + + +class AsyncConnections(object): + """ + Class responsible for holding connections to different clusters. Used as a + singleton in this module. + """ + + def __init__(self): + self._kwargs = {} + self._conns = {} + + async def configure(self, **kwargs): + """ + Configure multiple connections at once, useful for passing in config + dictionaries obtained from other sources, like Django's settings or a + configuration management tool. + + Example:: + + async_connections.configure( + default={'hosts': 'localhost'}, + dev={'hosts': ['opensearchdev1.example.com:9200'], 'sniff_on_start': True}, + ) + + Connections will only be constructed lazily when requested through + ``get_connection``. + """ + async for k in aiter(list(self._conns)): + # try and preserve existing client to keep the persistent connections alive + if k in self._kwargs and kwargs.get(k, None) == self._kwargs[k]: + continue + del self._conns[k] + self._kwargs = kwargs + + async def add_connection(self, alias, conn): + """ + Add a connection object, it will be passed through as-is. + """ + self._conns[alias] = conn + + async def remove_connection(self, alias): + """ + Remove connection from the registry. Raises ``KeyError`` if connection + wasn't found. + """ + errors = 0 + async for d in aiter((self._conns, self._kwargs)): + try: + del d[alias] + except KeyError: + errors += 1 + + if errors == 2: + raise KeyError("There is no connection with alias %r." % alias) + + async def create_connection(self, alias="default", **kwargs): + """ + Construct an instance of ``opensearchpy.AsyncOpenSearch`` and register + it under given alias. + """ + kwargs.setdefault("serializer", serializer) + conn = self._conns[alias] = opensearchpy.AsyncOpenSearch(**kwargs) + return conn + + async def get_connection(self, alias="default"): + """ + Retrieve a connection, construct it if necessary (only configuration + was passed to us). If a non-string alias has been passed through we + assume it's already a client instance and will just return it as-is. + + Raises ``KeyError`` if no client (or its definition) is registered + under the alias. + """ + # do not check isinstance(AsyncOpenSearch) so that people can wrap their + # clients + if not isinstance(alias, string_types): + return alias + + # connection already established + try: + return self._conns[alias] + except KeyError: + pass + + # if not, try to create it + try: + return await self.create_connection(alias, **self._kwargs[alias]) + except KeyError: + # no connection and no kwargs to set one up + raise KeyError("There is no connection with alias %r." % alias) + + +async_connections = AsyncConnections() +configure = async_connections.configure +add_connection = async_connections.add_connection +remove_connection = async_connections.remove_connection +create_connection = async_connections.create_connection +get_connection = async_connections.get_connection diff --git a/opensearchpy/connection/async_connections.pyi b/opensearchpy/connection/async_connections.pyi new file mode 100644 index 00000000..8935ec6b --- /dev/null +++ b/opensearchpy/connection/async_connections.pyi @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. 
See +# GitHub history for details. + +class AsyncConnections: ... diff --git a/opensearchpy/helpers/__init__.py b/opensearchpy/helpers/__init__.py index 94771696..72a7d140 100644 --- a/opensearchpy/helpers/__init__.py +++ b/opensearchpy/helpers/__init__.py @@ -59,7 +59,7 @@ # Asyncio only supported on Python 3.6+ if sys.version_info >= (3, 6): - from .._async.helpers import ( + from .._async.helpers.actions import ( async_bulk, async_reindex, async_scan, diff --git a/opensearchpy/helpers/__init__.pyi b/opensearchpy/helpers/__init__.pyi index bc307c7b..59b5cefd 100644 --- a/opensearchpy/helpers/__init__.pyi +++ b/opensearchpy/helpers/__init__.pyi @@ -42,10 +42,10 @@ try: if sys.version_info < (3, 6): raise ImportError - from .._async.helpers import async_bulk as async_bulk - from .._async.helpers import async_reindex as async_reindex - from .._async.helpers import async_scan as async_scan - from .._async.helpers import async_streaming_bulk as async_streaming_bulk + from .._async.helpers.actions import async_bulk as async_bulk + from .._async.helpers.actions import async_reindex as async_reindex + from .._async.helpers.actions import async_scan as async_scan + from .._async.helpers.actions import async_streaming_bulk as async_streaming_bulk from .asyncsigner import AWSV4SignerAsyncAuth as AWSV4SignerAsyncAuth from .signer import AWSV4SignerAuth as AWSV4SignerAuth except (ImportError, SyntaxError): diff --git a/opensearchpy/helpers/analysis.py b/opensearchpy/helpers/analysis.py index 13478145..251b004a 100644 --- a/opensearchpy/helpers/analysis.py +++ b/opensearchpy/helpers/analysis.py @@ -27,8 +27,7 @@ import six from opensearchpy.connection.connections import get_connection - -from .utils import AttrDict, DslBase, merge +from opensearchpy.helpers.utils import AttrDict, DslBase, merge __all__ = ["tokenizer", "analyzer", "char_filter", "token_filter", "normalizer"] diff --git a/test_opensearchpy/test_async/test_connection.py b/test_opensearchpy/test_async/test_connection.py index 14f3a3a2..d43b2bb5 100644 --- a/test_opensearchpy/test_async/test_connection.py +++ b/test_opensearchpy/test_async/test_connection.py @@ -37,10 +37,11 @@ import pytest from mock import patch from multidict import CIMultiDict +from pytest import raises -from opensearchpy import AIOHttpConnection, __versionstr__ +from opensearchpy import AIOHttpConnection, AsyncOpenSearch, __versionstr__, serializer from opensearchpy.compat import reraise_exceptions -from opensearchpy.connection import Connection +from opensearchpy.connection import Connection, async_connections from opensearchpy.exceptions import ConnectionError pytestmark = pytest.mark.asyncio @@ -373,3 +374,78 @@ async def test_aiohttp_connection_error(self): conn = AIOHttpConnection("not.a.host.name") with pytest.raises(ConnectionError): await conn.perform_request("GET", "/") + + +async def test_default_connection_is_returned_by_default(): + c = async_connections.AsyncConnections() + + con, con2 = object(), object() + await c.add_connection("default", con) + + await c.add_connection("not-default", con2) + + assert await c.get_connection() is con + + +async def test_get_connection_created_connection_if_needed(): + c = async_connections.AsyncConnections() + await c.configure( + default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]} + ) + default = await c.get_connection() + local = await c.get_connection("local") + assert isinstance(default, AsyncOpenSearch) + assert isinstance(local, AsyncOpenSearch) + assert [{"host": "opensearch.com"}] == 
default.transport.hosts + assert [{"host": "localhost"}] == local.transport.hosts + + +async def test_configure_preserves_unchanged_connections(): + c = async_connections.AsyncConnections() + + await c.configure( + default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]} + ) + default = await c.get_connection() + local = await c.get_connection("local") + + await c.configure( + default={"hosts": ["not-opensearch.com"]}, local={"hosts": ["localhost"]} + ) + new_default = await c.get_connection() + new_local = await c.get_connection("local") + + assert new_local is local + assert new_default is not default + + +async def test_remove_connection_removes_both_conn_and_conf(): + c = async_connections.AsyncConnections() + + await c.configure( + default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]} + ) + await c.add_connection("local2", object()) + + await c.remove_connection("default") + await c.get_connection("local2") + await c.remove_connection("local2") + + with raises(Exception): + await c.get_connection("local2") + await c.get_connection("default") + + +async def test_create_connection_constructs_client(): + c = async_connections.AsyncConnections() + await c.create_connection("testing", hosts=["opensearch.com"]) + + con = await c.get_connection("testing") + assert [{"host": "opensearch.com"}] == con.transport.hosts + + +async def test_create_connection_adds_our_serializer(): + c = async_connections.AsyncConnections() + await c.create_connection("testing", hosts=["opensearch.com"]) + result = await c.get_connection("testing") + assert result.transport.serializer is serializer.serializer diff --git a/test_opensearchpy/test_async/test_helpers/conftest.py b/test_opensearchpy/test_async/test_helpers/conftest.py new file mode 100644 index 00000000..56a6bf31 --- /dev/null +++ b/test_opensearchpy/test_async/test_helpers/conftest.py @@ -0,0 +1,227 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
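+
+# Shared fixtures for the async DSL unit tests: ``mock_client`` registers a
+# ``Mock`` client under the "mock" alias via ``add_connection``, while
+# ``dummy_response``, ``aggs_search`` and ``aggs_data`` provide canned search
+# and aggregation payloads used by the tests in this package.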
+ + +import pytest +from mock import Mock +from pytest import fixture + +from opensearchpy.connection.async_connections import add_connection, async_connections + +pytestmark = pytest.mark.asyncio + + +@fixture +async def mock_client(dummy_response): + client = Mock() + client.search.return_value = dummy_response + await add_connection("mock", client) + yield client + async_connections._conn = {} + async_connections._kwargs = {} + + +@fixture +def dummy_response(): + return { + "_shards": {"failed": 0, "successful": 10, "total": 10}, + "hits": { + "hits": [ + { + "_index": "test-index", + "_id": "opensearch", + "_score": 12.0, + "_source": {"city": "Amsterdam", "name": "OpenSearch"}, + }, + { + "_index": "test-index", + "_id": "42", + "_score": 11.123, + "_routing": "opensearch", + "_source": { + "name": {"first": "Shay", "last": "Bannon"}, + "lang": "java", + "twitter": "kimchy", + }, + }, + { + "_index": "test-index", + "_id": "47", + "_score": 1, + "_routing": "opensearch", + "_source": { + "name": {"first": "Honza", "last": "Král"}, + "lang": "python", + "twitter": "honzakral", + }, + }, + { + "_index": "test-index", + "_id": "53", + "_score": 16.0, + "_routing": "opensearch", + }, + ], + "max_score": 12.0, + "total": 123, + }, + "timed_out": False, + "took": 123, + } + + +@fixture +def aggs_search(): + from opensearchpy._async.helpers.search import AsyncSearch + + s = AsyncSearch(index="flat-git") + s.aggs.bucket("popular_files", "terms", field="files", size=2).metric( + "line_stats", "stats", field="stats.lines" + ).metric("top_commits", "top_hits", size=2, _source=["stats.*", "committed_date"]) + s.aggs.bucket( + "per_month", "date_histogram", interval="month", field="info.committed_date" + ) + s.aggs.metric("sum_lines", "sum", field="stats.lines") + return s + + +@fixture +def aggs_data(): + return { + "took": 4, + "timed_out": False, + "_shards": {"total": 1, "successful": 1, "failed": 0}, + "hits": {"total": 52, "hits": [], "max_score": 0.0}, + "aggregations": { + "sum_lines": {"value": 25052.0}, + "per_month": { + "buckets": [ + { + "doc_count": 38, + "key": 1393632000000, + "key_as_string": "2014-03-01T00:00:00.000Z", + }, + { + "doc_count": 11, + "key": 1396310400000, + "key_as_string": "2014-04-01T00:00:00.000Z", + }, + { + "doc_count": 3, + "key": 1398902400000, + "key_as_string": "2014-05-01T00:00:00.000Z", + }, + ] + }, + "popular_files": { + "buckets": [ + { + "key": "opensearchpy", + "line_stats": { + "count": 40, + "max": 228.0, + "min": 2.0, + "sum": 2151.0, + "avg": 53.775, + }, + "doc_count": 40, + "top_commits": { + "hits": { + "total": 40, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, + "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "_type": "doc", + "_source": { + "stats": { + "files": 1, + "deletions": 0, + "lines": 18, + "insertions": 18, + }, + "committed_date": "2014-05-01T13:32:14", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + { + "key": "test_opensearchpy/test_dsl", + "line_stats": { + "count": 35, + "max": 228.0, + "min": 2.0, + "sum": 1939.0, + "avg": 55.4, + }, + "doc_count": 35, + "top_commits": { + "hits": { + "total": 35, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, 
+ "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "_type": "doc", + "_source": { + "stats": { + "files": 3, + "deletions": 18, + "lines": 62, + "insertions": 44, + }, + "committed_date": "2014-05-01T13:30:44", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 120, + }, + }, + } diff --git a/test_opensearchpy/test_async/test_helpers/test_document.py b/test_opensearchpy/test_async/test_helpers/test_document.py new file mode 100644 index 00000000..44aaf1b5 --- /dev/null +++ b/test_opensearchpy/test_async/test_helpers/test_document.py @@ -0,0 +1,629 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from __future__ import unicode_literals + +import codecs +import ipaddress +import pickle +from datetime import datetime +from hashlib import sha256 + +import pytest +from pytest import raises + +from opensearchpy import InnerDoc, MetaField, Range, analyzer +from opensearchpy._async.helpers import document +from opensearchpy._async.helpers.index import AsyncIndex +from opensearchpy._async.helpers.mapping import AsyncMapping +from opensearchpy.exceptions import IllegalOperation, ValidationException +from opensearchpy.helpers import field, utils + +pytestmark = pytest.mark.asyncio + + +class MyInner(InnerDoc): + old_field = field.Text() + + +class MyDoc(document.AsyncDocument): + title = field.Keyword() + name = field.Text() + created_at = field.Date() + inner = field.Object(MyInner) + + +class MySubDoc(MyDoc): + name = field.Keyword() + + class Index: + name = "default-index" + + +class MyDoc2(document.AsyncDocument): + extra = field.Long() + + +class MyMultiSubDoc(MyDoc2, MySubDoc): + pass + + +class Comment(InnerDoc): + title = field.Text() + tags = field.Keyword(multi=True) + + +class DocWithNested(document.AsyncDocument): + comments = field.Nested(Comment) + + class Index: + name = "test-doc-with-nested" + + +class SimpleCommit(document.AsyncDocument): + files = field.Text(multi=True) + + class Index: + name = "test-git" + + +class Secret(str): + pass + + +class SecretField(field.CustomField): + builtin_type = "text" + + def _serialize(self, data): + return codecs.encode(data, "rot_13") + + def _deserialize(self, data): + if isinstance(data, Secret): + return data + return Secret(codecs.decode(data, "rot_13")) + + +class SecretDoc(document.AsyncDocument): + title = SecretField(index="no") + + class Index: + name = "test-secret-doc" + + +class NestedSecret(document.AsyncDocument): + secrets = field.Nested(SecretDoc) + + class Index: + name = "test-nested-secret" + + +class OptionalObjectWithRequiredField(document.AsyncDocument): + comments = field.Nested(properties={"title": field.Keyword(required=True)}) + + class Index: + name = "test-required" + + +class Host(document.AsyncDocument): + ip = field.Ip() + + class Index: + name = "test-host" + + +async def test_range_serializes_properly(): + class D(document.AsyncDocument): + lr = field.LongRange() + + d = D(lr=Range(lt=42)) + assert 40 in d.lr + assert 47 not in d.lr + assert {"lr": {"lt": 42}} == d.to_dict() + + d = D(lr={"lt": 42}) + assert {"lr": {"lt": 42}} == 
d.to_dict() + + +async def test_range_deserializes_properly(): + class D(InnerDoc): + lr = field.LongRange() + + d = D.from_opensearch({"lr": {"lt": 42}}, True) + assert isinstance(d.lr, Range) + assert 40 in d.lr + assert 47 not in d.lr + + +async def test_resolve_nested(): + nested, field = NestedSecret._index.resolve_nested("secrets.title") + assert nested == ["secrets"] + assert field is NestedSecret._doc_type.mapping["secrets"]["title"] + + +async def test_conflicting_mapping_raises_error_in_index_to_dict(): + class A(document.AsyncDocument): + name = field.Text() + + class B(document.AsyncDocument): + name = field.Keyword() + + i = AsyncIndex("i") + i.document(A) + i.document(B) + + with raises(ValueError): + i.to_dict() + + +async def test_ip_address_serializes_properly(): + host = Host(ip=ipaddress.IPv4Address("10.0.0.1")) + + assert {"ip": "10.0.0.1"} == host.to_dict() + + +async def test_matches_uses_index(): + assert SimpleCommit._matches({"_index": "test-git"}) + assert not SimpleCommit._matches({"_index": "not-test-git"}) + + +async def test_matches_with_no_name_always_matches(): + class D(document.AsyncDocument): + pass + + assert D._matches({}) + assert D._matches({"_index": "whatever"}) + + +async def test_matches_accepts_wildcards(): + class MyDoc(document.AsyncDocument): + class Index: + name = "my-*" + + assert MyDoc._matches({"_index": "my-index"}) + assert not MyDoc._matches({"_index": "not-my-index"}) + + +async def test_assigning_attrlist_to_field(): + sc = SimpleCommit() + ls = ["README", "README.rst"] + sc.files = utils.AttrList(ls) + + assert sc.to_dict()["files"] is ls + + +async def test_optional_inner_objects_are_not_validated_if_missing(): + d = OptionalObjectWithRequiredField() + + assert d.full_clean() is None + + +async def test_custom_field(): + s = SecretDoc(title=Secret("Hello")) + + assert {"title": "Uryyb"} == s.to_dict() + assert s.title == "Hello" + + s = SecretDoc.from_opensearch({"_source": {"title": "Uryyb"}}) + assert s.title == "Hello" + assert isinstance(s.title, Secret) + + +async def test_custom_field_mapping(): + assert { + "properties": {"title": {"index": "no", "type": "text"}} + } == SecretDoc._doc_type.mapping.to_dict() + + +async def test_custom_field_in_nested(): + s = NestedSecret() + s.secrets.append(SecretDoc(title=Secret("Hello"))) + + assert {"secrets": [{"title": "Uryyb"}]} == s.to_dict() + assert s.secrets[0].title == "Hello" + + +async def test_multi_works_after_doc_has_been_saved(): + c = SimpleCommit() + c.full_clean() + c.files.append("setup.py") + + assert c.to_dict() == {"files": ["setup.py"]} + + +async def test_multi_works_in_nested_after_doc_has_been_serialized(): + # Issue #359 + c = DocWithNested(comments=[Comment(title="First!")]) + + assert [] == c.comments[0].tags + assert {"comments": [{"title": "First!"}]} == c.to_dict() + assert [] == c.comments[0].tags + + +async def test_null_value_for_object(): + d = MyDoc(inner=None) + + assert d.inner is None + + +async def test_inherited_doc_types_can_override_index(): + class MyDocDifferentIndex(MySubDoc): + class Index: + name = "not-default-index" + settings = {"number_of_replicas": 0} + aliases = {"a": {}} + analyzers = [analyzer("my_analizer", tokenizer="keyword")] + + assert MyDocDifferentIndex._index._name == "not-default-index" + assert MyDocDifferentIndex()._get_index() == "not-default-index" + assert MyDocDifferentIndex._index.to_dict() == { + "aliases": {"a": {}}, + "mappings": { + "properties": { + "created_at": {"type": "date"}, + "inner": { + "type": 
"object", + "properties": {"old_field": {"type": "text"}}, + }, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + } + }, + "settings": { + "analysis": { + "analyzer": {"my_analizer": {"tokenizer": "keyword", "type": "custom"}} + }, + "number_of_replicas": 0, + }, + } + + +async def test_to_dict_with_meta(): + d = MySubDoc(title="hello") + d.meta.routing = "some-parent" + + assert { + "_index": "default-index", + "_routing": "some-parent", + "_source": {"title": "hello"}, + } == d.to_dict(True) + + +async def test_to_dict_with_meta_includes_custom_index(): + d = MySubDoc(title="hello") + d.meta.index = "other-index" + + assert {"_index": "other-index", "_source": {"title": "hello"}} == d.to_dict(True) + + +async def test_to_dict_without_skip_empty_will_include_empty_fields(): + d = MySubDoc(tags=[], title=None, inner={}) + + assert {} == d.to_dict() + assert {"tags": [], "title": None, "inner": {}} == d.to_dict(skip_empty=False) + + +async def test_attribute_can_be_removed(): + d = MyDoc(title="hello") + + del d.title + assert "title" not in d._d_ + + +async def test_doc_type_can_be_correctly_pickled(): + d = DocWithNested( + title="Hello World!", comments=[Comment(title="hellp")], meta={"id": 42} + ) + s = pickle.dumps(d) + + d2 = pickle.loads(s) + + assert d2 == d + assert 42 == d2.meta.id + assert "Hello World!" == d2.title + assert [{"title": "hellp"}] == d2.comments + assert isinstance(d2.comments[0], Comment) + + +async def test_meta_is_accessible_even_on_empty_doc(): + d = MyDoc() + d.meta + + d = MyDoc(title="aaa") + d.meta + + +async def test_meta_field_mapping(): + class User(document.AsyncDocument): + username = field.Text() + + class Meta: + all = MetaField(enabled=False) + _index = MetaField(enabled=True) + dynamic = MetaField("strict") + dynamic_templates = MetaField([42]) + + assert { + "properties": {"username": {"type": "text"}}, + "_all": {"enabled": False}, + "_index": {"enabled": True}, + "dynamic": "strict", + "dynamic_templates": [42], + } == User._doc_type.mapping.to_dict() + + +async def test_multi_value_fields(): + class Blog(document.AsyncDocument): + tags = field.Keyword(multi=True) + + b = Blog() + assert [] == b.tags + b.tags.append("search") + b.tags.append("python") + assert ["search", "python"] == b.tags + + +async def test_docs_with_properties(): + class User(document.AsyncDocument): + pwd_hash = field.Text() + + def check_password(self, pwd): + return sha256(pwd).hexdigest() == self.pwd_hash + + @property + def password(self): + raise AttributeError("readonly") + + @password.setter + def password(self, pwd): + self.pwd_hash = sha256(pwd).hexdigest() + + u = User(pwd_hash=sha256(b"secret").hexdigest()) + assert u.check_password(b"secret") + assert not u.check_password(b"not-secret") + + u.password = b"not-secret" + assert "password" not in u._d_ + assert not u.check_password(b"secret") + assert u.check_password(b"not-secret") + + with raises(AttributeError): + u.password + + +async def test_nested_can_be_assigned_to(): + d1 = DocWithNested(comments=[Comment(title="First!")]) + d2 = DocWithNested() + + d2.comments = d1.comments + assert isinstance(d1.comments[0], Comment) + assert d2.comments == [{"title": "First!"}] + assert {"comments": [{"title": "First!"}]} == d2.to_dict() + assert isinstance(d2.comments[0], Comment) + + +async def test_nested_can_be_none(): + d = DocWithNested(comments=None, title="Hello World!") + + assert {"title": "Hello World!"} == d.to_dict() + + +async def test_nested_defaults_to_list_and_can_be_updated(): + md = 
DocWithNested() + + assert [] == md.comments + + md.comments.append({"title": "hello World!"}) + assert {"comments": [{"title": "hello World!"}]} == md.to_dict() + + +async def test_to_dict_is_recursive_and_can_cope_with_multi_values(): + md = MyDoc(name=["a", "b", "c"]) + md.inner = [MyInner(old_field="of1"), MyInner(old_field="of2")] + + assert isinstance(md.inner[0], MyInner) + + assert { + "name": ["a", "b", "c"], + "inner": [{"old_field": "of1"}, {"old_field": "of2"}], + } == md.to_dict() + + +async def test_to_dict_ignores_empty_collections(): + md = MySubDoc(name="", address={}, count=0, valid=False, tags=[]) + + assert {"name": "", "count": 0, "valid": False} == md.to_dict() + + +async def test_declarative_mapping_definition(): + assert issubclass(MyDoc, document.AsyncDocument) + assert hasattr(MyDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "text"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MyDoc._doc_type.mapping.to_dict() + + +async def test_you_can_supply_own_mapping_instance(): + class MyD(document.AsyncDocument): + title = field.Text() + + class Meta: + mapping = AsyncMapping() + mapping.meta("_all", enabled=False) + + assert { + "_all": {"enabled": False}, + "properties": {"title": {"type": "text"}}, + } == MyD._doc_type.mapping.to_dict() + + +async def test_document_can_be_created_dynamically(): + n = datetime.now() + md = MyDoc(title="hello") + md.name = "My Fancy Document!" + md.created_at = n + + inner = md.inner + # consistent returns + assert inner is md.inner + inner.old_field = "Already defined." + + md.inner.new_field = ["undefined", "field"] + + assert { + "title": "hello", + "name": "My Fancy Document!", + "created_at": n, + "inner": {"old_field": "Already defined.", "new_field": ["undefined", "field"]}, + } == md.to_dict() + + +async def test_invalid_date_will_raise_exception(): + md = MyDoc() + md.created_at = "not-a-date" + with raises(ValidationException): + md.full_clean() + + +async def test_document_inheritance(): + assert issubclass(MySubDoc, MyDoc) + assert issubclass(MySubDoc, document.AsyncDocument) + assert hasattr(MySubDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MySubDoc._doc_type.mapping.to_dict() + + +async def test_child_class_can_override_parent(): + class A(document.AsyncDocument): + o = field.Object(dynamic=False, properties={"a": field.Text()}) + + class B(A): + o = field.Object(dynamic="strict", properties={"b": field.Text()}) + + assert { + "properties": { + "o": { + "dynamic": "strict", + "properties": {"a": {"type": "text"}, "b": {"type": "text"}}, + "type": "object", + } + } + } == B._doc_type.mapping.to_dict() + + +async def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict(): + md = MySubDoc(meta={"id": 42}, name="My First doc!") + + md.meta.index = "my-index" + assert md.meta.index == "my-index" + assert md.meta.id == 42 + assert {"name": "My First doc!"} == md.to_dict() + assert {"id": 42, "index": "my-index"} == md.meta.to_dict() + + +async def test_index_inheritance(): + assert issubclass(MyMultiSubDoc, MySubDoc) + assert issubclass(MyMultiSubDoc, MyDoc2) + assert issubclass(MyMultiSubDoc, document.AsyncDocument) + assert hasattr(MyMultiSubDoc, "_doc_type") + assert hasattr(MyMultiSubDoc, "_index") + assert { 
+ "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + "extra": {"type": "long"}, + } + } == MyMultiSubDoc._doc_type.mapping.to_dict() + + +async def test_meta_fields_can_be_set_directly_in_init(): + p = object() + md = MyDoc(_id=p, title="Hello World!") + + assert md.meta.id is p + + +async def test_save_no_index(mock_client): + md = MyDoc() + with raises(ValidationException): + await md.save(using="mock") + + +async def test_delete_no_index(mock_client): + md = MyDoc() + with raises(ValidationException): + await md.delete(using="mock") + + +async def test_update_no_fields(): + md = MyDoc() + with raises(IllegalOperation): + await md.update() + + +async def test_search_with_custom_alias_and_index(mock_client): + search_object = MyDoc.search( + using="staging", index=["custom_index1", "custom_index2"] + ) + + assert search_object._using == "staging" + assert search_object._index == ["custom_index1", "custom_index2"] + + +async def test_from_opensearch_respects_underscored_non_meta_fields(): + doc = { + "_index": "test-index", + "_id": "opensearch", + "_score": 12.0, + "fields": {"hello": "world", "_routing": "opensearch", "_tags": ["search"]}, + "_source": { + "city": "Amsterdam", + "name": "OpenSearch", + "_tagline": "You know, for search", + }, + } + + class Company(document.AsyncDocument): + class Index: + name = "test-company" + + c = Company.from_opensearch(doc) + + assert c.meta.fields._tags == ["search"] + assert c.meta.fields._routing == "opensearch" + assert c._tagline == "You know, for search" + + +async def test_nested_and_object_inner_doc(): + class MySubDocWithNested(MyDoc): + nested_inner = field.Nested(MyInner) + + props = MySubDocWithNested._doc_type.mapping.to_dict()["properties"] + assert props == { + "created_at": {"type": "date"}, + "inner": {"properties": {"old_field": {"type": "text"}}, "type": "object"}, + "name": {"type": "text"}, + "nested_inner": { + "properties": {"old_field": {"type": "text"}}, + "type": "nested", + }, + "title": {"type": "keyword"}, + } diff --git a/test_opensearchpy/test_async/test_helpers/test_faceted_search.py b/test_opensearchpy/test_async/test_helpers/test_faceted_search.py new file mode 100644 index 00000000..88344cdb --- /dev/null +++ b/test_opensearchpy/test_async/test_helpers/test_faceted_search.py @@ -0,0 +1,186 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
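+
+# These tests exercise AsyncFacetedSearch purely through ``build_search()`` and
+# ``to_dict()``, so no cluster connection is required.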
+ +from datetime import datetime + +import pytest + +from opensearchpy._async.helpers.faceted_search import AsyncFacetedSearch +from opensearchpy.helpers.faceted_search import DateHistogramFacet, TermsFacet + +pytestmark = pytest.mark.asyncio + + +class BlogSearch(AsyncFacetedSearch): + doc_types = ["user", "post"] + fields = ( + "title^5", + "body", + ) + + facets = { + "category": TermsFacet(field="category.raw"), + "tags": TermsFacet(field="tags"), + } + + +async def test_query_is_created_properly(): + bs = BlogSearch("python search") + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +async def test_query_is_created_properly_with_sort_tuple(): + bs = BlogSearch("python search", sort=("category", "-title")) + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + "sort": ["category", {"title": {"order": "desc"}}], + } == s.to_dict() + + +async def test_filter_is_applied_to_search_but_not_relevant_facet(): + bs = BlogSearch("python search", filters={"category": "opensearch"}) + s = bs.build_search() + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["opensearch"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "post_filter": {"terms": {"category.raw": ["opensearch"]}}, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +async def test_filters_are_applied_to_search_ant_relevant_facets(): + bs = BlogSearch( + "python search", + filters={"category": "opensearch", "tags": ["python", "django"]}, + ) + s = bs.build_search() + + d = s.to_dict() + + # we need to test post_filter without relying on order + f = d["post_filter"]["bool"].pop("must") + assert len(f) == 2 + assert {"terms": {"category.raw": ["opensearch"]}} in f + assert {"terms": {"tags": ["python", "django"]}} in f + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["opensearch"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"terms": {"tags": ["python", "django"]}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ("title^5", "body"), "query": "python search"} + }, + "post_filter": {"bool": {}}, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == d + + +async def test_date_histogram_facet_with_1970_01_01_date(): + dhf = DateHistogramFacet() + assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0) + assert dhf.get_value({"key": 0}) == 
datetime(1970, 1, 1, 0, 0) + + +@pytest.mark.parametrize( + ["interval_type", "interval"], + [ + ("interval", "year"), + ("calendar_interval", "year"), + ("interval", "month"), + ("calendar_interval", "month"), + ("interval", "week"), + ("calendar_interval", "week"), + ("interval", "day"), + ("calendar_interval", "day"), + ("fixed_interval", "day"), + ("interval", "hour"), + ("fixed_interval", "hour"), + ("interval", "1Y"), + ("calendar_interval", "1Y"), + ("interval", "1M"), + ("calendar_interval", "1M"), + ("interval", "1w"), + ("calendar_interval", "1w"), + ("interval", "1d"), + ("calendar_interval", "1d"), + ("fixed_interval", "1d"), + ("interval", "1h"), + ("fixed_interval", "1h"), + ], +) +async def test_date_histogram_interval_types(interval_type, interval): + dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval}) + assert dhf.get_aggregation().to_dict() == { + "date_histogram": { + "field": "@timestamp", + interval_type: interval, + "min_doc_count": 0, + } + } + dhf.get_value_filter(datetime.now()) + + +async def test_date_histogram_no_interval_keyerror(): + dhf = DateHistogramFacet(field="@timestamp") + with pytest.raises(KeyError) as e: + dhf.get_value_filter(datetime.now()) + assert str(e.value) == "'interval'" diff --git a/test_opensearchpy/test_async/test_helpers/test_index.py b/test_opensearchpy/test_async/test_helpers/test_index.py new file mode 100644 index 00000000..b2debbb4 --- /dev/null +++ b/test_opensearchpy/test_async/test_helpers/test_index.py @@ -0,0 +1,178 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +import string +from random import choice + +import pytest +from pytest import raises + +from opensearchpy import Date, Text, analyzer +from opensearchpy._async.helpers.document import AsyncDocument +from opensearchpy._async.helpers.index import AsyncIndex + +pytestmark = pytest.mark.asyncio + + +class Post(AsyncDocument): + title = Text() + published_from = Date() + + +async def test_multiple_doc_types_will_combine_mappings(): + class User(AsyncDocument): + username = Text() + + i = AsyncIndex("i") + i.document(Post) + i.document(User) + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "username": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +async def test_search_is_limited_to_index_name(): + i = AsyncIndex("my-index") + s = i.search() + + assert s._index == ["my-index"] + + +async def test_cloned_index_has_copied_settings_and_using(): + client = object() + i = AsyncIndex("my-index", using=client) + i.settings(number_of_shards=1) + + i2 = i.clone("my-other-index") + + assert "my-other-index" == i2._name + assert client is i2._using + assert i._settings == i2._settings + assert i._settings is not i2._settings + + +async def test_cloned_index_has_analysis_attribute(): + """ + Regression test for Issue #582 in which `Index.clone()` was not copying + over the `_analysis` attribute. 
+ """ + client = object() + i = AsyncIndex("my-index", using=client) + + random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + i.analyzer(random_analyzer) + + i2 = i.clone("my-clone-index") + + assert i.to_dict()["settings"]["analysis"] == i2.to_dict()["settings"]["analysis"] + + +def test_settings_are_saved(): + i = AsyncIndex("i") + i.settings(number_of_replicas=0) + i.settings(number_of_shards=1) + + assert {"settings": {"number_of_shards": 1, "number_of_replicas": 0}} == i.to_dict() + + +async def test_registered_doc_type_included_in_to_dict(): + i = AsyncIndex("i", using="alias") + i.document(Post) + + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +async def test_registered_doc_type_included_in_search(): + i = AsyncIndex("i", using="alias") + i.document(Post) + + s = i.search() + + assert s._doc_type == [Post] + + +async def test_aliases_add_to_object(): + random_alias = "".join((choice(string.ascii_letters) for _ in range(100))) + alias_dict = {random_alias: {}} + + index = AsyncIndex("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == alias_dict + + +async def test_aliases_returned_from_to_dict(): + random_alias = "".join((choice(string.ascii_letters) for _ in range(100))) + alias_dict = {random_alias: {}} + + index = AsyncIndex("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == index.to_dict()["aliases"] == alias_dict + + +async def test_analyzers_added_to_object(): + random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + index = AsyncIndex("i", using="alias") + index.analyzer(random_analyzer) + + assert index._analysis["analyzer"][random_analyzer_name] == { + "filter": ["standard"], + "type": "custom", + "tokenizer": "standard", + } + + +async def test_analyzers_returned_from_to_dict(): + random_analyzer_name = "".join((choice(string.ascii_letters) for _ in range(100))) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + index = AsyncIndex("i", using="alias") + index.analyzer(random_analyzer) + + assert index.to_dict()["settings"]["analysis"]["analyzer"][ + random_analyzer_name + ] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} + + +async def test_conflicting_analyzer_raises_error(): + i = AsyncIndex("i") + i.analyzer("my_analyzer", tokenizer="whitespace", filter=["lowercase", "stop"]) + + with raises(ValueError): + i.analyzer("my_analyzer", tokenizer="keyword", filter=["lowercase", "stop"]) + + +async def test_index_template_can_have_order(): + i = AsyncIndex("i-*") + it = i.as_template("i", order=2) + + assert {"index_patterns": ["i-*"], "order": 2} == it.to_dict() diff --git a/test_opensearchpy/test_async/test_helpers/test_mapping.py b/test_opensearchpy/test_async/test_helpers/test_mapping.py new file mode 100644 index 00000000..a4fb2b24 --- /dev/null +++ b/test_opensearchpy/test_async/test_helpers/test_mapping.py @@ -0,0 +1,216 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. 
See +# GitHub history for details. + +import json + +from opensearchpy import Keyword, Nested, Text +from opensearchpy._async.helpers import mapping +from opensearchpy.helpers import analysis + + +async def test_mapping_can_has_fields(): + m = mapping.AsyncMapping() + m.field("name", "text").field("tags", "keyword") + + assert { + "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}} + } == m.to_dict() + + +async def test_mapping_update_is_recursive(): + m1 = mapping.AsyncMapping() + m1.field("title", "text") + m1.field("author", "object") + m1.field("author", "object", properties={"name": {"type": "text"}}) + m1.meta("_all", enabled=False) + m1.meta("dynamic", False) + + m2 = mapping.AsyncMapping() + m2.field("published_from", "date") + m2.field("author", "object", properties={"email": {"type": "text"}}) + m2.field("title", "text") + m2.field("lang", "keyword") + m2.meta("_analyzer", path="lang") + + m1.update(m2, update_only=True) + + assert { + "_all": {"enabled": False}, + "_analyzer": {"path": "lang"}, + "dynamic": False, + "properties": { + "published_from": {"type": "date"}, + "title": {"type": "text"}, + "lang": {"type": "keyword"}, + "author": { + "type": "object", + "properties": {"name": {"type": "text"}, "email": {"type": "text"}}, + }, + }, + } == m1.to_dict() + + +async def test_properties_can_iterate_over_all_the_fields(): + m = mapping.AsyncMapping() + m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")}) + m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")})) + + assert {"f1", "f2", "f3", "f4"} == { + f.test_attr for f in m.properties._collect_fields() + } + + +async def test_mapping_can_collect_all_analyzers_and_normalizers(): + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer("english") + a3 = analysis.analyzer("unknown_custom") + a4 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + a5 = analysis.analyzer("my_analyzer3", tokenizer="keyword") + n1 = analysis.normalizer("my_normalizer1", filter=["lowercase"]) + n2 = analysis.normalizer( + "my_normalizer2", + filter=[ + "my_filter1", + "my_filter2", + analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"]), + ], + ) + n3 = analysis.normalizer("unknown_custom") + + m = mapping.AsyncMapping() + m.field( + "title", + "text", + analyzer=a1, + fields={"english": Text(analyzer=a2), "unknown": Keyword(search_analyzer=a3)}, + ) + m.field("comments", Nested(properties={"author": Text(analyzer=a4)})) + m.field("normalized_title", "keyword", normalizer=n1) + m.field("normalized_comment", "keyword", normalizer=n2) + m.field("unknown", "keyword", normalizer=n3) + m.meta("_all", analyzer=a5) + + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + "my_analyzer3": {"tokenizer": "keyword", "type": "custom"}, + }, + "normalizer": { + "my_normalizer1": {"filter": ["lowercase"], "type": "custom"}, + "my_normalizer2": { + "filter": ["my_filter1", "my_filter2", "my_filter3"], + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": 
{"stopwords": ["c", "d"], "type": "stop"}, + "my_filter3": {"stopwords": ["e", "f"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + assert json.loads(json.dumps(m.to_dict())) == m.to_dict() + + +async def test_mapping_can_collect_multiple_analyzers(): + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + m = mapping.AsyncMapping() + m.field("title", "text", analyzer=a1, search_analyzer=a2) + m.field( + "text", + "text", + analyzer=a1, + fields={ + "english": Text(analyzer=a1), + "unknown": Keyword(analyzer=a1, search_analyzer=a2), + }, + ) + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + +async def test_even_non_custom_analyzers_can_have_params(): + a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+") + m = mapping.AsyncMapping() + m.field("title", "text", analyzer=a1) + + assert { + "analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}} + } == m._collect_analysis() + + +async def test_resolve_field_can_resolve_multifields(): + m = mapping.AsyncMapping() + m.field("title", "text", fields={"keyword": Keyword()}) + + assert isinstance(m.resolve_field("title.keyword"), Keyword) + + +async def test_resolve_nested(): + m = mapping.AsyncMapping() + m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})}) + m.field("k2", "keyword") + + nested, field = m.resolve_nested("n1.n2.k1") + assert nested == ["n1", "n1.n2"] + assert isinstance(field, Keyword) + + nested, field = m.resolve_nested("k2") + assert nested == [] + assert isinstance(field, Keyword) diff --git a/test_opensearchpy/test_async/test_helpers/test_search.py b/test_opensearchpy/test_async/test_helpers/test_search.py new file mode 100644 index 00000000..5df66804 --- /dev/null +++ b/test_opensearchpy/test_async/test_helpers/test_search.py @@ -0,0 +1,546 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
+ +from copy import deepcopy + +import pytest +from pytest import raises + +from opensearchpy._async.helpers import search +from opensearchpy._async.helpers.document import AsyncDocument +from opensearchpy.exceptions import IllegalOperation +from opensearchpy.helpers import query +from opensearchpy.helpers.query import Q + +pytestmark = pytest.mark.asyncio + + +async def test_expand__to_dot_is_respected(): + s = search.AsyncSearch().query("match", a__b=42, _expand__to_dot=False) + + assert {"query": {"match": {"a__b": 42}}} == s.to_dict() + + +async def test_execute_uses_cache(): + s = search.AsyncSearch() + r = object() + s._response = r + + assert r is await s.execute() + + +async def test_cache_isnt_cloned(): + s = search.AsyncSearch() + s._response = object() + + assert not hasattr(s._clone(), "_response") + + +async def test_search_starts_with_no_query(): + s = search.AsyncSearch() + + assert s.query._proxied is None + + +async def test_search_query_combines_query(): + s = search.AsyncSearch() + + s2 = s.query("match", f=42) + assert s2.query._proxied == query.Match(f=42) + assert s.query._proxied is None + + s3 = s2.query("match", f=43) + assert s2.query._proxied == query.Match(f=42) + assert s3.query._proxied == query.Bool(must=[query.Match(f=42), query.Match(f=43)]) + + +async def test_query_can_be_assigned_to(): + s = search.AsyncSearch() + + q = Q("match", title="python") + s.query = q + + assert s.query._proxied is q + + +async def test_query_can_be_wrapped(): + s = search.AsyncSearch().query("match", title="python") + + s.query = Q("function_score", query=s.query, field_value_factor={"field": "rating"}) + + assert { + "query": { + "function_score": { + "functions": [{"field_value_factor": {"field": "rating"}}], + "query": {"match": {"title": "python"}}, + } + } + } == s.to_dict() + + +async def test_using(): + o = object() + o2 = object() + s = search.AsyncSearch(using=o) + assert s._using is o + s2 = s.using(o2) + assert s._using is o + assert s2._using is o2 + + +async def test_methods_are_proxied_to_the_query(): + s = search.AsyncSearch().query("match_all") + + assert s.query.to_dict() == {"match_all": {}} + + +async def test_query_always_returns_search(): + s = search.AsyncSearch() + + assert isinstance(s.query("match", f=42), search.AsyncSearch) + + +async def test_source_copied_on_clone(): + s = search.AsyncSearch().source(False) + assert s._clone()._source == s._source + assert s._clone()._source is False + + s2 = search.AsyncSearch().source([]) + assert s2._clone()._source == s2._source + assert s2._source == [] + + s3 = search.AsyncSearch().source(["some", "fields"]) + assert s3._clone()._source == s3._source + assert s3._clone()._source == ["some", "fields"] + + +async def test_copy_clones(): + from copy import copy + + s1 = search.AsyncSearch().source(["some", "fields"]) + s2 = copy(s1) + + assert s1 == s2 + assert s1 is not s2 + + +async def test_aggs_allow_two_metric(): + s = search.AsyncSearch() + + s.aggs.metric("a", "max", field="a").metric("b", "max", field="b") + + assert s.to_dict() == { + "aggs": {"a": {"max": {"field": "a"}}, "b": {"max": {"field": "b"}}} + } + + +async def test_aggs_get_copied_on_change(): + s = search.AsyncSearch().query("match_all") + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + + s2 = s.query("match_all") + s2.aggs.bucket("per_month", "date_histogram", field="date", interval="month") + s3 = s2.query("match_all") + s3.aggs["per_month"].metric("max_score", "max", field="score") + s4 = 
s3._clone() + s4.aggs.metric("max_score", "max", field="score") + + d = { + "query": {"match_all": {}}, + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + } + + assert d == s.to_dict() + d["aggs"]["per_month"] = {"date_histogram": {"field": "date", "interval": "month"}} + assert d == s2.to_dict() + d["aggs"]["per_month"]["aggs"] = {"max_score": {"max": {"field": "score"}}} + assert d == s3.to_dict() + d["aggs"]["max_score"] = {"max": {"field": "score"}} + assert d == s4.to_dict() + + +async def test_search_index(): + s = search.AsyncSearch(index="i") + assert s._index == ["i"] + s = s.index("i2") + assert s._index == ["i", "i2"] + s = s.index("i3") + assert s._index == ["i", "i2", "i3"] + s = s.index() + assert s._index is None + s = search.AsyncSearch(index=("i", "i2")) + assert s._index == ["i", "i2"] + s = search.AsyncSearch(index=["i", "i2"]) + assert s._index == ["i", "i2"] + s = search.AsyncSearch() + s = s.index("i", "i2") + assert s._index == ["i", "i2"] + s2 = s.index("i3") + assert s._index == ["i", "i2"] + assert s2._index == ["i", "i2", "i3"] + s = search.AsyncSearch() + s = s.index(["i", "i2"], "i3") + assert s._index == ["i", "i2", "i3"] + s2 = s.index("i4") + assert s._index == ["i", "i2", "i3"] + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(["i4"]) + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(("i4", "i5")) + assert s2._index == ["i", "i2", "i3", "i4", "i5"] + + +async def test_doc_type_document_class(): + class MyDocument(AsyncDocument): + pass + + s = search.AsyncSearch(doc_type=MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + s = search.AsyncSearch().doc_type(MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + +async def test_sort(): + s = search.AsyncSearch() + s = s.sort("fielda", "-fieldb") + + assert ["fielda", {"fieldb": {"order": "desc"}}] == s._sort + assert {"sort": ["fielda", {"fieldb": {"order": "desc"}}]} == s.to_dict() + + s = s.sort() + assert [] == s._sort + assert search.AsyncSearch().to_dict() == s.to_dict() + + +async def test_sort_by_score(): + s = search.AsyncSearch() + s = s.sort("_score") + assert {"sort": ["_score"]} == s.to_dict() + + s = search.AsyncSearch() + with raises(IllegalOperation): + s.sort("-_score") + + +async def test_slice(): + s = search.AsyncSearch() + assert {"from": 3, "size": 7} == s[3:10].to_dict() + assert {"from": 0, "size": 5} == s[:5].to_dict() + assert {"from": 3, "size": 10} == s[3:].to_dict() + assert {"from": 0, "size": 0} == s[0:0].to_dict() + assert {"from": 20, "size": 0} == s[20:0].to_dict() + + +async def test_index(): + s = search.AsyncSearch() + assert {"from": 3, "size": 1} == s[3].to_dict() + + +async def test_search_to_dict(): + s = search.AsyncSearch() + assert {} == s.to_dict() + + s = s.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == s.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == s.to_dict(size=10) + + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + d = { + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + "query": {"match": {"f": 42}}, + } + assert d == s.to_dict() + + s = search.AsyncSearch(extra={"size": 5}) + assert {"size": 5} == s.to_dict() + s = s.extra(from_=42) + assert {"size": 5, "from": 42} == s.to_dict() + + +async def test_complex_example(): + s = search.AsyncSearch() + s = ( + 
s.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .post_filter("terms", tags=["prague", "czech"]) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + + s.aggs.bucket("per_country", "terms", field="country").metric( + "avg_attendees", "avg", field="attendees" + ) + + s.query.minimum_should_match = 2 + + s = s.highlight_options(order="score").highlight("title", "body", fragment_size=50) + + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "highlight": { + "order": "score", + "fields": {"title": {"fragment_size": 50}, "body": {"fragment_size": 50}}, + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +async def test_reverse(): + d = { + "query": { + "filtered": { + "filter": { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + }, + "query": { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + } + }, + "post_filter": {"bool": {"must": [{"terms": {"tags": ["prague", "czech"]}}]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "sort": ["title", {"category": {"order": "desc"}}, "_score"], + "size": 5, + "highlight": {"order": "score", "fields": {"title": {"fragment_size": 50}}}, + "suggest": { + "my-title-suggestions-1": { + "text": "devloping distibutd saerch engies", + "term": {"size": 3, "field": "title"}, + } + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } + + d2 = deepcopy(d) + + s = search.AsyncSearch.from_dict(d) + + # make sure we haven't modified anything in place + assert d == d2 + assert {"size": 5} == s._extra + assert d == s.to_dict() + + +async def test_from_dict_doesnt_need_query(): + s = search.AsyncSearch.from_dict({"size": 5}) + + assert {"size": 5} == s.to_dict() + + +async def test_source(): + assert {} == search.AsyncSearch().source().to_dict() + + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]} + } == search.AsyncSearch().source( + includes=["foo.bar.*"], excludes=["foo.one"] + ).to_dict() + + assert {"_source": False} == search.AsyncSearch().source(False).to_dict() + + assert {"_source": ["f1", "f2"]} == search.AsyncSearch().source( + includes=["foo.bar.*"], excludes=["foo.one"] + ).source(["f1", "f2"]).to_dict() + + +async def test_source_on_clone(): + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]}, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == search.AsyncSearch().source(includes=["foo.bar.*"]).source( + excludes=["foo.one"] + ).filter( + "term", title="python" + ).to_dict() + assert { + "_source": False, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == search.AsyncSearch().source(False).filter("term", title="python").to_dict() + + +async def test_source_on_clear(): + assert ( + {} 
+ == search.AsyncSearch() + .source(includes=["foo.bar.*"]) + .source(includes=None, excludes=None) + .to_dict() + ) + + +async def test_suggest_accepts_global_text(): + s = search.AsyncSearch.from_dict( + { + "suggest": { + "text": "the amsterdma meetpu", + "my-suggest-1": {"term": {"field": "title"}}, + "my-suggest-2": {"text": "other", "term": {"field": "body"}}, + } + } + ) + + assert { + "suggest": { + "my-suggest-1": { + "term": {"field": "title"}, + "text": "the amsterdma meetpu", + }, + "my-suggest-2": {"term": {"field": "body"}, "text": "other"}, + } + } == s.to_dict() + + +async def test_suggest(): + s = search.AsyncSearch() + s = s.suggest("my_suggestion", "pyhton", term={"field": "title"}) + + assert { + "suggest": {"my_suggestion": {"term": {"field": "title"}, "text": "pyhton"}} + } == s.to_dict() + + +async def test_exclude(): + s = search.AsyncSearch() + s = s.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == s.to_dict() + + +async def test_update_from_dict(): + s = search.AsyncSearch() + s.update_from_dict({"indices_boost": [{"important-documents": 2}]}) + s.update_from_dict({"_source": ["id", "name"]}) + + assert { + "indices_boost": [{"important-documents": 2}], + "_source": ["id", "name"], + } == s.to_dict() + + +async def test_rescore_query_to_dict(): + s = search.AsyncSearch(index="index-name") + + positive_query = Q( + "function_score", + query=Q("term", tags="a"), + script_score={"script": "_score * 1"}, + ) + + negative_query = Q( + "function_score", + query=Q("term", tags="b"), + script_score={"script": "_score * -100"}, + ) + + s = s.query(positive_query) + s = s.extra( + rescore={"window_size": 100, "query": {"rescore_query": negative_query}} + ) + assert s.to_dict() == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 100, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "b"}}, + "functions": [{"script_score": {"script": "_score * -100"}}], + } + } + }, + }, + } + + assert s.to_dict( + rescore={"window_size": 10, "query": {"rescore_query": positive_query}} + ) == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 10, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + } + }, + }, + } diff --git a/test_opensearchpy/test_async/test_helpers/test_update_by_query.py b/test_opensearchpy/test_async/test_helpers/test_update_by_query.py new file mode 100644 index 00000000..c535f15a --- /dev/null +++ b/test_opensearchpy/test_async/test_helpers/test_update_by_query.py @@ -0,0 +1,162 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
+ +from copy import deepcopy + +import pytest + +from opensearchpy import Q +from opensearchpy._async.helpers import update_by_query +from opensearchpy.helpers.response import UpdateByQueryResponse + +pytestmark = pytest.mark.asyncio + + +async def test_ubq_starts_with_no_query(): + ubq = update_by_query.AsyncUpdateByQuery() + + assert ubq.query._proxied is None + + +async def test_ubq_to_dict(): + ubq = update_by_query.AsyncUpdateByQuery() + assert {} == ubq.to_dict() + + ubq = ubq.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == ubq.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10) + + ubq = update_by_query.AsyncUpdateByQuery(extra={"size": 5}) + assert {"size": 5} == ubq.to_dict() + + ubq = update_by_query.AsyncUpdateByQuery( + extra={"extra_q": Q("term", category="conference")} + ) + assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict() + + +async def test_complex_example(): + ubq = update_by_query.AsyncUpdateByQuery() + ubq = ( + ubq.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + ) + + ubq.query.minimum_should_match = 2 + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } == ubq.to_dict() + + +async def test_exclude(): + ubq = update_by_query.AsyncUpdateByQuery() + ubq = ubq.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == ubq.to_dict() + + +async def test_reverse(): + d = { + "query": { + "filtered": { + "filter": { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + }, + "query": { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } + + d2 = deepcopy(d) + + ubq = update_by_query.AsyncUpdateByQuery.from_dict(d) + + assert d == d2 + assert d == ubq.to_dict() + + +async def test_from_dict_doesnt_need_query(): + ubq = update_by_query.AsyncUpdateByQuery.from_dict({"script": {"source": "test"}}) + + assert {"script": {"source": "test"}} == ubq.to_dict() + + +async def test_overwrite_script(): + ubq = update_by_query.AsyncUpdateByQuery() + ubq = ubq.script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + assert { + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + } + } == ubq.to_dict() + ubq = ubq.script(source="ctx._source.likes++") + assert {"script": {"source": "ctx._source.likes++"}} == ubq.to_dict() + + +async def test_update_by_query_response_success(): + ubqr = UpdateByQueryResponse({}, {"timed_out": False, "failures": []}) + assert ubqr.success() + + ubqr = UpdateByQueryResponse({}, {"timed_out": True, "failures": []}) + assert not ubqr.success() + + ubqr = UpdateByQueryResponse({}, {"timed_out": False, "failures": 
[{}]}) + assert not ubqr.success() diff --git a/test_opensearchpy/test_async/test_server/test_helpers/conftest.py b/test_opensearchpy/test_async/test_server/test_helpers/conftest.py new file mode 100644 index 00000000..dfaf31fa --- /dev/null +++ b/test_opensearchpy/test_async/test_server/test_helpers/conftest.py @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +import asyncio +import re +from datetime import datetime + +import pytest +from pytest import fixture +from test_data import ( + DATA, + FLAT_DATA, + TEST_GIT_DATA, + create_flat_git_index, + create_git_index, +) + +from opensearchpy._async.helpers.actions import async_bulk +from opensearchpy._async.helpers.test import get_test_client +from opensearchpy.connection.async_connections import add_connection +from test_opensearchpy.test_server.test_helpers.test_document import ( + Comment, + History, + PullRequest, + User, +) + +pytestmark = pytest.mark.asyncio + + +@pytest.fixture(scope="session") +def event_loop(): + loop = asyncio.get_event_loop() + yield loop + loop.close() + + +@fixture(scope="session") +async def client(): + client = await get_test_client(verify_certs=False, http_auth=("admin", "admin")) + await add_connection("default", client) + return client + + +@fixture(scope="session") +async def opensearch_version(client): + info = await client.info() + print(info) + yield tuple( + int(x) + for x in re.match(r"^([0-9.]+)", info["version"]["number"]).group(1).split(".") + ) + + +@fixture +async def write_client(client): + yield client + await client.indices.delete("test-*", ignore=404) + await client.indices.delete_template("test-template", ignore=404) + + +@fixture +async def data_client(client): + # create mappings + await create_git_index(client, "git") + await create_flat_git_index(client, "flat-git") + # load data + await async_bulk(client, DATA, raise_on_error=True, refresh=True) + await async_bulk(client, FLAT_DATA, raise_on_error=True, refresh=True) + yield client + await client.indices.delete("git", ignore=404) + await client.indices.delete("flat-git", ignore=404) + + +@fixture +def pull_request(write_client): + PullRequest.init() + pr = PullRequest( + _id=42, + comments=[ + Comment( + content="Hello World!", + author=User(name="honzakral"), + created_at=datetime(2018, 1, 9, 10, 17, 3, 21184), + history=[ + History( + timestamp=datetime(2012, 1, 1), + diff="-Ahoj Svete!\n+Hello World!", + ) + ], + ), + ], + created_at=datetime(2018, 1, 9, 9, 17, 3, 21184), + ) + pr.save(refresh=True) + return pr + + +@fixture +async def setup_ubq_tests(client): + index = "test-git" + await create_git_index(client, index) + await async_bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True) + return index diff --git a/test_opensearchpy/test_async/test_server/test_helpers.py b/test_opensearchpy/test_async/test_server/test_helpers/test_actions.py similarity index 91% rename from test_opensearchpy/test_async/test_server/test_helpers.py rename to test_opensearchpy/test_async/test_server/test_helpers/test_actions.py index bdc20f9b..7355c71d 100644 --- a/test_opensearchpy/test_async/test_server/test_helpers.py +++ b/test_opensearchpy/test_async/test_server/test_helpers/test_actions.py @@ -25,16 +25,13 @@ # under the License. 
-# Licensed to Elasticsearch B.V.under one or more agreements. -# Elasticsearch B.V.licenses this file to you under the Apache 2.0 License. -# See the LICENSE file in the project root for more information - import asyncio import pytest from mock import MagicMock, patch -from opensearchpy import TransportError, helpers +from opensearchpy import TransportError +from opensearchpy._async.helpers import actions from opensearchpy.helpers import BulkIndexError, ScanError pytestmark = pytest.mark.asyncio @@ -67,16 +64,16 @@ async def bulk(self, *args, **kwargs): class TestStreamingBulk(object): async def test_actions_remain_unchanged(self, async_client): - actions = [{"_id": 1}, {"_id": 2}] - async for ok, item in helpers.async_streaming_bulk( - async_client, actions, index="test-index" + actions1 = [{"_id": 1}, {"_id": 2}] + async for ok, item in actions.async_streaming_bulk( + async_client, actions1, index="test-index" ): assert ok - assert [{"_id": 1}, {"_id": 2}] == actions + assert [{"_id": 1}, {"_id": 2}] == actions1 async def test_all_documents_get_inserted(self, async_client): docs = [{"answer": x, "_id": x} for x in range(100)] - async for ok, item in helpers.async_streaming_bulk( + async for ok, item in actions.async_streaming_bulk( async_client, docs, index="test-index", refresh=True ): assert ok @@ -96,7 +93,7 @@ def sync_gen(): for x in range(100): yield {"answer": x, "_id": x} - async for ok, item in helpers.async_streaming_bulk( + async for ok, item in actions.async_streaming_bulk( async_client, async_gen(), index="test-index", refresh=True ): assert ok @@ -110,7 +107,7 @@ def sync_gen(): index="test-index", body={"query": {"match_all": {}}} ) - async for ok, item in helpers.async_streaming_bulk( + async for ok, item in actions.async_streaming_bulk( async_client, sync_gen(), index="test-index", refresh=True ): assert ok @@ -131,7 +128,7 @@ async def test_all_errors_from_chunk_are_raised_on_failure(self, async_client): await async_client.cluster.health(wait_for_status="yellow") try: - async for ok, item in helpers.async_streaming_bulk( + async for ok, item in actions.async_streaming_bulk( async_client, [{"a": "b"}, {"a": "c"}], index="i", raise_on_error=True ): assert ok @@ -148,7 +145,7 @@ async def test_different_op_types(self, async_client): {"_op_type": "delete", "_index": "i", "_id": 45}, {"_op_type": "update", "_index": "i", "_id": 42, "doc": {"answer": 42}}, ] - async for ok, item in helpers.async_streaming_bulk(async_client, docs): + async for ok, item in actions.async_streaming_bulk(async_client, docs): assert ok assert not await async_client.exists(index="i", id=45) @@ -165,7 +162,7 @@ async def test_transport_error_can_becaught(self, async_client): results = [ x - async for x in helpers.async_streaming_bulk( + async for x in actions.async_streaming_bulk( failing_client, docs, raise_on_exception=False, @@ -200,7 +197,7 @@ async def test_rejected_documents_are_retried(self, async_client): ] results = [ x - async for x in helpers.async_streaming_bulk( + async for x in actions.async_streaming_bulk( failing_client, docs, raise_on_exception=False, @@ -231,7 +228,7 @@ async def test_rejected_documents_are_retried_at_most_max_retries_times( ] results = [ x - async for x in helpers.async_streaming_bulk( + async for x in actions.async_streaming_bulk( failing_client, docs, raise_on_exception=False, @@ -258,7 +255,7 @@ async def test_transport_error_is_raised_with_max_retries(self, async_client): async def streaming_bulk(): results = [ x - async for x in helpers.async_streaming_bulk( 
+ async for x in actions.async_streaming_bulk( failing_client, [{"a": 42}, {"a": 39}], raise_on_exception=True, @@ -276,7 +273,7 @@ async def streaming_bulk(): class TestBulk(object): async def test_bulk_works_with_single_item(self, async_client): docs = [{"answer": 42, "_id": 1}] - success, failed = await helpers.async_bulk( + success, failed = await actions.async_bulk( async_client, docs, index="test-index", refresh=True ) @@ -289,7 +286,7 @@ async def test_bulk_works_with_single_item(self, async_client): async def test_all_documents_get_inserted(self, async_client): docs = [{"answer": x, "_id": x} for x in range(100)] - success, failed = await helpers.async_bulk( + success, failed = await actions.async_bulk( async_client, docs, index="test-index", refresh=True ) @@ -302,7 +299,7 @@ async def test_all_documents_get_inserted(self, async_client): async def test_stats_only_reports_numbers(self, async_client): docs = [{"answer": x} for x in range(100)] - success, failed = await helpers.async_bulk( + success, failed = await actions.async_bulk( async_client, docs, index="test-index", refresh=True, stats_only=True ) @@ -320,7 +317,7 @@ async def test_errors_are_reported_correctly(self, async_client): ) await async_client.cluster.health(wait_for_status="yellow") - success, failed = await helpers.async_bulk( + success, failed = await actions.async_bulk( async_client, [{"a": 42}, {"a": "c", "_id": 42}], index="i", @@ -347,16 +344,16 @@ async def test_error_is_raised(self, async_client): await async_client.cluster.health(wait_for_status="yellow") with pytest.raises(BulkIndexError): - await helpers.async_bulk(async_client, [{"a": 42}, {"a": "c"}], index="i") + await actions.async_bulk(async_client, [{"a": 42}, {"a": "c"}], index="i") async def test_ignore_error_if_raised(self, async_client): # ignore the status code 400 in tuple - await helpers.async_bulk( + await actions.async_bulk( async_client, [{"a": 42}, {"a": "c"}], index="i", ignore_status=(400,) ) # ignore the status code 400 in list - await helpers.async_bulk( + await actions.async_bulk( async_client, [{"a": 42}, {"a": "c"}], index="i", @@ -366,19 +363,19 @@ async def test_ignore_error_if_raised(self, async_client): ) # ignore the status code 400 - await helpers.async_bulk( + await actions.async_bulk( async_client, [{"a": 42}, {"a": "c"}], index="i", ignore_status=400 ) # ignore only the status code in the `ignore_status` argument with pytest.raises(BulkIndexError): - await helpers.async_bulk( + await actions.async_bulk( async_client, [{"a": 42}, {"a": "c"}], index="i", ignore_status=(444,) ) # ignore transport error exception failing_client = FailingBulkClient(async_client) - await helpers.async_bulk( + await actions.async_bulk( failing_client, [{"a": 42}], index="i", ignore_status=(599,) ) @@ -392,7 +389,7 @@ async def test_errors_are_collected_properly(self, async_client): ) await async_client.cluster.health(wait_for_status="yellow") - success, failed = await helpers.async_bulk( + success, failed = await actions.async_bulk( async_client, [{"a": 42}, {"a": "c"}], index="i", @@ -452,7 +449,7 @@ async def test_order_can_be_preserved(self, async_client, scan_teardown): docs = [ doc - async for doc in helpers.async_scan( + async for doc in actions.async_scan( async_client, index="test_index", query={"sort": "answer"}, @@ -473,7 +470,7 @@ async def test_all_documents_are_read(self, async_client, scan_teardown): docs = [ x - async for x in helpers.async_scan(async_client, index="test_index", size=2) + async for x in 
actions.async_scan(async_client, index="test_index", size=2) ] assert 100 == len(docs) @@ -490,7 +487,7 @@ async def test_scroll_error(self, async_client, scan_teardown): with patch.object(async_client, "scroll", MockScroll()): data = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", size=2, @@ -505,7 +502,7 @@ async def test_scroll_error(self, async_client, scan_teardown): with pytest.raises(ScanError): data = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", size=2, @@ -532,7 +529,7 @@ async def test_initial_search_error(self, async_client, scan_teardown): with patch.object(async_client, "scroll", MockScroll()): data = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", size=2, @@ -556,7 +553,7 @@ async def test_initial_search_error(self, async_client, scan_teardown): with pytest.raises(ScanError): data = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", size=2, @@ -572,7 +569,7 @@ async def test_no_scroll_id_fast_route(self, async_client, scan_teardown): with patch.object(async_client, "clear_scroll") as clear_mock: data = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index" ) ] @@ -581,7 +578,7 @@ async def test_no_scroll_id_fast_route(self, async_client, scan_teardown): scroll_mock.assert_not_called() clear_mock.assert_not_called() - @patch("opensearchpy._async.helpers.logger") + @patch("opensearchpy._async.helpers.actions.logger") async def test_logger(self, logger_mock, async_client, scan_teardown): bulk = [] for x in range(4): @@ -592,7 +589,7 @@ async def test_logger(self, logger_mock, async_client, scan_teardown): with patch.object(async_client, "scroll", MockScroll()): _ = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", size=2, @@ -606,7 +603,7 @@ async def test_logger(self, logger_mock, async_client, scan_teardown): try: _ = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", size=2, @@ -635,7 +632,7 @@ async def test_clear_scroll(self, async_client, scan_teardown): ) as spy: _ = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", size=2 ) ] @@ -644,7 +641,7 @@ async def test_clear_scroll(self, async_client, scan_teardown): spy.reset_mock() _ = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", size=2, clear_scroll=True ) ] @@ -653,7 +650,7 @@ async def test_clear_scroll(self, async_client, scan_teardown): spy.reset_mock() _ = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", size=2, clear_scroll=False ) ] @@ -699,7 +696,7 @@ async def test_scan_auth_kwargs_forwarded( ) as clear_mock: data = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", **kwargs ) ] @@ -739,7 +736,7 @@ async def test_scan_auth_kwargs_favor_scroll_kwargs_option( ): data = [ x - async for x in helpers.async_scan( + async for x in actions.async_scan( async_client, index="test_index", headers={"not scroll": "kwargs"}, @@ -779,7 +776,7 @@ class TestReindex(object): async def test_reindex_passes_kwargs_to_scan_and_bulk( self, async_client, reindex_setup 
): - await helpers.async_reindex( + await actions.async_reindex( async_client, "test_index", "prod_index", @@ -798,7 +795,7 @@ async def test_reindex_passes_kwargs_to_scan_and_bulk( )["_source"] async def test_reindex_accepts_a_query(self, async_client, reindex_setup): - await helpers.async_reindex( + await actions.async_reindex( async_client, "test_index", "prod_index", @@ -817,7 +814,7 @@ async def test_reindex_accepts_a_query(self, async_client, reindex_setup): )["_source"] async def test_all_documents_get_moved(self, async_client, reindex_setup): - await helpers.async_reindex(async_client, "test_index", "prod_index") + await actions.async_reindex(async_client, "test_index", "prod_index") await async_client.indices.refresh() assert await async_client.indices.exists("prod_index") @@ -869,7 +866,7 @@ class TestParentChildReindex: async def test_children_are_reindexed_correctly( self, async_client, parent_reindex_setup ): - await helpers.async_reindex(async_client, "test-index", "real-index") + await actions.async_reindex(async_client, "test-index", "real-index") assert {"question_answer": "question"} == ( await async_client.get(index="real-index", id=42) )["_source"] diff --git a/test_opensearchpy/test_async/test_server/test_helpers/test_data.py b/test_opensearchpy/test_async/test_server/test_helpers/test_data.py new file mode 100644 index 00000000..d513bcff --- /dev/null +++ b/test_opensearchpy/test_async/test_server/test_helpers/test_data.py @@ -0,0 +1,1097 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from __future__ import unicode_literals + + +async def create_flat_git_index(client, index): + # we will use user on several places + user_mapping = { + "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} + } + + await client.indices.create( + index=index, + body={ + "settings": { + # just one shard, no replicas for testing + "number_of_shards": 1, + "number_of_replicas": 0, + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + "description": {"type": "text", "analyzer": "snowball"}, + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + } + }, + }, + ) + + +async def create_git_index(client, index): + # we will use user on several places + user_mapping = { + "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} + } + + await client.indices.create( + index=index, + body={ + "settings": { + # just one shard, no replicas for testing + "number_of_shards": 1, + "number_of_replicas": 0, + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + # common fields + "description": {"type": "text", "analyzer": "snowball"}, + "commit_repo": {"type": "join", "relations": {"repo": "commit"}}, + # COMMIT mappings + "author": user_mapping, + "authored_date": {"type": 
"date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + # REPO mappings + "is_public": {"type": "boolean"}, + "owner": user_mapping, + "created_at": {"type": "date"}, + "tags": {"type": "keyword"}, + } + }, + }, + ) + + +DATA = [ + # repository + { + "_id": "opensearch-py", + "_source": { + "commit_repo": "repo", + "organization": "opensearch", + "created_at": "2014-03-03", + "owner": {"name": "opensearch"}, + "is_public": True, + }, + "_index": "git", + }, + # documents + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_aggs.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 7, "insertions": 23, "lines": 30, "files": 4}, + "description": "Make sure buckets aren't modified in-place", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["eb3e543323f189fd7b698e66295427204fff5755"], + "committed_date": "2014-05-02T13:47:19", + "authored_date": "2014-05-02T13:47:19.123+02:00", + }, + "_index": "git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 18, "lines": 18, "files": 1}, + "description": "Add communication with OpenSearch server", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["dd15b6ba17dd9ba16363a51f85b31f66f1fb1157"], + "committed_date": "2014-05-01T13:32:14", + "authored_date": "2014-05-01T13:32:14", + }, + "_index": "git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "test_opensearchpy/test_dsl/test_result.py", + "opensearchpy/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 18, "insertions": 44, "lines": 62, "files": 3}, + "description": "Minor cleanup and adding helpers for interactive python", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ed19caf25abd25300e707fadf3f81b05c5673446"], + "committed_date": "2014-05-01T13:30:44", + "authored_date": "2014-05-01T13:30:44", + }, + "_index": "git", + }, + { + "_id": "ed19caf25abd25300e707fadf3f81b05c5673446", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 28, "lines": 28, "files": 3}, + "description": "Make sure aggs do copy-on-write", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["583e52c71e9a72c1b291ec5843683d8fa8f1ce2d"], + "committed_date": "2014-04-27T16:28:09", + "authored_date": "2014-04-27T16:28:09", + }, + 
"_index": "git", + }, + { + "_id": "583e52c71e9a72c1b291ec5843683d8fa8f1ce2d", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/aggs.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 1, "lines": 2, "files": 1}, + "description": "Use __setitem__ from DslBase in AggsBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dd19210b5be92b960f7db6f66ae526288edccc3"], + "committed_date": "2014-04-27T15:51:53", + "authored_date": "2014-04-27T15:51:53", + }, + "_index": "git", + }, + { + "_id": "1dd19210b5be92b960f7db6f66ae526288edccc3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_search.py", + "opensearchpy/search.py", + "opensearchpy/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 21, "insertions": 98, "lines": 119, "files": 5}, + "description": "Have Search clone itself on any change besides aggs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b4c9e29376af2e42a4e6dc153f0f293b1a18bac3"], + "committed_date": "2014-04-26T14:49:43", + "authored_date": "2014-04-26T14:49:43", + }, + "_index": "git", + }, + { + "_id": "b4c9e29376af2e42a4e6dc153f0f293b1a18bac3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 5, "lines": 5, "files": 1}, + "description": "Add tests for [] on response", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a64a54181b232bb5943bd16960be9416e402f5f5"], + "committed_date": "2014-04-26T13:56:52", + "authored_date": "2014-04-26T13:56:52", + }, + "_index": "git", + }, + { + "_id": "a64a54181b232bb5943bd16960be9416e402f5f5", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 7, "lines": 8, "files": 1}, + "description": "Test access to missing fields raises appropriate exceptions", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["df3f778a3d37b170bde6979a4ef2d9e3e6400778"], + "committed_date": "2014-04-25T16:01:07", + "authored_date": "2014-04-25T16:01:07", + }, + "_index": "git", + }, + { + "_id": "df3f778a3d37b170bde6979a4ef2d9e3e6400778", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "test_opensearchpy/test_dsl/test_result.py", + "opensearchpy/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 8, "insertions": 31, "lines": 39, "files": 3}, + "description": "Support attribute access even for inner/nested objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925"], + "committed_date": "2014-04-25T15:59:02", + 
"authored_date": "2014-04-25T15:59:02", + }, + "_index": "git", + }, + { + "_id": "7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "test_opensearchpy/test_dsl/test_result.py", + "opensearchpy/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 149, "lines": 149, "files": 2}, + "description": "Added a prototype of a Respose and Result classes", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e2882d28cb8077eaa3e5d8ae76543482d4d90f7e"], + "committed_date": "2014-04-25T15:12:15", + "authored_date": "2014-04-25T15:12:15", + }, + "_index": "git", + }, + { + "_id": "e2882d28cb8077eaa3e5d8ae76543482d4d90f7e", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["docs/index.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "add warning to the docs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["51f94d83d1c47d3b81207736ca97a1ec6302678f"], + "committed_date": "2014-04-22T19:16:21", + "authored_date": "2014-04-22T19:16:21", + }, + "_index": "git", + }, + { + "_id": "51f94d83d1c47d3b81207736ca97a1ec6302678f", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 29, "lines": 32, "files": 1}, + "description": "Add some comments to the code", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0950f6c600b49e2bf012d03b02250fb71c848555"], + "committed_date": "2014-04-22T19:12:06", + "authored_date": "2014-04-22T19:12:06", + }, + "_index": "git", + }, + { + "_id": "0950f6c600b49e2bf012d03b02250fb71c848555", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "Added a WIP warning", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["54d058f5ac6be8225ef61d5529772aada42ec6c8"], + "committed_date": "2014-04-20T00:19:25", + "authored_date": "2014-04-20T00:19:25", + }, + "_index": "git", + }, + { + "_id": "54d058f5ac6be8225ef61d5529772aada42ec6c8", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/__init__.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 36, "insertions": 7, "lines": 43, "files": 3}, + "description": "Remove the operator kwarg from .query", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4cb07845e45787abc1f850c0b561e487e0034424"], + "committed_date": "2014-04-20T00:17:25", + "authored_date": "2014-04-20T00:17:25", + }, + "_index": "git", + }, + { + "_id": "4cb07845e45787abc1f850c0b561e487e0034424", + "routing": "opensearch-py", + "_source": { + 
"commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 35, "insertions": 49, "lines": 84, "files": 2}, + "description": "Complex example", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["578abe80f76aafd7e81fe46a44403e601733a938"], + "committed_date": "2014-03-24T20:48:45", + "authored_date": "2014-03-24T20:48:45", + }, + "_index": "git", + }, + { + "_id": "578abe80f76aafd7e81fe46a44403e601733a938", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 0, "lines": 2, "files": 1}, + "description": "removing extra whitespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ecb84f03565940c7d294dbc80723420dcfbab340"], + "committed_date": "2014-03-24T20:42:23", + "authored_date": "2014-03-24T20:42:23", + }, + "_index": "git", + }, + { + "_id": "ecb84f03565940c7d294dbc80723420dcfbab340", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 3, "lines": 4, "files": 1}, + "description": "Make sure attribute access works for .query on Search", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["9a247c876ab66e2bca56b25f392d054e613b1b2a"], + "committed_date": "2014-03-24T20:35:02", + "authored_date": "2014-03-24T20:34:46", + }, + "_index": "git", + }, + { + "_id": "9a247c876ab66e2bca56b25f392d054e613b1b2a", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 2, "lines": 2, "files": 1}, + "description": "Make sure .index and .doc_type methods are chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["cee5e46947d510a49edd3609ff91aab7b1f3ac89"], + "committed_date": "2014-03-24T20:27:46", + "authored_date": "2014-03-24T20:27:46", + }, + "_index": "git", + }, + { + "_id": "cee5e46947d510a49edd3609ff91aab7b1f3ac89", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + "opensearchpy/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 13, "insertions": 128, "lines": 141, "files": 3}, + "description": "Added .filter and .post_filter to Search", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1d6857182b09a556d58c6bc5bdcb243092812ba3"], + "committed_date": "2014-03-24T20:26:57", + "authored_date": "2014-03-24T20:26:57", + }, + "_index": "git", + }, + { + "_id": "1d6857182b09a556d58c6bc5bdcb243092812ba3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": 
["opensearchpy/utils.py", "opensearchpy/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 29, "lines": 53, "files": 2}, + "description": "Extracted combination logic into DslBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4ad92f15a1955846c01642318303a821e8435b75"], + "committed_date": "2014-03-24T20:03:51", + "authored_date": "2014-03-24T20:03:51", + }, + "_index": "git", + }, + { + "_id": "4ad92f15a1955846c01642318303a821e8435b75", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/utils.py", "opensearchpy/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 43, "insertions": 45, "lines": 88, "files": 2}, + "description": "Extracted bool-related logic to a mixin to be reused by filters", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6eb39dc2825605543ac1ed0b45b9b6baeecc44c2"], + "committed_date": "2014-03-24T19:16:16", + "authored_date": "2014-03-24T19:16:16", + }, + "_index": "git", + }, + { + "_id": "6eb39dc2825605543ac1ed0b45b9b6baeecc44c2", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 32, "lines": 33, "files": 2}, + "description": "Enable otheroperators when querying on Search object", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["be094c7b307332cb6039bf9a7c984d2c7593ddff"], + "committed_date": "2014-03-24T18:25:10", + "authored_date": "2014-03-24T18:25:10", + }, + "_index": "git", + }, + { + "_id": "be094c7b307332cb6039bf9a7c984d2c7593ddff", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 23, "insertions": 35, "lines": 58, "files": 3}, + "description": "make sure query operations always return copies", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b2576e3b6437e2cb9d8971fee4ead60df91fd75b"], + "committed_date": "2014-03-24T18:10:37", + "authored_date": "2014-03-24T18:03:13", + }, + "_index": "git", + }, + { + "_id": "b2576e3b6437e2cb9d8971fee4ead60df91fd75b", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 53, "lines": 54, "files": 2}, + "description": "Adding or operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1be002170ac3cd59d2e97824b83b88bb3c9c60ed"], + "committed_date": "2014-03-24T17:53:38", + "authored_date": "2014-03-24T17:53:38", + }, + "_index": "git", + }, + { + "_id": "1be002170ac3cd59d2e97824b83b88bb3c9c60ed", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": 
"opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 35, "lines": 35, "files": 2}, + "description": "Added inverting of queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["24e1e38b2f704f65440d96c290b7c6cd54c2e00e"], + "committed_date": "2014-03-23T17:44:36", + "authored_date": "2014-03-23T17:44:36", + }, + "_index": "git", + }, + { + "_id": "24e1e38b2f704f65440d96c290b7c6cd54c2e00e", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/aggs.py", "opensearchpy/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 5, "insertions": 1, "lines": 6, "files": 2}, + "description": "Change equality checks to use .to_dict()", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277cfaedbaf3705ed74ad6296227e1172c97a63f"], + "committed_date": "2014-03-23T17:43:01", + "authored_date": "2014-03-23T17:43:01", + }, + "_index": "git", + }, + { + "_id": "277cfaedbaf3705ed74ad6296227e1172c97a63f", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 11, "lines": 12, "files": 2}, + "description": "Test combining of bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6aa3868a6a9f35f71553ce96f9d3d63c74d054fd"], + "committed_date": "2014-03-21T15:15:06", + "authored_date": "2014-03-21T15:15:06", + }, + "_index": "git", + }, + { + "_id": "6aa3868a6a9f35f71553ce96f9d3d63c74d054fd", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 23, "lines": 24, "files": 2}, + "description": "Adding & operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["bb311eb35e7eb53fb5ae01e3f80336866c7e3e37"], + "committed_date": "2014-03-21T15:10:08", + "authored_date": "2014-03-21T15:10:08", + }, + "_index": "git", + }, + { + "_id": "bb311eb35e7eb53fb5ae01e3f80336866c7e3e37", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 4, "lines": 5, "files": 2}, + "description": "Don't serialize empty typed fields into dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["aea8ea9e421bd53a5b058495e68c3fd57bb1dacc"], + "committed_date": "2014-03-15T16:29:37", + "authored_date": "2014-03-15T16:29:37", + }, + "_index": "git", + }, + { + "_id": "aea8ea9e421bd53a5b058495e68c3fd57bb1dacc", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + 
"opensearchpy/utils.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 37, "lines": 40, "files": 3}, + "description": "Bool queries, when combining just adds their params together", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a8819a510b919be43ff3011b904f257798fb8916"], + "committed_date": "2014-03-15T16:16:40", + "authored_date": "2014-03-15T16:16:40", + }, + "_index": "git", + }, + { + "_id": "a8819a510b919be43ff3011b904f257798fb8916", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/run_tests.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 6, "insertions": 2, "lines": 8, "files": 1}, + "description": "Simpler run_tests.py", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e35792a725be2325fc54d3fcb95a7d38d8075a99"], + "committed_date": "2014-03-15T16:02:21", + "authored_date": "2014-03-15T16:02:21", + }, + "_index": "git", + }, + { + "_id": "e35792a725be2325fc54d3fcb95a7d38d8075a99", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/aggs.py", "opensearchpy/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 2}, + "description": "Maku we don't treat shortcuts as methods.", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc"], + "committed_date": "2014-03-15T15:59:21", + "authored_date": "2014-03-15T15:59:21", + }, + "_index": "git", + }, + { + "_id": "3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/query.py", + "opensearchpy/utils.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 9, "insertions": 5, "lines": 14, "files": 3}, + "description": "Centralize == of Dsl objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b5e7d0c4b284211df8f7b464fcece93a27a802fb"], + "committed_date": "2014-03-10T21:37:24", + "authored_date": "2014-03-10T21:37:24", + }, + "_index": "git", + }, + { + "_id": "b5e7d0c4b284211df8f7b464fcece93a27a802fb", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + "opensearchpy/utils.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 75, "insertions": 115, "lines": 190, "files": 6}, + "description": "Experimental draft with more declarative DSL", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0fe741b43adee5ca1424584ddd3f35fa33f8733c"], + "committed_date": "2014-03-10T21:34:39", + "authored_date": "2014-03-10T21:34:39", + }, + "_index": "git", + }, + { + "_id": "0fe741b43adee5ca1424584ddd3f35fa33f8733c", + "routing": 
"opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["test_opensearchpy/test_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 1}, + "description": "Make sure .query is chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a22be5933d4b022cbacee867b1aece120208edf3"], + "committed_date": "2014-03-07T17:41:59", + "authored_date": "2014-03-07T17:41:59", + }, + "_index": "git", + }, + { + "_id": "a22be5933d4b022cbacee867b1aece120208edf3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 14, "insertions": 44, "lines": 58, "files": 3}, + "description": "Search now does aggregations", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e823686aacfc4bdcb34ffdab337a26fa09659a9a"], + "committed_date": "2014-03-07T17:29:55", + "authored_date": "2014-03-07T17:29:55", + }, + "_index": "git", + }, + { + "_id": "e823686aacfc4bdcb34ffdab337a26fa09659a9a", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 1, "lines": 1, "files": 1}, + "description": "Ignore html coverage report", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e0aedb3011c71d704deec03a8f32b2b360d6e364"], + "committed_date": "2014-03-07T17:03:23", + "authored_date": "2014-03-07T17:03:23", + }, + "_index": "git", + }, + { + "_id": "e0aedb3011c71d704deec03a8f32b2b360d6e364", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/aggs.py", + "test_opensearchpy/test_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 228, "lines": 228, "files": 2}, + "description": "Added aggregation DSL objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd"], + "committed_date": "2014-03-07T16:25:55", + "authored_date": "2014-03-07T16:25:55", + }, + "_index": "git", + }, + { + "_id": "61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/utils.py", "opensearchpy/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 12, "insertions": 7, "lines": 19, "files": 2}, + "description": "Only retrieve DslClass, leave the instantiation to the caller", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["647f1017a7b17a913e07af70a3b03202f6adbdfd"], + "committed_date": "2014-03-07T15:27:43", + "authored_date": "2014-03-07T15:27:43", + }, + "_index": "git", + }, + { + "_id": "647f1017a7b17a913e07af70a3b03202f6adbdfd", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + 
"test_opensearchpy/test_dsl/test_search.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 19, "insertions": 19, "lines": 38, "files": 3}, + "description": "No need to replicate Query suffix when in query namespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d"], + "committed_date": "2014-03-07T15:19:01", + "authored_date": "2014-03-07T15:19:01", + }, + "_index": "git", + }, + { + "_id": "7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["opensearchpy/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 3, "lines": 5, "files": 1}, + "description": "Ask forgiveness, not permission", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["c10793c2ca43688195e415b25b674ff34d58eaff"], + "committed_date": "2014-03-07T15:13:22", + "authored_date": "2014-03-07T15:13:22", + }, + "_index": "git", + }, + { + "_id": "c10793c2ca43688195e415b25b674ff34d58eaff", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/utils.py", + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 27, "lines": 51, "files": 3}, + "description": "Extract DSL object registration to DslMeta", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d8867fdb17fcf4c696657740fa08d29c36adc6ec"], + "committed_date": "2014-03-07T15:12:13", + "authored_date": "2014-03-07T15:10:31", + }, + "_index": "git", + }, + { + "_id": "d8867fdb17fcf4c696657740fa08d29c36adc6ec", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "Search.to_dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2eb7cd980d917ed6f4a4dd8e246804f710ec5082"], + "committed_date": "2014-03-07T02:58:33", + "authored_date": "2014-03-07T02:58:33", + }, + "_index": "git", + }, + { + "_id": "2eb7cd980d917ed6f4a4dd8e246804f710ec5082", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/search.py", + "test_opensearchpy/test_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 113, "lines": 113, "files": 2}, + "description": "Basic Search object", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["11708576f9118e0dbf27ae1f8a7b799cf281b511"], + "committed_date": "2014-03-06T21:02:03", + "authored_date": "2014-03-06T21:01:05", + }, + "_index": "git", + }, + { + "_id": "11708576f9118e0dbf27ae1f8a7b799cf281b511", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + 
"files": [ + "/query.popensearchpyy", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "MatchAll query + anything is anything", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dc496e5c7c1b2caf290df477fca2db61ebe37e0"], + "committed_date": "2014-03-06T20:40:39", + "authored_date": "2014-03-06T20:39:52", + }, + "_index": "git", + }, + { + "_id": "1dc496e5c7c1b2caf290df477fca2db61ebe37e0", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 53, "lines": 53, "files": 2}, + "description": "From_dict, Q(dict) and bool query parses it's subqueries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d407f99d1959b7b862a541c066d9fd737ce913f3"], + "committed_date": "2014-03-06T20:24:30", + "authored_date": "2014-03-06T20:24:30", + }, + "_index": "git", + }, + { + "_id": "d407f99d1959b7b862a541c066d9fd737ce913f3", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": ["CONTRIBUTING.md", "README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 6, "insertions": 21, "lines": 27, "files": 2}, + "description": "Housekeeping - licence and updated generic CONTRIBUTING.md", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277e8ecc7395754d1ba1f2411ec32337a3e9d73f"], + "committed_date": "2014-03-05T16:21:44", + "authored_date": "2014-03-05T16:21:44", + }, + "_index": "git", + }, + { + "_id": "277e8ecc7395754d1ba1f2411ec32337a3e9d73f", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "setup.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 59, "lines": 59, "files": 3}, + "description": "Automatic query registration and Q function", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["8f1e34bd8f462fec50bcc10971df2d57e2986604"], + "committed_date": "2014-03-05T16:18:52", + "authored_date": "2014-03-05T16:18:52", + }, + "_index": "git", + }, + { + "_id": "8f1e34bd8f462fec50bcc10971df2d57e2986604", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/query.py", + "test_opensearchpy/test_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 54, "lines": 54, "files": 2}, + "description": "Initial implementation of match and bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["fcff47ddcc6d08be5739d03dd30f504fb9db2608"], + "committed_date": "2014-03-05T15:55:06", + "authored_date": "2014-03-05T15:55:06", + }, + "_index": "git", + }, + { + "_id": "fcff47ddcc6d08be5739d03dd30f504fb9db2608", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", 
"parent": "opensearch-py"}, + "files": [ + "docs/Makefile", + "CONTRIBUTING.md", + "docs/conf.py", + "LICENSE", + "Changelog.rst", + "docs/index.rst", + "docs/Changelog.rst", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 692, "lines": 692, "files": 7}, + "description": "Docs template", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["febe8127ae48fcc81778c0fb2d628f1bcc0a0350"], + "committed_date": "2014-03-04T01:42:31", + "authored_date": "2014-03-04T01:42:31", + }, + "_index": "git", + }, + { + "_id": "febe8127ae48fcc81778c0fb2d628f1bcc0a0350", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [ + "opensearchpy/__init__.py", + "test_opensearchpy/test_dsl/run_tests.py", + "setup.py", + "README.rst", + "test_opensearchpy/test_dsl/__init__.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 82, "lines": 82, "files": 5}, + "description": "Empty project structure", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2a8f1ce89760bfc72808f3945b539eae650acac9"], + "committed_date": "2014-03-04T01:37:49", + "authored_date": "2014-03-03T18:23:55", + }, + "_index": "git", + }, + { + "_id": "2a8f1ce89760bfc72808f3945b539eae650acac9", + "routing": "opensearch-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "opensearch-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 9, "lines": 9, "files": 1}, + "description": "Initial commit, .gitignore", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": [], + "committed_date": "2014-03-03T18:15:05", + "authored_date": "2014-03-03T18:15:05", + }, + "_index": "git", + }, +] + + +def flatten_doc(d): + src = d["_source"].copy() + del src["commit_repo"] + return {"_index": "flat-git", "_id": d["_id"], "_source": src} + + +FLAT_DATA = [flatten_doc(d) for d in DATA if "routing" in d] + + +def create_test_git_data(d): + src = d["_source"].copy() + return { + "_index": "test-git", + "routing": "opensearch-py", + "_id": d["_id"], + "_source": src, + } + + +TEST_GIT_DATA = [create_test_git_data(d) for d in DATA] diff --git a/test_opensearchpy/test_async/test_server/test_helpers/test_document.py b/test_opensearchpy/test_async/test_server/test_helpers/test_document.py new file mode 100644 index 00000000..172dfbfc --- /dev/null +++ b/test_opensearchpy/test_async/test_server/test_helpers/test_document.py @@ -0,0 +1,555 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
+ +from datetime import datetime +from ipaddress import ip_address + +import pytest +from pytest import raises +from pytz import timezone + +from opensearchpy import ( + Binary, + Boolean, + ConflictError, + Date, + Double, + InnerDoc, + Ip, + Keyword, + Long, + MetaField, + Nested, + NotFoundError, + Object, + Q, + RankFeatures, + Text, + analyzer, +) +from opensearchpy._async.helpers.actions import aiter +from opensearchpy._async.helpers.document import AsyncDocument +from opensearchpy._async.helpers.mapping import AsyncMapping +from opensearchpy.helpers.utils import AttrList + +pytestmark = pytest.mark.asyncio +snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) + + +class User(InnerDoc): + name = Text(fields={"raw": Keyword()}) + + +class Wiki(AsyncDocument): + owner = Object(User) + views = Long() + ranked = RankFeatures() + + class Index: + name = "test-wiki" + + +class Repository(AsyncDocument): + owner = Object(User) + created_at = Date() + description = Text(analyzer=snowball) + tags = Keyword() + + @classmethod + def search(cls): + return super(Repository, cls).search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(AsyncDocument): + committed_date = Date() + authored_date = Date() + description = Text(analyzer=snowball) + + class Index: + name = "flat-git" + + class Meta: + mapping = AsyncMapping() + + +class History(InnerDoc): + timestamp = Date() + diff = Text() + + +class Comment(InnerDoc): + content = Text() + created_at = Date() + author = Object(User) + history = Nested(History) + + class Meta: + dynamic = MetaField(False) + + +class PullRequest(AsyncDocument): + comments = Nested(Comment) + created_at = Date() + + class Index: + name = "test-prs" + + +class SerializationDoc(AsyncDocument): + i = Long() + b = Boolean() + d = Double() + bin = Binary() + ip = Ip() + + class Index: + name = "test-serialization" + + +async def test_serialization(write_client): + await SerializationDoc.init() + await write_client.index( + index="test-serialization", + id=42, + body={ + "i": [1, 2, "3", None], + "b": [True, False, "true", "false", None], + "d": [0.1, "-0.1", None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "ip": ["::1", "127.0.0.1", None], + }, + ) + sd = await SerializationDoc.get(id=42) + + assert sd.i == [1, 2, 3, None] + assert sd.b == [True, False, True, False, None] + assert sd.d == [0.1, -0.1, None] + assert sd.bin == [b"Hello World", None] + assert sd.ip == [ip_address("::1"), ip_address("127.0.0.1"), None] + + assert sd.to_dict() == { + "b": [True, False, True, False, None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "d": [0.1, -0.1, None], + "i": [1, 2, 3, None], + "ip": ["::1", "127.0.0.1", None], + } + + +async def test_nested_inner_hits_are_wrapped_properly(pull_request): + history_query = Q( + "nested", + path="comments.history", + inner_hits={}, + query=Q("match", comments__history__diff="ahoj"), + ) + s = PullRequest.search().query( + "nested", inner_hits={}, path="comments", query=history_query + ) + + response = await s.execute() + pr = response.hits[0] + assert isinstance(pr, PullRequest) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].history[0], History) + + comment = pr.meta.inner_hits.comments.hits[0] + assert isinstance(comment, Comment) + assert comment.author.name == "honzakral" + assert isinstance(comment.history[0], History) + + history = comment.meta.inner_hits["comments.history"].hits[0] + assert isinstance(history, History) + assert history.timestamp 
== datetime(2012, 1, 1) + assert "score" in history.meta + + +async def test_nested_inner_hits_are_deserialized_properly(pull_request): + s = PullRequest.search().query( + "nested", + inner_hits={}, + path="comments", + query=Q("match", comments__content="hello"), + ) + + response = await s.execute() + pr = response.hits[0] + assert isinstance(pr.created_at, datetime) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].created_at, datetime) + + +async def test_nested_top_hits_are_wrapped_properly(pull_request): + s = PullRequest.search() + s.aggs.bucket("comments", "nested", path="comments").metric( + "hits", "top_hits", size=1 + ) + + r = await s.execute() + + print(r._d_) + assert isinstance(r.aggregations.comments.hits.hits[0], Comment) + + +async def test_update_object_field(write_client): + await Wiki.init() + w = Wiki( + owner=User(name="Honza Kral"), + _id="opensearch-py", + ranked={"test1": 0.1, "topic2": 0.2}, + ) + await w.save() + + assert "updated" == await w.update(owner=[{"name": "Honza"}, {"name": "Nick"}]) + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + w = await Wiki.get(id="opensearch-py") + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + assert w.ranked == {"test1": 0.1, "topic2": 0.2} + + +async def test_update_script(write_client): + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42) + await w.save() + + await w.update(script="ctx._source.views += params.inc", inc=5) + w = await Wiki.get(id="opensearch-py") + assert w.views == 47 + + +async def test_update_retry_on_conflict(write_client): + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42) + await w.save() + + w1 = await Wiki.get(id="opensearch-py") + w2 = await Wiki.get(id="opensearch-py") + await w1.update( + script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1 + ) + await w2.update( + script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1 + ) + + w = await Wiki.get(id="opensearch-py") + assert w.views == 52 + + +@pytest.mark.parametrize("retry_on_conflict", [None, 0]) +async def test_update_conflicting_version(write_client, retry_on_conflict): + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42) + await w.save() + + w1 = await Wiki.get(id="opensearch-py") + w2 = await Wiki.get(id="opensearch-py") + await w1.update(script="ctx._source.views += params.inc", inc=5) + + with raises(ConflictError): + await w2.update( + script="ctx._source.views += params.inc", + inc=5, + retry_on_conflict=retry_on_conflict, + ) + + +async def test_save_and_update_return_doc_meta(write_client): + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42) + resp = await w.save(return_doc_meta=True) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "created" + assert resp.keys().__contains__("_id") + assert resp.keys().__contains__("_primary_term") + assert resp.keys().__contains__("_seq_no") + assert resp.keys().__contains__("_shards") + assert resp.keys().__contains__("_version") + + resp = await w.update( + script="ctx._source.views += params.inc", inc=5, return_doc_meta=True + ) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "updated" + assert resp.keys().__contains__("_id") + assert resp.keys().__contains__("_primary_term") + assert resp.keys().__contains__("_seq_no") + assert resp.keys().__contains__("_shards") + assert 
resp.keys().__contains__("_version") + + +async def test_init(write_client): + await Repository.init(index="test-git") + + assert await write_client.indices.exists(index="test-git") + + +async def test_get_raises_404_on_index_missing(data_client): + with raises(NotFoundError): + await Repository.get("opensearch-dsl-php", index="not-there") + + +async def test_get_raises_404_on_non_existent_id(data_client): + with raises(NotFoundError): + await Repository.get("opensearch-dsl-php") + + +async def test_get_returns_none_if_404_ignored(data_client): + assert None is await Repository.get("opensearch-dsl-php", ignore=404) + + +async def test_get_returns_none_if_404_ignored_and_index_doesnt_exist(data_client): + assert None is await Repository.get("42", index="not-there", ignore=404) + + +async def test_get(data_client): + opensearch_repo = await Repository.get("opensearch-py") + + assert isinstance(opensearch_repo, Repository) + assert opensearch_repo.owner.name == "opensearch" + assert datetime(2014, 3, 3) == opensearch_repo.created_at + + +async def test_exists_return_true(data_client): + assert await Repository.exists("opensearch-py") + + +async def test_exists_false(data_client): + assert not await Repository.exists("opensearch-dsl-php") + + +async def test_get_with_tz_date(data_client): + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="opensearch-py" + ) + + tzinfo = timezone("Europe/Prague") + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000)) + == first_commit.authored_date + ) + + +async def test_save_with_tz_date(data_client): + tzinfo = timezone("Europe/Prague") + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="opensearch-py" + ) + first_commit.committed_date = tzinfo.localize( + datetime(2014, 5, 2, 13, 47, 19, 123456) + ) + await first_commit.save() + + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="opensearch-py" + ) + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) + == first_commit.committed_date + ) + + +COMMIT_DOCS_WITH_MISSING = [ + {"_id": "0"}, # Missing + {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"}, # Existing + {"_id": "f"}, # Missing + {"_id": "eb3e543323f189fd7b698e66295427204fff5755"}, # Existing +] + + +async def test_mget(data_client): + commits = await Commit.mget(COMMIT_DOCS_WITH_MISSING) + assert commits[0] is None + assert commits[1].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[2] is None + assert commits[3].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +async def test_mget_raises_exception_when_missing_param_is_invalid(data_client): + with raises(ValueError): + await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj") + + +async def test_mget_raises_404_when_missing_param_is_raise(data_client): + with raises(NotFoundError): + await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise") + + +async def test_mget_ignores_missing_docs_when_missing_param_is_skip(data_client): + commits = await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip") + assert commits[0].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[1].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +async def test_update_works_from_search_response(data_client): + opensearch_repo = (await Repository.search().execute())[0] + + await opensearch_repo.update(owner={"other_name": "opensearchpy"}) + assert "opensearchpy" == opensearch_repo.owner.other_name + + 
new_version = await Repository.get("opensearch-py") + assert "opensearchpy" == new_version.owner.other_name + assert "opensearch" == new_version.owner.name + + +async def test_update(data_client): + opensearch_repo = await Repository.get("opensearch-py") + v = opensearch_repo.meta.version + + old_seq_no = opensearch_repo.meta.seq_no + await opensearch_repo.update( + owner={"new_name": "opensearchpy"}, new_field="testing-update" + ) + + assert "opensearchpy" == opensearch_repo.owner.new_name + assert "testing-update" == opensearch_repo.new_field + + # assert version has been updated + assert opensearch_repo.meta.version == v + 1 + + new_version = await Repository.get("opensearch-py") + assert "testing-update" == new_version.new_field + assert "opensearchpy" == new_version.owner.new_name + assert "opensearch" == new_version.owner.name + assert "seq_no" in new_version.meta + assert new_version.meta.seq_no != old_seq_no + assert "primary_term" in new_version.meta + + +async def test_save_updates_existing_doc(data_client): + opensearch_repo = await Repository.get("opensearch-py") + + opensearch_repo.new_field = "testing-save" + old_seq_no = opensearch_repo.meta.seq_no + assert "updated" == await opensearch_repo.save() + + new_repo = await data_client.get(index="git", id="opensearch-py") + assert "testing-save" == new_repo["_source"]["new_field"] + assert new_repo["_seq_no"] != old_seq_no + assert new_repo["_seq_no"] == opensearch_repo.meta.seq_no + + +async def test_save_automatically_uses_seq_no_and_primary_term(data_client): + opensearch_repo = await Repository.get("opensearch-py") + opensearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + await opensearch_repo.save() + + +async def test_delete_automatically_uses_seq_no_and_primary_term(data_client): + opensearch_repo = await Repository.get("opensearch-py") + opensearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + await opensearch_repo.delete() + + +async def assert_doc_equals(expected, actual): + async for f in aiter(expected): + assert f in actual + assert actual[f] == expected[f] + + +async def test_can_save_to_different_index(write_client): + test_repo = Repository(description="testing", meta={"id": 42}) + assert await test_repo.save(index="test-document") + + await assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"description": "testing"}, + }, + await write_client.get(index="test-document", id=42), + ) + + +async def test_save_without_skip_empty_will_include_empty_fields(write_client): + test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42}) + assert await test_repo.save(index="test-document", skip_empty=False) + + await assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"field_1": [], "field_2": None, "field_3": {}}, + }, + await write_client.get(index="test-document", id=42), + ) + + +async def test_delete(write_client): + await write_client.create( + index="test-document", + id="opensearch-py", + body={ + "organization": "opensearch", + "created_at": "2014-03-03", + "owner": {"name": "opensearch"}, + }, + ) + + test_repo = Repository(meta={"id": "opensearch-py"}) + test_repo.meta.index = "test-document" + await test_repo.delete() + + assert not await write_client.exists( + index="test-document", + id="opensearch-py", + ) + + +async def test_search(data_client): + assert await Repository.search().count() == 1 + + +async def test_search_returns_proper_doc_classes(data_client): + result = await 
Repository.search().execute() + + opensearch_repo = result.hits[0] + + assert isinstance(opensearch_repo, Repository) + assert opensearch_repo.owner.name == "opensearch" + + +async def test_refresh_mapping(data_client): + class Commit(AsyncDocument): + class Index: + name = "git" + + await Commit._index.load_mappings() + + assert "stats" in Commit._index._mapping + assert "committer" in Commit._index._mapping + assert "description" in Commit._index._mapping + assert "committed_date" in Commit._index._mapping + assert isinstance(Commit._index._mapping["committed_date"], Date) + + +async def test_highlight_in_meta(data_client): + commit = ( + await Commit.search() + .query("match", description="inverting") + .highlight("description") + .execute() + )[0] + + assert isinstance(commit, Commit) + assert "description" in commit.meta.highlight + assert isinstance(commit.meta.highlight["description"], AttrList) + assert len(commit.meta.highlight["description"]) > 0 diff --git a/test_opensearchpy/test_async/test_server/test_helpers/test_faceted_search.py b/test_opensearchpy/test_async/test_server/test_helpers/test_faceted_search.py new file mode 100644 index 00000000..c6350c67 --- /dev/null +++ b/test_opensearchpy/test_async/test_server/test_helpers/test_faceted_search.py @@ -0,0 +1,274 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from datetime import datetime + +import pytest + +from opensearchpy import A, Boolean, Date, Keyword +from opensearchpy._async.helpers.document import AsyncDocument +from opensearchpy._async.helpers.faceted_search import AsyncFacetedSearch +from opensearchpy.helpers.faceted_search import ( + DateHistogramFacet, + NestedFacet, + RangeFacet, + TermsFacet, +) +from test_opensearchpy.test_async.test_server.test_helpers.test_document import ( + PullRequest, +) + +pytestmark = pytest.mark.asyncio + + +class Repos(AsyncDocument): + is_public = Boolean() + created_at = Date() + + class Index: + name = "git" + + +class Commit(AsyncDocument): + files = Keyword() + committed_date = Date() + + class Index: + name = "git" + + +class MetricSearch(AsyncFacetedSearch): + index = "git" + doc_types = [Commit] + + facets = { + "files": TermsFacet(field="files", metric=A("max", field="committed_date")), + } + + +@pytest.fixture(scope="session") +def commit_search_cls(opensearch_version): + interval_kwargs = {"fixed_interval": "1d"} + + class CommitSearch(AsyncFacetedSearch): + index = "flat-git" + fields = ( + "description", + "files", + ) + + facets = { + "files": TermsFacet(field="files"), + "frequency": DateHistogramFacet( + field="authored_date", min_doc_count=1, **interval_kwargs + ), + "deletions": RangeFacet( + field="stats.deletions", + ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], + ), + } + + return CommitSearch + + +@pytest.fixture(scope="session") +def repo_search_cls(opensearch_version): + interval_type = "calendar_interval" + + class RepoSearch(AsyncFacetedSearch): + index = "git" + doc_types = [Repos] + facets = { + "public": TermsFacet(field="is_public"), + "created": DateHistogramFacet( + field="created_at", **{interval_type: "month"} + ), + } + + def search(self): + s = super(RepoSearch, self).search() + return s.filter("term", commit_repo="repo") + + return RepoSearch + + +@pytest.fixture(scope="session") 
+def pr_search_cls(opensearch_version): + interval_type = "calendar_interval" + + class PRSearch(AsyncFacetedSearch): + index = "test-prs" + doc_types = [PullRequest] + facets = { + "comments": NestedFacet( + "comments", + DateHistogramFacet( + field="comments.created_at", **{interval_type: "month"} + ), + ) + } + + return PRSearch + + +async def test_facet_with_custom_metric(data_client): + ms = MetricSearch() + r = await ms.execute() + + dates = [f[1] for f in r.facets.files] + assert dates == list(sorted(dates, reverse=True)) + assert dates[0] == 1399038439000 + + +async def test_nested_facet(pull_request, pr_search_cls): + prs = pr_search_cls() + r = await prs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments + + +async def test_nested_facet_with_filter(pull_request, pr_search_cls): + prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) + r = await prs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments + + prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) + r = await prs.execute() + assert not r.hits + + +async def test_datehistogram_facet(data_client, repo_search_cls): + rs = repo_search_cls() + r = await rs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created + + +async def test_boolean_facet(data_client, repo_search_cls): + rs = repo_search_cls() + r = await rs.execute() + + assert r.hits.total.value == 1 + assert [(True, 1, False)] == r.facets.public + value, count, selected = r.facets.public[0] + assert value is True + + +async def test_empty_search_finds_everything( + data_client, opensearch_version, commit_search_cls +): + cs = commit_search_cls() + r = await cs.execute() + assert r.hits.total.value == 52 + assert [ + ("opensearchpy", 39, False), + ("test_opensearchpy", 35, False), + ("test_opensearchpy/test_dsl", 35, False), + ("opensearchpy/query.py", 18, False), + ("test_opensearchpy/test_dsl/test_search.py", 15, False), + ("opensearchpy/utils.py", 14, False), + ("test_opensearchpy/test_dsl/test_query.py", 13, False), + ("opensearchpy/search.py", 12, False), + ("opensearchpy/aggs.py", 11, False), + ("test_opensearchpy/test_dsl/test_result.py", 5, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 2, False), + (datetime(2014, 3, 4, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 3, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 9, False), + (datetime(2014, 3, 10, 0, 0), 2, False), + (datetime(2014, 3, 15, 0, 0), 4, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 2, False), + (datetime(2014, 3, 24, 0, 0), 10, False), + (datetime(2014, 4, 20, 0, 0), 2, False), + (datetime(2014, 4, 22, 0, 0), 2, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 2, False), + (datetime(2014, 5, 1, 0, 0), 2, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 19, False), + ("good", 14, False), + ("better", 19, False), + ] == r.facets.deletions + + +async def test_term_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"files": "test_opensearchpy/test_dsl"}) + + r = await cs.execute() + + assert 35 == r.hits.total.value + assert [ + ("opensearchpy", 39, False), + ("test_opensearchpy", 35, 
False), + ("test_opensearchpy/test_dsl", 35, True), + ("opensearchpy/query.py", 18, False), + ("test_opensearchpy/test_dsl/test_search.py", 15, False), + ("opensearchpy/utils.py", 14, False), + ("test_opensearchpy/test_dsl/test_query.py", 13, False), + ("opensearchpy/search.py", 12, False), + ("opensearchpy/aggs.py", 11, False), + ("test_opensearchpy/test_dsl/test_result.py", 5, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 2, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 6, False), + (datetime(2014, 3, 10, 0, 0), 1, False), + (datetime(2014, 3, 15, 0, 0), 3, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 1, False), + (datetime(2014, 3, 24, 0, 0), 7, False), + (datetime(2014, 4, 20, 0, 0), 1, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 1, False), + (datetime(2014, 5, 1, 0, 0), 1, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 12, False), + ("good", 10, False), + ("better", 13, False), + ] == r.facets.deletions + + +async def test_range_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"deletions": "better"}) + + r = await cs.execute() + + assert 19 == r.hits.total.value + + +async def test_pagination(data_client, commit_search_cls): + cs = commit_search_cls() + cs = cs[0:20] + + assert 52 == await cs.count() + assert 20 == len(await cs.execute()) diff --git a/test_opensearchpy/test_async/test_server/test_helpers/test_index.py b/test_opensearchpy/test_async/test_server/test_helpers/test_index.py new file mode 100644 index 00000000..26f452ca --- /dev/null +++ b/test_opensearchpy/test_async/test_server/test_helpers/test_index.py @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. 
+ +import pytest + +from opensearchpy import Date, Text +from opensearchpy._async.helpers.document import AsyncDocument +from opensearchpy._async.helpers.index import AsyncIndex, AsyncIndexTemplate +from opensearchpy.helpers import analysis + +pytestmark = pytest.mark.asyncio + + +class Post(AsyncDocument): + title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")) + published_from = Date() + + +async def test_index_template_works(write_client): + it = AsyncIndexTemplate("test-template", "test-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + await it.save() + + i = AsyncIndex("test-blog") + await i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == await write_client.indices.get_mapping(index="test-blog") + + +async def test_index_can_be_saved_even_with_settings(write_client): + i = AsyncIndex("test-blog", using=write_client) + i.settings(number_of_shards=3, number_of_replicas=0) + await i.save() + i.settings(number_of_replicas=1) + await i.save() + + assert ( + "1" + == (await i.get_settings())["test-blog"]["settings"]["index"][ + "number_of_replicas" + ] + ) + + +async def test_index_exists(data_client): + assert await AsyncIndex("git").exists() + assert not await AsyncIndex("not-there").exists() + + +async def test_index_can_be_created_with_settings_and_mappings(write_client): + i = AsyncIndex("test-blog", using=write_client) + i.document(Post) + i.settings(number_of_replicas=0, number_of_shards=1) + await i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == await write_client.indices.get_mapping(index="test-blog") + + settings = await write_client.indices.get_settings(index="test-blog") + assert settings["test-blog"]["settings"]["index"]["number_of_replicas"] == "0" + assert settings["test-blog"]["settings"]["index"]["number_of_shards"] == "1" + assert settings["test-blog"]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } + + +async def test_delete(write_client): + await write_client.indices.create( + index="test-index", + body={"settings": {"number_of_replicas": 0, "number_of_shards": 1}}, + ) + + i = AsyncIndex("test-index", using=write_client) + await i.delete() + assert not await write_client.indices.exists(index="test-index") + + +async def test_multiple_indices_with_same_doc_type_work(write_client): + i1 = AsyncIndex("test-index-1", using=write_client) + i2 = AsyncIndex("test-index-2", using=write_client) + + for i in i1, i2: + i.document(Post) + await i.create() + + for i in ("test-index-1", "test-index-2"): + settings = await write_client.indices.get_settings(index=i) + assert settings[i]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } diff --git a/test_opensearchpy/test_async/test_server/test_helpers/test_mapping.py b/test_opensearchpy/test_async/test_server/test_helpers/test_mapping.py new file mode 100644 index 00000000..c05fd0ec --- /dev/null +++ b/test_opensearchpy/test_async/test_server/test_helpers/test_mapping.py @@ -0,0 +1,158 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source 
license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +import pytest +from pytest import raises + +from opensearchpy import exceptions +from opensearchpy._async.helpers import mapping +from opensearchpy.helpers import analysis + +pytestmark = pytest.mark.asyncio + + +async def test_mapping_saved_into_opensearch(write_client): + m = mapping.AsyncMapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + m.field("tags", "keyword") + await m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "tags": {"type": "keyword"}, + } + } + } + } == await write_client.indices.get_mapping(index="test-mapping") + + +async def test_mapping_saved_into_opensearch_when_index_already_exists_closed( + write_client, +): + m = mapping.AsyncMapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + await write_client.indices.create(index="test-mapping") + + with raises(exceptions.IllegalOperation): + await m.save("test-mapping", using=write_client) + + await write_client.cluster.health(index="test-mapping", wait_for_status="yellow") + await write_client.indices.close(index="test-mapping") + await m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}} + } + } + } == await write_client.indices.get_mapping(index="test-mapping") + + +async def test_mapping_saved_into_opensearch_when_index_already_exists_with_analysis( + write_client, +): + m = mapping.AsyncMapping() + analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword") + m.field("name", "text", analyzer=analyzer) + + new_analysis = analyzer.get_analysis_definition() + new_analysis["analyzer"]["other_analyzer"] = { + "type": "custom", + "tokenizer": "whitespace", + } + await write_client.indices.create( + index="test-mapping", body={"settings": {"analysis": new_analysis}} + ) + + m.field("title", "text", analyzer=analyzer) + await m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "title": {"type": "text", "analyzer": "my_analyzer"}, + } + } + } + } == await write_client.indices.get_mapping(index="test-mapping") + + +async def test_mapping_gets_updated_from_opensearch(write_client): + await write_client.indices.create( + index="test-mapping", + body={ + "settings": {"number_of_shards": 1, "number_of_replicas": 0}, + "mappings": { + "date_detection": False, + "properties": { + "title": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + "created_at": {"type": "date"}, + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + }, + }, + }, + }, + }, + ) + + m = await mapping.AsyncMapping.from_opensearch("test-mapping", using=write_client) + + assert ["comments", "created_at", "title"] == list( + sorted(m.properties.properties._d_.keys()) + ) + assert { + "date_detection": False, + "properties": { + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + }, + "created_at": 
{"type": "date"}, + "title": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + } == m.to_dict() + + # test same with alias + await write_client.indices.put_alias(index="test-mapping", name="test-alias") + + m2 = await mapping.AsyncMapping.from_opensearch("test-alias", using=write_client) + assert m2.to_dict() == m.to_dict() diff --git a/test_opensearchpy/test_async/test_server/test_helpers/test_search.py b/test_opensearchpy/test_async/test_server/test_helpers/test_search.py new file mode 100644 index 00000000..4807488d --- /dev/null +++ b/test_opensearchpy/test_async/test_server/test_helpers/test_search.py @@ -0,0 +1,161 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from __future__ import unicode_literals + +import pytest +from pytest import raises + +from opensearchpy import Date, Keyword, Q, Text, TransportError +from opensearchpy._async.helpers.document import AsyncDocument +from opensearchpy._async.helpers.search import AsyncMultiSearch, AsyncSearch +from opensearchpy.helpers.response import aggs +from test_opensearchpy.test_server.test_helpers.test_data import FLAT_DATA + +pytestmark = pytest.mark.asyncio + + +class Repository(AsyncDocument): + created_at = Date() + description = Text(analyzer="snowball") + tags = Keyword() + + @classmethod + def search(cls): + return super(Repository, cls).search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(AsyncDocument): + class Index: + name = "flat-git" + + +async def test_filters_aggregation_buckets_are_accessible(data_client): + has_tests_query = Q("term", files="test_opensearchpy/test_dsl") + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").bucket( + "has_tests", "filters", filters={"yes": has_tests_query, "no": ~has_tests_query} + ).metric("lines", "stats", field="stats.lines") + response = await s.execute() + + assert isinstance( + response.aggregations.top_authors.buckets[0].has_tests.buckets.yes, aggs.Bucket + ) + assert ( + 35 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.doc_count + ) + assert ( + 228 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.lines.max + ) + + +async def test_top_hits_are_wrapped_in_response(data_client): + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").metric( + "top_commits", "top_hits", size=5 + ) + response = await s.execute() + + top_commits = response.aggregations.top_authors.buckets[0].top_commits + assert isinstance(top_commits, aggs.TopHitsData) + assert 5 == len(top_commits) + + hits = [h for h in top_commits] + assert 5 == len(hits) + assert isinstance(hits[0], Commit) + + +async def test_inner_hits_are_wrapped_in_response(data_client): + s = AsyncSearch(index="git")[0:1].query( + "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all") + ) + response = await s.execute() + + commit = response.hits[0] + assert isinstance(commit.meta.inner_hits.repo, response.__class__) + assert repr(commit.meta.inner_hits.repo[0]).startswith(" 0 + assert not response.timed_out + assert response.updated == 52 + assert response.deleted == 0 + assert response.took > 0 + assert response.success() + + +async def 
test_update_by_query_with_script(write_client, setup_ubq_tests): + index = setup_ubq_tests + + ubq = ( + AsyncUpdateByQuery(using=write_client) + .index(index) + .filter(~Q("exists", field="parent_shas")) + .script(source="ctx._source.is_public = false") + ) + ubq = ubq.params(conflicts="proceed") + + response = await ubq.execute() + assert response.total == 2 + assert response.updated == 2 + assert response.version_conflicts == 0 + + +async def test_delete_by_query_with_script(write_client, setup_ubq_tests): + index = setup_ubq_tests + + ubq = ( + AsyncUpdateByQuery(using=write_client) + .index(index) + .filter(Q("match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3")) + .script(source='ctx.op = "delete"') + ) + ubq = ubq.params(conflicts="proceed") + + response = await ubq.execute() + + assert response.total == 1 + assert response.deleted == 1 + assert response.success()