diff --git a/.travis.yml b/.travis.yml
index c63934991d8..c8152fc1c3b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,7 +4,7 @@ python:
   - 3.6
 sudo: false
 env:
-  - ES_VERSION=1.3.9 ES_DOWNLOAD_URL=https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz
+  - ES_VERSION=6.2.4 ES_DOWNLOAD_URL=https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz
 matrix:
   include:
     - python: 2.7
@@ -42,3 +42,4 @@ notifications:
 branches:
   only:
     - master
+    - search_upgrade
diff --git a/readthedocs/projects/admin.py b/readthedocs/projects/admin.py
index 3b57d8f81f3..f79b094bb92 100644
--- a/readthedocs/projects/admin.py
+++ b/readthedocs/projects/admin.py
@@ -15,7 +15,7 @@
 from .forms import FeatureForm
 from .models import (Project, ImportedFile, Feature,
-                     ProjectRelationship, EmailHook, WebHook, Domain)
+                     ProjectRelationship, EmailHook, WebHook, Domain, HTMLFile)
 from .notifications import ResourceUsageNotification
 from .tasks import remove_dir
@@ -206,3 +206,4 @@ def project_count(self, feature):
 admin.site.register(Feature, FeatureAdmin)
 admin.site.register(EmailHook)
 admin.site.register(WebHook)
+admin.site.register(HTMLFile)
diff --git a/readthedocs/projects/apps.py b/readthedocs/projects/apps.py
index c6c0bf29017..42c0892b9eb 100644
--- a/readthedocs/projects/apps.py
+++ b/readthedocs/projects/apps.py
@@ -9,5 +9,6 @@ class ProjectsConfig(AppConfig):
     def ready(self):
         from readthedocs.projects import tasks
         from readthedocs.worker import app
+        app.tasks.register(tasks.SyncRepositoryTask)
         app.tasks.register(tasks.UpdateDocsTask)
diff --git a/readthedocs/projects/managers.py b/readthedocs/projects/managers.py
new file mode 100644
index 00000000000..c8b524702b9
--- /dev/null
+++ b/readthedocs/projects/managers.py
@@ -0,0 +1,7 @@
+from django.db import models
+
+
+class HTMLFileManager(models.Manager):
+
+    def get_queryset(self):
+        return super(HTMLFileManager, self).get_queryset().filter(name__endswith='.html')
diff --git a/readthedocs/projects/migrations/0026_add_htmlfile_model.py b/readthedocs/projects/migrations/0026_add_htmlfile_model.py
new file mode 100644
index 00000000000..df6635bd9b8
--- /dev/null
+++ b/readthedocs/projects/migrations/0026_add_htmlfile_model.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.9.13 on 2018-06-18 16:45
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('projects', '0025_show-version-warning-existing-projects'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='HTMLFile',
+            fields=[
+            ],
+            options={
+                'proxy': True,
+            },
+            bases=('projects.importedfile',),
+        ),
+        migrations.AlterField(
+            model_name='project',
+            name='comment_moderation',
+            field=models.BooleanField(default=False, verbose_name='Comment Moderation'),
+        ),
+        migrations.AlterField(
+            model_name='project',
+            name='documentation_type',
+            field=models.CharField(choices=[('auto', 'Automatically Choose'), ('sphinx', 'Sphinx Html'), ('mkdocs', 'Mkdocs (Markdown)'), ('sphinx_htmldir', 'Sphinx HtmlDir'), ('sphinx_singlehtml', 'Sphinx Single Page HTML')], default='sphinx', help_text='Type of documentation you are building. More info.', max_length=20, verbose_name='Documentation type'),
+        ),
+        migrations.AlterField(
+            model_name='project',
+            name='language',
+            field=models.CharField(choices=[('aa', 'Afar'), ('ab', 'Abkhaz'), ('af', 'Afrikaans'), ('am', 'Amharic'), ('ar', 'Arabic'), ('as', 'Assamese'), ('ay', 'Aymara'), ('az', 'Azerbaijani'), ('ba', 'Bashkir'), ('be', 'Belarusian'), ('bg', 'Bulgarian'), ('bh', 'Bihari'), ('bi', 'Bislama'), ('bn', 'Bengali'), ('bo', 'Tibetan'), ('br', 'Breton'), ('ca', 'Catalan'), ('co', 'Corsican'), ('cs', 'Czech'), ('cy', 'Welsh'), ('da', 'Danish'), ('de', 'German'), ('dz', 'Dzongkha'), ('el', 'Greek'), ('en', 'English'), ('eo', 'Esperanto'), ('es', 'Spanish'), ('et', 'Estonian'), ('eu', 'Basque'), ('fa', 'Iranian'), ('fi', 'Finnish'), ('fj', 'Fijian'), ('fo', 'Faroese'), ('fr', 'French'), ('fy', 'Western Frisian'), ('ga', 'Irish'), ('gd', 'Scottish Gaelic'), ('gl', 'Galician'), ('gn', 'Guarani'), ('gu', 'Gujarati'), ('ha', 'Hausa'), ('hi', 'Hindi'), ('he', 'Hebrew'), ('hr', 'Croatian'), ('hu', 'Hungarian'), ('hy', 'Armenian'), ('ia', 'Interlingua'), ('id', 'Indonesian'), ('ie', 'Interlingue'), ('ik', 'Inupiaq'), ('is', 'Icelandic'), ('it', 'Italian'), ('iu', 'Inuktitut'), ('ja', 'Japanese'), ('jv', 'Javanese'), ('ka', 'Georgian'), ('kk', 'Kazakh'), ('kl', 'Kalaallisut'), ('km', 'Khmer'), ('kn', 'Kannada'), ('ko', 'Korean'), ('ks', 'Kashmiri'), ('ku', 'Kurdish'), ('ky', 'Kyrgyz'), ('la', 'Latin'), ('ln', 'Lingala'), ('lo', 'Lao'), ('lt', 'Lithuanian'), ('lv', 'Latvian'), ('mg', 'Malagasy'), ('mi', 'Maori'), ('mk', 'Macedonian'), ('ml', 'Malayalam'), ('mn', 'Mongolian'), ('mr', 'Marathi'), ('ms', 'Malay'), ('mt', 'Maltese'), ('my', 'Burmese'), ('na', 'Nauru'), ('ne', 'Nepali'), ('nl', 'Dutch'), ('no', 'Norwegian'), ('oc', 'Occitan'), ('om', 'Oromo'), ('or', 'Oriya'), ('pa', 'Panjabi'), ('pl', 'Polish'), ('ps', 'Pashto'), ('pt', 'Portuguese'), ('qu', 'Quechua'), ('rm', 'Romansh'), ('rn', 'Kirundi'), ('ro', 'Romanian'), ('ru', 'Russian'), ('rw', 'Kinyarwanda'), ('sa', 'Sanskrit'), ('sd', 'Sindhi'), ('sg', 'Sango'), ('si', 'Sinhala'), ('sk', 'Slovak'), ('sl', 'Slovenian'), ('sm', 'Samoan'), ('sn', 'Shona'), ('so', 'Somali'), ('sq', 'Albanian'), ('sr', 'Serbian'), ('ss', 'Swati'), ('st', 'Southern Sotho'), ('su', 'Sudanese'), ('sv', 'Swedish'), ('sw', 'Swahili'), ('ta', 'Tamil'), ('te', 'Telugu'), ('tg', 'Tajik'), ('th', 'Thai'), ('ti', 'Tigrinya'), ('tk', 'Turkmen'), ('tl', 'Tagalog'), ('tn', 'Tswana'), ('to', 'Tonga'), ('tr', 'Turkish'), ('ts', 'Tsonga'), ('tt', 'Tatar'), ('tw', 'Twi'), ('ug', 'Uyghur'), ('uk', 'Ukrainian'), ('ur', 'Urdu'), ('uz', 'Uzbek'), ('vi', 'Vietnamese'), ('vo', 'Volapuk'), ('wo', 'Wolof'), ('xh', 'Xhosa'), ('yi', 'Yiddish'), ('yo', 'Yoruba'), ('za', 'Zhuang'), ('zh', 'Chinese'), ('zu', 'Zulu'), ('nb_NO', 'Norwegian Bokmal'), ('pt_BR', 'Brazilian Portuguese'), ('es_MX', 'Mexican Spanish'), ('uk_UA', 'Ukrainian'), ('zh_CN', 'Simplified Chinese'), ('zh_TW', 'Traditional Chinese')], default='en', help_text="The language the project documentation is rendered in. Note: this affects your project's URL.", max_length=20, verbose_name='Language'),
+        ),
+        migrations.AlterField(
+            model_name='project',
+            name='privacy_level',
+            field=models.CharField(choices=[('public', 'Public'), ('protected', 'Protected'), ('private', 'Private')], default='public', help_text='Level of privacy that you want on the repository. Protected means public but not in listings.', max_length=20, verbose_name='Privacy Level'),
+        ),
+        migrations.AlterField(
+            model_name='project',
+            name='python_interpreter',
+            field=models.CharField(choices=[('python', 'CPython 2.x'), ('python3', 'CPython 3.x')], default='python', help_text='The Python interpreter used to create the virtual environment.', max_length=20, verbose_name='Python Interpreter'),
+        ),
+        migrations.AlterField(
+            model_name='project',
+            name='version_privacy_level',
+            field=models.CharField(choices=[('public', 'Public'), ('protected', 'Protected'), ('private', 'Private')], default='public', help_text='Default level of privacy you want on built versions of documentation.', max_length=20, verbose_name='Version Privacy Level'),
+        ),
+    ]
diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py
index b7099547e85..14fe14a0b2b 100644
--- a/readthedocs/projects/models.py
+++ b/readthedocs/projects/models.py
@@ -7,13 +7,14 @@
 import fnmatch
 import logging
 import os
-from builtins import object # pylint: disable=redefined-builtin
+from builtins import object  # pylint: disable=redefined-builtin
 
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.core.urlresolvers import NoReverseMatch, reverse
 from django.db import models
 from django.utils.encoding import python_2_unicode_compatible
+from django.utils.functional import cached_property
 from django.utils.translation import ugettext_lazy as _
 from future.backports.urllib.parse import urlparse  # noqa
 from guardian.shortcuts import assign
@@ -24,6 +25,7 @@
 from readthedocs.core.utils import broadcast, slugify
 from readthedocs.projects import constants
 from readthedocs.projects.exceptions import ProjectConfigurationError
+from readthedocs.projects.managers import HTMLFileManager
 from readthedocs.projects.querysets import (
     ChildRelatedProjectQuerySet, FeatureQuerySet, ProjectQuerySet,
     RelatedProjectQuerySet)
@@ -32,6 +34,7 @@
 from readthedocs.projects.version_handling import (
     determine_stable_version, version_windows)
 from readthedocs.restapi.client import api
+from readthedocs.search.parse_json import process_file
 from readthedocs.vcs_support.backends import backend_cls
 from readthedocs.vcs_support.utils import Lock, NonBlockingLock
@@ -910,6 +913,40 @@ def __str__(self):
         return '%s: %s' % (self.name, self.project)
 
 
+class HTMLFile(ImportedFile):
+
+    """
+    Imported HTML file Proxy model.
+
+    This tracks only the HTML files for indexing to search.
+    """
+
+    class Meta(object):
+        proxy = True
+
+    objects = HTMLFileManager()
+
+    @cached_property
+    def json_file_path(self):
+        basename = os.path.splitext(self.path)[0]
+        file_path = basename + '.fjson'
+
+        full_json_path = self.project.get_production_media_path(type_='json',
+                                                                version_slug=self.version.slug,
+                                                                include_file=False)
+
+        file_path = os.path.join(full_json_path, file_path)
+        return file_path
+
+    def get_processed_json(self):
+        file_path = self.json_file_path
+        return process_file(file_path)
+
+    @cached_property
+    def processed_json(self):
+        return self.get_processed_json()
+
+
 class Notification(models.Model):
     project = models.ForeignKey(Project, related_name='%(class)s_notifications')
""" fileify(version_pk, commit=commit) - update_search(version_pk, commit=commit) @app.task() diff --git a/readthedocs/projects/utils.py b/readthedocs/projects/utils.py index aba4f3d413d..324b9b12f44 100644 --- a/readthedocs/projects/utils.py +++ b/readthedocs/projects/utils.py @@ -32,18 +32,21 @@ def version_from_slug(slug, version): return v -def find_file(filename): +def find_file(basename, pattern, path): """ - Recursively find matching file from the current working path. + Recursively find matching file. - :param file: Filename to match - :returns: A list of matching filenames. + :param basename: Basename of a file to match + :param pattern: Pattern to match + :param path: the directory to search for the file + :returns: path of matching file """ - matches = [] - for root, __, filenames in os.walk('.'): - for match in fnmatch.filter(filenames, filename): - matches.append(os.path.join(root, match)) - return matches + for root, _, files in os.walk(path): + for filename in files: + file_basename = os.path.splitext(filename)[0] + + if fnmatch.fnmatch(filename, pattern) and file_basename == basename: + return os.path.join(root, filename) def run(*commands): diff --git a/readthedocs/search/conf.py b/readthedocs/search/conf.py new file mode 100644 index 00000000000..029024ca1b5 --- /dev/null +++ b/readthedocs/search/conf.py @@ -0,0 +1 @@ +SEARCH_EXCLUDED_FILE = ['search.html', 'genindex.html', 'py-modindex.html'] diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py new file mode 100644 index 00000000000..f4fd4db265e --- /dev/null +++ b/readthedocs/search/documents.py @@ -0,0 +1,106 @@ +from django.conf import settings +from django_elasticsearch_dsl import DocType, Index, fields + +from readthedocs.projects.models import Project, HTMLFile +from .conf import SEARCH_EXCLUDED_FILE + +from readthedocs.search.faceted_search import ProjectSearch, FileSearch + +project_conf = settings.ES_INDEXES['project'] +project_index = Index(project_conf['name']) +project_index.settings(**project_conf['settings']) + +page_conf = settings.ES_INDEXES['page'] +page_index = Index(page_conf['name']) +page_index.settings(**page_conf['settings']) + + +@project_index.doc_type +class ProjectDocument(DocType): + + class Meta(object): + model = Project + fields = ('name', 'slug', 'description') + + url = fields.TextField(attr='get_absolute_url') + users = fields.NestedField(properties={ + 'username': fields.TextField(), + 'id': fields.IntegerField(), + }) + language = fields.KeywordField() + + @classmethod + def faceted_search(cls, query, language=None, using=None, index=None): + kwargs = { + 'using': using or cls._doc_type.using, + 'index': index or cls._doc_type.index, + 'doc_types': [cls], + 'model': cls._doc_type.model, + 'query': query + } + + if language: + kwargs['filters'] = {'language': language} + + return ProjectSearch(**kwargs) + + +@page_index.doc_type +class PageDocument(DocType): + + class Meta(object): + model = HTMLFile + fields = ('commit',) + + project = fields.KeywordField(attr='project.slug') + version = fields.KeywordField(attr='version.slug') + + title = fields.TextField(attr='processed_json.title') + headers = fields.TextField(attr='processed_json.headers') + content = fields.TextField(attr='processed_json.content') + path = fields.TextField(attr='processed_json.path') + + @classmethod + def faceted_search(cls, query, projects_list=None, versions_list=None, using=None, index=None): + kwargs = { + 'using': using or cls._doc_type.using, + 'index': index or 
diff --git a/readthedocs/search/conf.py b/readthedocs/search/conf.py
new file mode 100644
index 00000000000..029024ca1b5
--- /dev/null
+++ b/readthedocs/search/conf.py
@@ -0,0 +1 @@
+SEARCH_EXCLUDED_FILE = ['search.html', 'genindex.html', 'py-modindex.html']
diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
new file mode 100644
index 00000000000..f4fd4db265e
--- /dev/null
+++ b/readthedocs/search/documents.py
@@ -0,0 +1,106 @@
+from django.conf import settings
+from django_elasticsearch_dsl import DocType, Index, fields
+
+from readthedocs.projects.models import Project, HTMLFile
+from .conf import SEARCH_EXCLUDED_FILE
+
+from readthedocs.search.faceted_search import ProjectSearch, FileSearch
+
+project_conf = settings.ES_INDEXES['project']
+project_index = Index(project_conf['name'])
+project_index.settings(**project_conf['settings'])
+
+page_conf = settings.ES_INDEXES['page']
+page_index = Index(page_conf['name'])
+page_index.settings(**page_conf['settings'])
+
+
+@project_index.doc_type
+class ProjectDocument(DocType):
+
+    class Meta(object):
+        model = Project
+        fields = ('name', 'slug', 'description')
+
+    url = fields.TextField(attr='get_absolute_url')
+    users = fields.NestedField(properties={
+        'username': fields.TextField(),
+        'id': fields.IntegerField(),
+    })
+    language = fields.KeywordField()
+
+    @classmethod
+    def faceted_search(cls, query, language=None, using=None, index=None):
+        kwargs = {
+            'using': using or cls._doc_type.using,
+            'index': index or cls._doc_type.index,
+            'doc_types': [cls],
+            'model': cls._doc_type.model,
+            'query': query
+        }
+
+        if language:
+            kwargs['filters'] = {'language': language}
+
+        return ProjectSearch(**kwargs)
+
+
+@page_index.doc_type
+class PageDocument(DocType):
+
+    class Meta(object):
+        model = HTMLFile
+        fields = ('commit',)
+
+    project = fields.KeywordField(attr='project.slug')
+    version = fields.KeywordField(attr='version.slug')
+
+    title = fields.TextField(attr='processed_json.title')
+    headers = fields.TextField(attr='processed_json.headers')
+    content = fields.TextField(attr='processed_json.content')
+    path = fields.TextField(attr='processed_json.path')
+
+    @classmethod
+    def faceted_search(cls, query, projects_list=None, versions_list=None, using=None, index=None):
+        kwargs = {
+            'using': using or cls._doc_type.using,
+            'index': index or cls._doc_type.index,
+            'doc_types': [cls],
+            'model': cls._doc_type.model,
+            'query': query
+        }
+        filters = {}
+
+        if projects_list:
+            filters['project'] = projects_list
+        if versions_list:
+            filters['version'] = versions_list
+
+        kwargs['filters'] = filters
+
+        return FileSearch(**kwargs)
+
+    def get_queryset(self):
+        """Overwrite the default queryset to filter which files get indexed."""
+        queryset = super(PageDocument, self).get_queryset()
+
+        # Do not index files that belong to non-Sphinx projects,
+        # and skip the excluded utility pages (search, genindex, py-modindex).
+        queryset = (queryset.filter(project__documentation_type='sphinx')
+                    .exclude(name__in=SEARCH_EXCLUDED_FILE))
+        return queryset
+
+    def update(self, thing, refresh=None, action='index', **kwargs):
+        """Overwrite in order to index only objects that pass ``get_queryset``."""
+        # Objects that are not in the filtered queryset should not be indexed.
+        # TODO: remove this overwrite when the issue has been fixed.
+        # See the link below for more information:
+        # https://github.com/sabricot/django-elasticsearch-dsl/issues/111
+        if isinstance(thing, HTMLFile):
+            # It's a single model instance, not a queryset.
+            queryset = self.get_queryset()
+            obj = queryset.filter(pk=thing.pk)
+            if not obj.exists():
+                return None
+
+        return super(PageDocument, self).update(thing=thing, refresh=refresh, action=action, **kwargs)
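Both document classes expose the same entry point, so callers never build raw queries. A sketch of driving PageDocument from Python (query text and slugs are illustrative; execute() and facets are the standard elasticsearch-dsl FacetedSearch API):

    from readthedocs.search.documents import PageDocument

    search = PageDocument.faceted_search(
        query='docker images',
        projects_list=['kuma'],
        versions_list=['latest'],
    )
    results = search.execute()

    for hit in results:
        print(hit.project, hit.path, hit.title)

    # Facet counts arrive alongside the hits as (value, count, selected) tuples.
    for version, count, _ in results.facets.version:
        print(version, count)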
diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py
new file mode 100644
index 00000000000..56a1d3dec18
--- /dev/null
+++ b/readthedocs/search/faceted_search.py
@@ -0,0 +1,31 @@
+from elasticsearch_dsl import FacetedSearch, TermsFacet
+
+
+class RTDFacetedSearch(FacetedSearch):
+
+    """Overwrite the initialization in order to meet our needs."""
+
+    # TODO: Remove this overwrite when elastic/elasticsearch-dsl-py#916 is fixed.
+    # See: https://github.com/elastic/elasticsearch-dsl-py/issues/916
+
+    def __init__(self, using, index, doc_types, model, **kwargs):
+        self.using = using
+        self.index = index
+        self.doc_types = doc_types
+        self._model = model
+        super(RTDFacetedSearch, self).__init__(**kwargs)
+
+
+class ProjectSearch(RTDFacetedSearch):
+    fields = ['name^5', 'description']
+    facets = {
+        'language': TermsFacet(field='language')
+    }
+
+
+class FileSearch(RTDFacetedSearch):
+    fields = ['title^10', 'headers^5', 'content']
+    facets = {
+        'project': TermsFacet(field='project'),
+        'version': TermsFacet(field='version')
+    }
diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py
index 19c4b2ba772..f3401851cfc 100644
--- a/readthedocs/search/indexes.py
+++ b/readthedocs/search/indexes.py
@@ -19,7 +19,6 @@
 import datetime
 
 from elasticsearch import Elasticsearch, exceptions
-from elasticsearch.helpers import bulk_index
 
 from django.conf import settings
 
@@ -143,7 +142,7 @@ def bulk_index(self, data, index=None, chunk_size=500, parent=None,
             docs.append(doc)
 
         # TODO: This doesn't work with the new ES setup.
-        bulk_index(self.es, docs, chunk_size=chunk_size)
+        # bulk_index(self.es, docs, chunk_size=chunk_size)
 
     def index_document(self, data, index=None, parent=None, routing=None):
         doc = self.extract_document(data)
diff --git a/readthedocs/search/tests/conftest.py b/readthedocs/search/tests/conftest.py
index 59961f3a7e2..b6aa7799799 100644
--- a/readthedocs/search/tests/conftest.py
+++ b/readthedocs/search/tests/conftest.py
@@ -1,45 +1,41 @@
-import random
-import string
+import json
+import os
 from random import shuffle
 
 import pytest
+from django.core.management import call_command
 from django_dynamic_fixture import G
 
-from readthedocs.projects.models import Project
-from readthedocs.search.indexes import Index, ProjectIndex, PageIndex, SectionIndex
-
-from .dummy_data import DUMMY_PAGE_JSON, ALL_PROJECTS
+from readthedocs.projects.models import Project, HTMLFile
+from .dummy_data import ALL_PROJECTS, PROJECT_DATA_FILES
 
 
-@pytest.fixture(autouse=True)
-def mock_elastic_index(mocker):
-    index_name = ''.join([random.choice(string.ascii_letters) for _ in range(5)])
-    mocker.patch.object(Index, '_index', index_name.lower())
+@pytest.fixture()
+def es_index():
+    call_command('search_index', '--delete', '-f')
+    call_command('search_index', '--create')
+
+    yield
+    call_command('search_index', '--delete', '-f')
 
 
 @pytest.fixture(autouse=True)
-def es_index(mock_elastic_index):
-    # Create the index.
-    index = Index()
-    index_name = index.timestamped_index()
-    index.create_index(index_name)
-    index.update_aliases(index_name)
-    # Update mapping
-    proj = ProjectIndex()
-    proj.put_mapping()
-    page = PageIndex()
-    page.put_mapping()
-    sec = SectionIndex()
-    sec.put_mapping()
-
-    yield index
-    index.delete_index(index_name=index_name)
+def all_projects(es_index, mock_processed_json):
+    projects_list = []
+    for project_slug in ALL_PROJECTS:
+        project = G(Project, slug=project_slug, name=project_slug)
+        for file_basename in PROJECT_DATA_FILES[project.slug]:
+            # The basenames in PROJECT_DATA_FILES carry no extension,
+            # so add the .html extension here.
+            file_name = file_basename + '.html'
+            version = project.versions.all()[0]
+            f = G(HTMLFile, project=project, version=version, name=file_name)
+            f.save()
 
-@pytest.fixture
-def all_projects():
-    projects = [G(Project, slug=project_slug, name=project_slug) for project_slug in ALL_PROJECTS]
-    shuffle(projects)
-    return projects
+        projects_list.append(project)
+
+    shuffle(projects_list)
+    return projects_list
 
 
 @pytest.fixture
@@ -48,16 +44,23 @@ def project(all_projects):
     return all_projects[0]
 
 
-def get_dummy_page_json(version, *args, **kwargs):
-    dummy_page_json = DUMMY_PAGE_JSON
-    project_name = version.project.name
-    return dummy_page_json.get(project_name)
+def get_dummy_processed_json(instance):
+    project_slug = instance.project.slug
+    basename = os.path.splitext(instance.name)[0]
+    file_name = basename + '.json'
+    current_path = os.path.abspath(os.path.dirname(__file__))
+    file_path = os.path.join(current_path, "data", project_slug, file_name)
+
+    if os.path.exists(file_path):
+        with open(file_path) as f:
+            return json.load(f)
 
 
 @pytest.fixture(autouse=True)
-def mock_parse_json(mocker):
-    # patch the function from `projects.tasks` because it has been point to there
-    # http://www.voidspace.org.uk/python/mock/patch.html#where-to-patch
-    mocked_function = mocker.patch('readthedocs.projects.tasks.process_all_json_files')
-    mocked_function.side_effect = get_dummy_page_json
+def mock_processed_json(mocker):
+    # Patch the method on the model class itself; with autospec=True the mock
+    # receives the HTMLFile instance, so the side effect can locate its dummy JSON.
+    # http://www.voidspace.org.uk/python/mock/patch.html#where-to-patch
+    mocked_function = mocker.patch.object(HTMLFile, 'get_processed_json', autospec=True)
+    mocked_function.side_effect = get_dummy_processed_json
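Why patch.object with autospec matters here: with autospec, a patched unbound method receives the instance as its first argument, so the side effect can route each HTMLFile to its own dummy JSON. A self-contained illustration with a toy class (stdlib mock, not code from this PR):

    from unittest import mock

    class Page(object):
        def get_processed_json(self):
            raise RuntimeError('would hit the filesystem')

    def fake_json(instance):
        # autospec passes `self` through, mirroring get_dummy_processed_json
        return {'title': 'dummy-' + instance.name}

    with mock.patch.object(Page, 'get_processed_json', autospec=True) as mocked:
        mocked.side_effect = fake_json
        page = Page()
        page.name = 'installation.html'
        assert page.get_processed_json() == {'title': 'dummy-installation.html'}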
diff --git a/readthedocs/search/tests/data/docs/story.json b/readthedocs/search/tests/data/docs/story.json
index 69226b65209..05b1b614a7b 100644
--- a/readthedocs/search/tests/data/docs/story.json
+++ b/readthedocs/search/tests/data/docs/story.json
@@ -1,5 +1,5 @@
 {
-    "content": "Philosophy\nRead the Docs is Open Source software. We have licensed the code base as MIT, which provides almost no restrictions on the use of the code.\nHowever, as a project there are things that we care about more than others. We built Read the Docs to support in the Open Source community. The code is open for people to contribute to, so that they may build features into https://readthedocs.org that they want. We also believe sharing the code openly is a valuable learning tool, especially for demonsrating how to collaborate and maintain an enormous website.\nOfficial Support\nThe time of the core developers of Read the Docs is limited. We provide official support for the following things:\nLocal development on the Python code base\nUsage of https://readthedocs.org for Open Source projects\nBug fixes in the code base, as it applies to running it on https://readthedocs.org\nUnsupported\nThere are use cases that we don\u2019t support, because it doesn\u2019t further our goal of promoting in the Open Source Community.\nWe do not support:\nSpecific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting\nCustom s of Read the Docs at your company\n of Read the Docs on other platforms\nAny issues outside of the Read the Docs Python Code\nRationale\nRead the Docs was founded to improve in the Open Source Community. We fully recognize and allow the code to be used for internal installs at companies, but we will not spend our time supporting it. Our time is limited, and we want to spend it on the mission that we set out to originally support.\nIf you feel strongly about installing Read the Docs internal to a company, we will happily link to third party resources on this topic. Please open an issue with a proposal if you want to take on this task.",
+    "content": "ReadtheDocsPhilosophy\nRead the Docs is Open Source software. We have licensed the code base as MIT, which provides almost no restrictions on the use of the code.\nHowever, as a project there are things that we care about more than others. We built Read the Docs to support in the Open Source community. The code is open for people to contribute to, so that they may build features into https://readthedocs.org that they want. We also believe sharing the code openly is a valuable learning tool, especially for demonsrating how to collaborate and maintain an enormous website.\nOfficial Support\nThe time of the core developers of Read the Docs is limited. We provide official support for the following things:\nLocal development on the Python code base\nUsage of https://readthedocs.org for Open Source projects\nBug fixes in the code base, as it applies to running it on https://readthedocs.org\nUnsupported\nThere are use cases that we don\u2019t support, because it doesn\u2019t further our goal of promoting in the Open Source Community.\nWe do not support:\nSpecific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting\nCustom s of Read the Docs at your company\n of Read the Docs on other platforms\nAny issues outside of the Read the Docs Python Code\nRationale\nRead the Docs was founded to improve in the Open Source Community. We fully recognize and allow the code to be used for internal installs at companies, but we will not spend our time supporting it. Our time is limited, and we want to spend it on the mission that we set out to originally support.\nIf you feel strongly about installing Read the Docs internal to a company, we will happily link to third party resources on this topic. Please open an issue with a proposal if you want to take on this task.",
     "headers": [
         "Official Support",
         "Unsupported",
diff --git a/readthedocs/search/tests/data/docs/wiping.json b/readthedocs/search/tests/data/docs/wiping.json
index a54889e05fa..ac1cebca67e 100644
--- a/readthedocs/search/tests/data/docs/wiping.json
+++ b/readthedocs/search/tests/data/docs/wiping.json
@@ -1,5 +1,5 @@
 {
-    "content": "Wiping a Build Environment\nSometimes it happen that your Builds start failing because the build environment where the is created is stale or broken. This could happen for a couple of different reasons like pip not upgrading a package properly or a corrupted cached Python package.\nIn any of these cases (and many others), the solution could be just wiping out the existing build environment files and allow Read the Docs to create a new fresh one.\nFollow these steps to wipe the build environment:\nGo to Versions\nClick on the Edit button of the version you want to wipe on the right side of the page\nGo to the bottom of the page and click the wipe link, next to the \u201cSave\u201d button\nNote\nBy wiping the build environment, all the rst, md, and code files associated with it will be removed but not the already built (HTML and PDF files). Your will still online after wiping the build environment.\nNow you can re-build the version with a fresh build environment!",
+    "content": "ReadtheDocsWiping a Build Environment\nSometimes it happen that your Builds start failing because the build environment where the is created is stale or broken. This could happen for a couple of different reasons like pip not upgrading a package properly or a corrupted cached Python package.\nIn any of these cases (and many others), the solution could be just wiping out the existing build environment files and allow Read the Docs to create a new fresh one.\nFollow these steps to wipe the build environment:\nGo to Versions\nClick on the Edit button of the version you want to wipe on the right side of the page\nGo to the bottom of the page and click the wipe link, next to the \u201cSave\u201d button\nNote\nBy wiping the build environment, all the rst, md, and code files associated with it will be removed but not the already built (HTML and PDF files). Your will still online after wiping the build environment.\nNow you can re-build the version with a fresh build environment!",
     "headers": [
         "Wiping a Build Environment"
     ],
diff --git a/readthedocs/search/tests/data/kuma/docker.json b/readthedocs/search/tests/data/kuma/docker.json
index 3f86764073a..eb218b4dfb0 100644
--- a/readthedocs/search/tests/data/kuma/docker.json
+++ b/readthedocs/search/tests/data/kuma/docker.json
@@ -1,5 +1,5 @@
 {
-    "content": "kuma-Docker Docker is used for development and (soon) for deployment.\nDocker Images\nDocker images are used in development, usually with the local working files mounted in the images to set behaviour.\nImages are built by Jenkins, after tests pass, and are published to quay.io. We try to store the configuration in the environment, so that the published images can be used in deployments by setting environment variables to deployment-specific values.\nHere are some of the images used in the Kuma project:\nkuma\nThe kuma Docker image builds on the kuma_base image, installing a kuma branch and building the assets needed for running as a webservice. The environment can be customized for different deployments.\nThe image can be recreated locally with make build-kuma.\nThe image tagged latest is used by default for development. It can be created locally with make build-kuma VERSION=latest. The latest image is created from the master branch in Jenkins and published to quay.io.\nkuma_base\nThe kuma_base Docker image contains the OS and libraries (C, Python, and Node.js) that support the kuma project. The kuma image extends this by installing the kuma source and building assets needed for production.\nThe image can be recreated locally with make build-base.\nThe image tagged latest is used by default for development. It can be created localled with make build-base VERSION=latest. The latest image is created from the master branch in Jenkins and published to quay.io\nkumascript\nThe kumascript Docker image contains the kumascript rendering engine and support files. The environment can be customized for different deployments.\nThe image can be recreated locally with make build-kumascript.\nThe image tagged latest is used by default for development. It can be created locally with make build-kumascript KS_VERSION=latest. The latest image is created from the master branch in Jenkins and published to quay.io.\nintegration-tests\nThe integration-tests Docker image contains browser-based integration tests that check the functionality of a running Kuma deployment.\nThe image can be recreated locally with docker build -f docker/images/integration-tests/ ., but this is only necessary for image development. Most developer will follow the Client-side testing to develop and run these integration tests.\nThe image is built and used in Jenkins in the stage-integration-tests and prod-integration-tests pipelines, configured by scripts in the Jenkinsfiles folder. It is not published to quay.io.",
+    "content": "kumadocker Docker is used for development and (soon) for deployment.\nDocker Images\nDocker images are used in development, usually with the local working files mounted in the images to set behaviour.\nImages are built by Jenkins, after tests pass, and are published to quay.io. We try to store the configuration in the environment, so that the published images can be used in deployments by setting environment variables to deployment-specific values.\nHere are some of the images used in the Kuma project:\nkuma\nThe kuma Docker image builds on the kuma_base image, installing a kuma branch and building the assets needed for running as a webservice. The environment can be customized for different deployments.\nThe image can be recreated locally with make build-kuma.\nThe image tagged latest is used by default for development. It can be created locally with make build-kuma VERSION=latest. The latest image is created from the master branch in Jenkins and published to quay.io.\nkuma_base\nThe kuma_base Docker image contains the OS and libraries (C, Python, and Node.js) that support the kuma project. The kuma image extends this by installing the kuma source and building assets needed for production.\nThe image can be recreated locally with make build-base.\nThe image tagged latest is used by default for development. It can be created localled with make build-base VERSION=latest. The latest image is created from the master branch in Jenkins and published to quay.io\nkumascript\nThe kumascript Docker image contains the kumascript rendering engine and support files. The environment can be customized for different deployments.\nThe image can be recreated locally with make build-kumascript.\nThe image tagged latest is used by default for development. It can be created locally with make build-kumascript KS_VERSION=latest. The latest image is created from the master branch in Jenkins and published to quay.io.\nintegration-tests\nThe integration-tests Docker image contains browser-based integration tests that check the functionality of a running Kuma deployment.\nThe image can be recreated locally with docker build -f docker/images/integration-tests/ ., but this is only necessary for image development. Most developer will follow the Client-side testing to develop and run these integration tests.\nThe image is built and used in Jenkins in the stage-integration-tests and prod-integration-tests pipelines, configured by scripts in the Jenkinsfiles folder. It is not published to quay.io.",
     "headers": [
         "Docker",
         "Docker Images",
diff --git a/readthedocs/search/tests/data/kuma/documentation.json b/readthedocs/search/tests/data/kuma/documentation.json
index 310a01d05c8..6add1596dc3 100644
--- a/readthedocs/search/tests/data/kuma/documentation.json
+++ b/readthedocs/search/tests/data/kuma/documentation.json
@@ -1,5 +1,5 @@
 {
-    "content": "kuma-Documentation This documentation is generated and published at Read the Docs whenever the master branch is updated. GitHub can render our .rst documents as ReStructuredText, which is close enough to Sphinx for most code reviews, without features like links between documents.\nIt is occasionally necessary to generate the documentation locally. It is easiest to do this with a virtualenv on the host system, using only to regenerate the MDN Sphinx template. If you are not comfortable with that style of development, it can be done entirely in using -compose.\nGenerating documentation\nSphinx uses a Makefile in the docs subfolder to build documentation in several formats. MDN only uses the HTML format, and the generated document index is at docs/_build/html/index.html.\nTo generate the documentation in a virtualenv on the host machine, first install the requirements:\npip install -r requirements/docs.txt\nThen switch to the docs folder to use the Makefile:\ncd docs make html python -m webbrowser file://${PWD}/_build/html/index.html\nTo generate the documentation with :\n-compose run --rm --user $(id -u) web sh -c \"\\ virtualenv /tmp/.venvs/docs && \\ . /tmp/.venvs/docs/bin/activate && \\ pip install -r /app/requirements/docs.txt && \\ cd /app/docs && \\ make html\" python -m webbrowser file://${PWD}/docs/_build/html/index.html\nA virtualenv is required, to avoid a pip bug when changing the version of a system-installed package.",
+    "content": "kumadocumentation This documentation is generated and published at Read the Docs whenever the master branch is updated. GitHub can render our .rst documents as ReStructuredText, which is close enough to Sphinx for most code reviews, without features like links between documents.\nIt is occasionally necessary to generate the documentation locally. It is easiest to do this with a virtualenv on the host system, using only to regenerate the MDN Sphinx template. If you are not comfortable with that style of development, it can be done entirely in using -compose.\nGenerating documentation\nSphinx uses a Makefile in the docs subfolder to build documentation in several formats. MDN only uses the HTML format, and the generated document index is at docs/_build/html/index.html.\nTo generate the documentation in a virtualenv on the host machine, first install the requirements:\npip install -r requirements/docs.txt\nThen switch to the docs folder to use the Makefile:\ncd docs make html python -m webbrowser file://${PWD}/_build/html/index.html\nTo generate the documentation with :\n-compose run --rm --user $(id -u) web sh -c \"\\ virtualenv /tmp/.venvs/docs && \\ . /tmp/.venvs/docs/bin/activate && \\ pip install -r /app/requirements/docs.txt && \\ cd /app/docs && \\ make html\" python -m webbrowser file://${PWD}/docs/_build/html/index.html\nA virtualenv is required, to avoid a pip bug when changing the version of a system-installed package.",
     "headers": [
         "Documentation",
         "Generating documentation"
diff --git a/readthedocs/search/tests/data/pipeline/installation.json b/readthedocs/search/tests/data/pipeline/installation.json
index 30fb78d1d78..37bf0170c1b 100644
--- a/readthedocs/search/tests/data/pipeline/installation.json
+++ b/readthedocs/search/tests/data/pipeline/installation.json
@@ -1,5 +1,5 @@
 {
-    "content": "Pipeline-Installation Either check out Pipeline from GitHub or to pull a release off PyPI\npip install django-pipeline\nAdd \u2018pipeline\u2019 to your INSTALLED_APPS\nINSTALLED_APPS = ( 'pipeline', )\nUse a pipeline storage for STATICFILES_STORAGE\nSTATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nNote\nYou need to use Django>=1.7 to be able to use this version of pipeline.\nUpgrading from 1.3\nTo upgrade from pipeline 1.3, you will need to follow these steps:\nUpdate templates to use the new syntax\n{# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}\n{# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nUpgrading from 1.5\nTo upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration.\nRecommendations\nPipeline\u2019s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. It can be downloaded from: https://github.com/yui/yuglify/.\nIf you do not install yuglify, make sure to disable the compressor in your settings.",
+    "content": "PipelineInstallation Either check out Pipeline from GitHub or to pull a release off PyPI\npip install django-pipeline\nAdd \u2018pipeline\u2019 to your INSTALLED_APPS\nINSTALLED_APPS = ( 'pipeline', )\nUse a pipeline storage for STATICFILES_STORAGE\nSTATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nNote\nYou need to use Django>=1.7 to be able to use this version of pipeline.\nUpgrading from 1.3\nTo upgrade from pipeline 1.3, you will need to follow these steps:\nUpdate templates to use the new syntax\n{# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}\n{# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nUpgrading from 1.5\nTo upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration.\nRecommendations\nPipeline\u2019s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. It can be downloaded from: https://github.com/yui/yuglify/.\nIf you do not install yuglify, make sure to disable the compressor in your settings.",
     "headers": [
         "Installation",
         "Upgrading from 1.3",
diff --git a/readthedocs/search/tests/data/pipeline/signals.json b/readthedocs/search/tests/data/pipeline/signals.json
index 3bf3a80537c..b8113de979f 100644
--- a/readthedocs/search/tests/data/pipeline/signals.json
+++ b/readthedocs/search/tests/data/pipeline/signals.json
@@ -1,5 +1,5 @@
 {
-    "content": "pipeline-Signals List of all signals sent by pipeline.\ncss_compressed\npipeline.signals.css_compressed\nWhenever a css package is compressed, this signal is sent after the compression.\nArguments sent with this signal :\nsender:\nThe Packager class that compressed the group.\npackage:\nThe package actually compressed.\njs_compressed\npipeline.signals.js_compressed\nWhenever a js package is compressed, this signal is sent after the compression.\nArguments sent with this signal :\nsender:\nThe Packager class that compressed the group.\npackage:\nThe package actually compressed.",
+    "content": "pipelineSignals List of all signals sent by pipeline.\ncss_compressed\npipeline.signals.css_compressed\nWhenever a css package is compressed, this signal is sent after the compression.\nArguments sent with this signal :\nsender:\nThe Packager class that compressed the group.\npackage:\nThe package actually compressed.\njs_compressed\npipeline.signals.js_compressed\nWhenever a js package is compressed, this signal is sent after the compression.\nArguments sent with this signal :\nsender:\nThe Packager class that compressed the group.\npackage:\nThe package actually compressed.",
     "headers": [
         "Signals",
         "css_compressed",
diff --git a/readthedocs/search/tests/dummy_data.py b/readthedocs/search/tests/dummy_data.py
index fbd4eed1f11..ed1d5c7e2f6 100644
--- a/readthedocs/search/tests/dummy_data.py
+++ b/readthedocs/search/tests/dummy_data.py
@@ -1,28 +1,7 @@
-import json
-import os
-
-_DATA_FILES = {
-    'pipeline': ['installation.json', 'signals.json'],
-    'kuma': ['documentation.json', 'docker.json'],
-    'docs': ['story.json', 'wiping.json'],
+PROJECT_DATA_FILES = {
+    'pipeline': ['installation', 'signals'],
+    'kuma': ['documentation', 'docker'],
+    'docs': ['story', 'wiping'],
 }
-
-
-def _get_dummy_json():
-    dictionary = {}
-    for key, value in _DATA_FILES.items():
-        data = []
-        for file_name in value:
-            current_path = os.path.abspath(os.path.dirname(__file__))
-            path = os.path.join(current_path, "data", key, file_name)
-            with open(path) as f:
-                content = json.load(f)
-            data.append(content)
-
-        dictionary[key] = data
-
-    return dictionary
-
-
-DUMMY_PAGE_JSON = _get_dummy_json()
-ALL_PROJECTS = DUMMY_PAGE_JSON.keys()
+ALL_PROJECTS = PROJECT_DATA_FILES.keys()
diff --git a/readthedocs/search/tests/test_views.py b/readthedocs/search/tests/test_views.py
index 096e67adb03..fad2e666120 100644
--- a/readthedocs/search/tests/test_views.py
+++ b/readthedocs/search/tests/test_views.py
@@ -1,8 +1,13 @@
+import random
+import string
+
 import pytest
 from django.core.management import call_command
 from django.core.urlresolvers import reverse
 from django_dynamic_fixture import G
+from django_elasticsearch_dsl import Index
 from pyquery import PyQuery as pq
+from pytest_mock import mock
 
 from readthedocs.builds.constants import LATEST
 from readthedocs.builds.models import Version
@@ -12,13 +17,9 @@
 
 @pytest.mark.django_db
 @pytest.mark.search
-class TestElasticSearch(object):
+class TestProjectSearch(object):
     url = reverse('search')
 
-    def _reindex_elasticsearch(self, es_index):
-        call_command('reindex_elasticsearch')
-        es_index.refresh_index()
-
     def _get_search_result(self, url, client, search_params):
         resp = client.get(url, search_params)
         assert resp.status_code == 200
@@ -27,21 +28,16 @@ def _get_search_result(self, url, client, search_params):
         result = page.find('.module-list-wrapper .module-item-title')
         return result, page
 
-    @pytest.fixture(autouse=True)
-    def elastic_index(self, mock_parse_json, all_projects, es_index):
-        self._reindex_elasticsearch(es_index=es_index)
-
     def test_search_by_project_name(self, client, project):
         result, _ = self._get_search_result(url=self.url, client=client,
                                             search_params={'q': project.name})
 
         assert project.name.encode('utf-8') in result.text().encode('utf-8')
 
-    def test_search_project_show_languages(self, client, project, es_index):
+    def test_search_project_show_languages(self, client, project):
         """Test that searching project should show all available languages"""
         # Create a project in bn and add it as a translation
         G(Project, language='bn', name=project.name)
-        self._reindex_elasticsearch(es_index=es_index)
 
         result, page = self._get_search_result(url=self.url, client=client,
                                                search_params={'q': project.name})
@@ -51,11 +47,10 @@ def test_search_project_show_languages(self, client, project, es_index):
         assert len(content) == 2
         assert 'bn' in content.text()
 
-    def test_search_project_filter_language(self, client, project, es_index):
+    def test_search_project_filter_language(self, client, project):
         """Test that searching project filtered according to language"""
         # Create a project in bn and add it as a translation
         translate = G(Project, language='bn', name=project.name)
-        self._reindex_elasticsearch(es_index=es_index)
         search_params = {'q': project.name, 'language': 'bn'}
 
         result, page = self._get_search_result(url=self.url, client=client,
@@ -65,10 +60,24 @@ def test_search_project_filter_language(self, client, project, es_index):
         assert len(result) == 1
 
         content = page.find('.navigable .language-list')
-        # There should be 1 languages
-        assert len(content) == 1
+        # There should be 2 languages because both `en` and `bn` should be listed
+        assert len(content) == 2
         assert 'bn' in content.text()
 
+
+@pytest.mark.django_db
+@pytest.mark.search
+class TestElasticSearch(object):
+    url = reverse('search')
+
+    def _get_search_result(self, url, client, search_params):
+        resp = client.get(url, search_params)
+        assert resp.status_code == 200
+
+        page = pq(resp.content)
+        result = page.find('.module-list-wrapper .module-item-title')
+        return result, page
+
     @pytest.mark.parametrize('data_type', ['content', 'headers', 'title'])
     @pytest.mark.parametrize('page_num', [0, 1])
     def test_search_by_file_content(self, client, project, data_type, page_num):
@@ -77,9 +86,9 @@ def test_search_by_file_content(self, client, project, data_type, page_num):
         result, _ = self._get_search_result(url=self.url, client=client,
                                             search_params={'q': query, 'type': 'file'})
-        assert len(result) == 1
+        assert len(result) == 1, ('Failed for query: ' + query)
 
-    def test_file_search_show_projects(self, client):
+    def test_file_search_show_projects(self, client, all_projects):
         """Test that search result page shows list of projects while searching for files"""
 
         # `Github` word is present both in `kuma` and `pipeline` files
@@ -131,7 +140,6 @@ def test_file_search_show_versions(self, client, all_projects, es_index, setting
         project = all_projects[0]
         # Create some versions of the project
         versions = [G(Version, project=project) for _ in range(3)]
-        self._reindex_elasticsearch(es_index=es_index)
 
         query = get_search_query_from_project_file(project_slug=project.slug)
 
@@ -163,7 +171,6 @@ def test_file_search_subprojects(self, client, all_projects, es_index):
         subproject = all_projects[1]
         # Add another project as subproject of the project
         project.add_subproject(subproject)
-        self._reindex_elasticsearch(es_index=es_index)
 
         # Now search with subproject content but explicitly filter by the parent project
         query = get_search_query_from_project_file(project_slug=subproject.slug)
diff --git a/readthedocs/search/tests/utils.py b/readthedocs/search/tests/utils.py
index a48ea83dd74..0a049944ea0 100644
--- a/readthedocs/search/tests/utils.py
+++ b/readthedocs/search/tests/utils.py
@@ -1,4 +1,4 @@
-from readthedocs.search.tests.dummy_data import DUMMY_PAGE_JSON
+from readthedocs.projects.models import HTMLFile
 
 
 def get_search_query_from_project_file(project_slug, page_num=0, data_type='title'):
@@ -6,8 +6,9 @@ def get_search_query_from_project_file(project_slug, page_num=0, data_type='title'):
 
     Query is generated from the value of `data_type`
     """
-    all_pages = DUMMY_PAGE_JSON[project_slug]
-    file_data = all_pages[page_num]
+    html_file = HTMLFile.objects.filter(project__slug=project_slug).order_by('id')[page_num]
+
+    file_data = html_file.processed_json
     query_data = file_data[data_type]
 
     if data_type in ['headers']:
diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py
index 7d3a51d5fc2..385b7d371ca 100644
--- a/readthedocs/search/views.py
+++ b/readthedocs/search/views.py
@@ -8,10 +8,12 @@
 from pprint import pprint
 
 from django.conf import settings
-from django.shortcuts import render
+from django.shortcuts import render, get_object_or_404
 
 from readthedocs.builds.constants import LATEST
+from readthedocs.projects.models import Project
 from readthedocs.search import lib as search_lib
+from readthedocs.search.documents import ProjectDocument, PageDocument
 
 log = logging.getLogger(__name__)
 LOG_TEMPLATE = u'(Elastic Search) [{user}:{type}] [{project}:{version}:{language}] {msg}'
@@ -45,26 +47,27 @@ def elastic_search(request):
 
     if user_input.query:
         if user_input.type == 'project':
-            results = search_lib.search_project(
-                request, user_input.query, language=user_input.language)
+            project_search = ProjectDocument.faceted_search(query=user_input.query,
+                                                            language=user_input.language)
+            results = project_search.execute()
+            facets = results.facets
 
         elif user_input.type == 'file':
-            results = search_lib.search_file(
-                request, user_input.query, project_slug=user_input.project,
-                version_slug=user_input.version, taxonomy=user_input.taxonomy)
+            kwargs = {}
+            if user_input.project:
+                queryset = Project.objects.api(request.user).only('slug')
+                project = get_object_or_404(queryset, slug=user_input.project)
 
-        if results:
-            # pre and post 1.0 compat
-            for num, hit in enumerate(results['hits']['hits']):
-                for key, val in list(hit['fields'].items()):
-                    if isinstance(val, list):
-                        results['hits']['hits'][num]['fields'][key] = val[0]
+                subprojects_slug = (queryset.filter(superprojects__parent_id=project.id)
+                                    .values_list('slug', flat=True))
 
-            if 'facets' in results:
-                for facet_type in ['project', 'version', 'taxonomy', 'language']:
-                    if facet_type in results['facets']:
-                        facets[facet_type] = collections.OrderedDict()
-                        for term in results['facets'][facet_type]['terms']:
-                            facets[facet_type][term['term']] = term['count']
+                projects_list = [project.slug] + list(subprojects_slug)
+                kwargs['projects_list'] = projects_list
+
+            if user_input.version:
+                kwargs['versions_list'] = user_input.version
+
+            page_search = PageDocument.faceted_search(query=user_input.query, **kwargs)
+            results = page_search.execute()
+            facets = results.facets
 
     if settings.DEBUG:
         print(pprint(results))
@@ -87,7 +90,7 @@ def elastic_search(request):
     template_vars = user_input._asdict()
     template_vars.update({
         'results': results,
-        'facets': facets,
+        'facets': facets
     })
     return render(
         request,
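With the view now delegating to the document classes, a scoped file search can be exercised end to end. A sketch using Django's test client (the slugs and query text are illustrative):

    from django.core.urlresolvers import reverse
    from django.test import Client

    client = Client()

    # The view resolves 'kuma' plus any subproject slugs into projects_list,
    # hands them to PageDocument.faceted_search as filters, and renders the
    # hits and facet counts into the search template.
    response = client.get(reverse('search'), {
        'q': 'docker images',
        'type': 'file',
        'project': 'kuma',
        'version': 'latest',
    })
    assert response.status_code == 200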
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
index fcacc58bdf0..718b5fd621c 100644
--- a/readthedocs/settings/base.py
+++ b/readthedocs/settings/base.py
@@ -86,6 +86,7 @@ def INSTALLED_APPS(self):  # noqa
         'django_extensions',
         'messages_extends',
         'tastypie',
+        'django_elasticsearch_dsl',
 
         # our apps
         'readthedocs.projects',
@@ -101,6 +102,7 @@ def INSTALLED_APPS(self):  # noqa
         'readthedocs.notifications',
         'readthedocs.integrations',
         'readthedocs.analytics',
+        'readthedocs.search',
 
         # allauth
@@ -313,8 +315,37 @@ def USE_PROMOS(self):  # noqa
     # Elasticsearch settings.
     ES_HOSTS = ['127.0.0.1:9200']
-    ES_DEFAULT_NUM_REPLICAS = 0
-    ES_DEFAULT_NUM_SHARDS = 5
+    ELASTICSEARCH_DSL = {
+        'default': {
+            'hosts': '127.0.0.1:9200'
+        },
+    }
+
+    # ANALYZER = 'analysis': {
+    #     'analyzer': {
+    #         'default_icu': {
+    #             'type': 'custom',
+    #             'tokenizer': 'icu_tokenizer',
+    #             'filter': ['word_delimiter', 'icu_folding', 'icu_normalizer'],
+    #         }
+    #     }
+    # }
+
+    ES_INDEXES = {
+        'project': {
+            'name': 'project_index',
+            'settings': {'number_of_shards': 5,
+                         'number_of_replicas': 0
+                         }
+        },
+        'page': {
+            'name': 'page_index',
+            'settings': {
+                'number_of_shards': 5,
+                'number_of_replicas': 0,
+            }
+        },
+    }
 
     ALLOWED_HOSTS = ['*']
diff --git a/readthedocs/settings/test.py b/readthedocs/settings/test.py
index f49dc8584b1..79c7486e3cb 100644
--- a/readthedocs/settings/test.py
+++ b/readthedocs/settings/test.py
@@ -17,6 +17,14 @@ class CommunityTestSettings(CommunityDevSettings):
     DEBUG = False
     TEMPLATE_DEBUG = False
 
+    @property
+    def ES_INDEXES(self):  # noqa - avoid pep8 N802
+        # Copy the inherited configuration so the test prefix is not
+        # applied repeatedly to the shared class-level dict.
+        es_indexes = {
+            key: dict(value)
+            for key, value in super(CommunityTestSettings, self).ES_INDEXES.items()
+        }
+        for index_conf in es_indexes.values():
+            index_conf['name'] = "test_{}".format(index_conf['name'])
+
+        return es_indexes
+
     @property
     def LOGGING(self):  # noqa - avoid pep8 N802
         logging = super(CommunityDevSettings, self).LOGGING
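A quick sketch of the intended effect of the test override above (instantiating the settings class directly is an assumption for illustration; django-configurations normally does this at setup):

    from readthedocs.settings.test import CommunityTestSettings

    settings = CommunityTestSettings()

    # Base settings name the indexes 'project_index' and 'page_index';
    # the property rewrites them so the es_index fixture's create/delete
    # cycle never touches the development indexes.
    assert settings.ES_INDEXES['project']['name'] == 'test_project_index'
    assert settings.ES_INDEXES['page']['name'] == 'test_page_index'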
diff --git a/readthedocs/templates/search/elastic_search.html b/readthedocs/templates/search/elastic_search.html
index 1a3dd0211e8..a79da82331a 100644
--- a/readthedocs/templates/search/elastic_search.html
+++ b/readthedocs/templates/search/elastic_search.html
@@ -26,8 +26,8 @@
       {% if facets.language %}
-          {% if result.fields.name %}
+          {% if result.name %}
             {# Project #}
-            {{ result.fields.name }}
-            {% for fragment in result.highlight.description|slice:":3" %}
+            {{ result.name }}
+            {{ result.meta.description }}
+            {% for fragment in result.meta.highlight.description|slice:":3" %}
               ...{{ fragment|safe }}...
@@ -156,11 +138,11 @@
               ...{{ fragment|safe }}...
diff --git a/requirements/pip.txt b/requirements/pip.txt
index 8966075bbd1..72792d7823a 100644
--- a/requirements/pip.txt
+++ b/requirements/pip.txt
@@ -47,8 +47,9 @@ httplib2==0.11.3
 GitPython==2.1.10
 
 # Search
-elasticsearch==1.5.0
-pyelasticsearch==0.7.1
+elasticsearch==6.2.0
+elasticsearch-dsl==6.1.0
+django-elasticsearch-dsl==0.5.0
 pyquery==1.4.0
 
 # Utils
diff --git a/scripts/travis/install_elasticsearch.sh b/scripts/travis/install_elasticsearch.sh
index f63f3ae6168..b67a6d15188 100755
--- a/scripts/travis/install_elasticsearch.sh
+++ b/scripts/travis/install_elasticsearch.sh
@@ -2,6 +2,6 @@ if [ $ES_DOWNLOAD_URL ]
 then
   wget ${ES_DOWNLOAD_URL}
   tar -xzf elasticsearch-${ES_VERSION}.tar.gz
-  ./elasticsearch-${ES_VERSION}/bin/plugin -install elasticsearch/elasticsearch-analysis-icu/2.3.0
-  ./elasticsearch-${ES_VERSION}/bin/elasticsearch &
+  ./elasticsearch-${ES_VERSION}/bin/elasticsearch-plugin install analysis-icu
+  ./elasticsearch-${ES_VERSION}/bin/elasticsearch -d
 fi
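With the install script now starting Elasticsearch as a daemon, a quick way to confirm the upgraded node is reachable before the test suite runs. A sketch using the elasticsearch client pinned above (host and exact version are assumptions tied to this configuration):

    from elasticsearch import Elasticsearch

    es = Elasticsearch(['127.0.0.1:9200'])

    # info() returns cluster metadata; after this change the version
    # should report 6.2.4 instead of the old 1.3.9 series.
    print(es.info()['version']['number'])

    # The analysis-icu plugin installed by the script is listed here.
    print(es.cat.plugins())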