-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Upgrade Elasticsearch to version 6.x #4211
Changes from 15 commits
3c41b42
6410495
272b50a
b8f1a06
6c430e5
035c312
746b378
de47978
ab6fffb
3523fab
9a5b0ed
e9b1c03
37f6936
f730556
05f5e05
0965a94
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from django.db import models | ||
|
||
|
||
class HTMLFileManager(models.Manager):

    """Manager whose queryset only contains rows whose name ends in ``.html``."""

    def get_queryset(self):
        """Return the parent manager's queryset narrowed to ``*.html`` names."""
        base_queryset = super(HTMLFileManager, self).get_queryset()
        return base_queryset.filter(name__endswith='.html')
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# -*- coding: utf-8 -*- | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This migration should have a name, saying what it does. I also worry that this migration will become out of date with a long-running branch beside the … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes! We can absolutely do this. I will keep this in mind. |
||
# Generated by Django 1.9.13 on 2018-06-18 16:45 | ||
from __future__ import unicode_literals | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):

    """Add the ``HTMLFile`` proxy model and re-declare several ``Project`` fields."""

    dependencies = [
        ('projects', '0025_show-version-warning-existing-projects'),
    ]

    operations = [
        # Proxy on top of ``projects.importedfile``; it declares no fields
        # of its own.
        migrations.CreateModel(
            name='HTMLFile',
            fields=[],
            options={'proxy': True},
            bases=('projects.importedfile',),
        ),
        migrations.AlterField(
            model_name='project',
            name='comment_moderation',
            field=models.BooleanField(
                default=False,
                verbose_name='Comment Moderation',
            ),
        ),
        migrations.AlterField(
            model_name='project',
            name='documentation_type',
            field=models.CharField(
                choices=[
                    ('auto', 'Automatically Choose'),
                    ('sphinx', 'Sphinx Html'),
                    ('mkdocs', 'Mkdocs (Markdown)'),
                    ('sphinx_htmldir', 'Sphinx HtmlDir'),
                    ('sphinx_singlehtml', 'Sphinx Single Page HTML'),
                ],
                default='sphinx',
                help_text='Type of documentation you are building. <a href="http://www.sphinx-doc.org/en/stable/builders.html#sphinx.builders.html.DirectoryHTMLBuilder">More info</a>.',
                max_length=20,
                verbose_name='Documentation type',
            ),
        ),
        migrations.AlterField(
            model_name='project',
            name='language',
            field=models.CharField(
                choices=[
                    ('aa', 'Afar'), ('ab', 'Abkhaz'), ('af', 'Afrikaans'),
                    ('am', 'Amharic'), ('ar', 'Arabic'), ('as', 'Assamese'),
                    ('ay', 'Aymara'), ('az', 'Azerbaijani'), ('ba', 'Bashkir'),
                    ('be', 'Belarusian'), ('bg', 'Bulgarian'), ('bh', 'Bihari'),
                    ('bi', 'Bislama'), ('bn', 'Bengali'), ('bo', 'Tibetan'),
                    ('br', 'Breton'), ('ca', 'Catalan'), ('co', 'Corsican'),
                    ('cs', 'Czech'), ('cy', 'Welsh'), ('da', 'Danish'),
                    ('de', 'German'), ('dz', 'Dzongkha'), ('el', 'Greek'),
                    ('en', 'English'), ('eo', 'Esperanto'), ('es', 'Spanish'),
                    ('et', 'Estonian'), ('eu', 'Basque'), ('fa', 'Iranian'),
                    ('fi', 'Finnish'), ('fj', 'Fijian'), ('fo', 'Faroese'),
                    ('fr', 'French'), ('fy', 'Western Frisian'), ('ga', 'Irish'),
                    ('gd', 'Scottish Gaelic'), ('gl', 'Galician'), ('gn', 'Guarani'),
                    ('gu', 'Gujarati'), ('ha', 'Hausa'), ('hi', 'Hindi'),
                    ('he', 'Hebrew'), ('hr', 'Croatian'), ('hu', 'Hungarian'),
                    ('hy', 'Armenian'), ('ia', 'Interlingua'), ('id', 'Indonesian'),
                    ('ie', 'Interlingue'), ('ik', 'Inupiaq'), ('is', 'Icelandic'),
                    ('it', 'Italian'), ('iu', 'Inuktitut'), ('ja', 'Japanese'),
                    ('jv', 'Javanese'), ('ka', 'Georgian'), ('kk', 'Kazakh'),
                    ('kl', 'Kalaallisut'), ('km', 'Khmer'), ('kn', 'Kannada'),
                    ('ko', 'Korean'), ('ks', 'Kashmiri'), ('ku', 'Kurdish'),
                    ('ky', 'Kyrgyz'), ('la', 'Latin'), ('ln', 'Lingala'),
                    ('lo', 'Lao'), ('lt', 'Lithuanian'), ('lv', 'Latvian'),
                    ('mg', 'Malagasy'), ('mi', 'Maori'), ('mk', 'Macedonian'),
                    ('ml', 'Malayalam'), ('mn', 'Mongolian'), ('mr', 'Marathi'),
                    ('ms', 'Malay'), ('mt', 'Maltese'), ('my', 'Burmese'),
                    ('na', 'Nauru'), ('ne', 'Nepali'), ('nl', 'Dutch'),
                    ('no', 'Norwegian'), ('oc', 'Occitan'), ('om', 'Oromo'),
                    ('or', 'Oriya'), ('pa', 'Panjabi'), ('pl', 'Polish'),
                    ('ps', 'Pashto'), ('pt', 'Portuguese'), ('qu', 'Quechua'),
                    ('rm', 'Romansh'), ('rn', 'Kirundi'), ('ro', 'Romanian'),
                    ('ru', 'Russian'), ('rw', 'Kinyarwanda'), ('sa', 'Sanskrit'),
                    ('sd', 'Sindhi'), ('sg', 'Sango'), ('si', 'Sinhala'),
                    ('sk', 'Slovak'), ('sl', 'Slovenian'), ('sm', 'Samoan'),
                    ('sn', 'Shona'), ('so', 'Somali'), ('sq', 'Albanian'),
                    ('sr', 'Serbian'), ('ss', 'Swati'), ('st', 'Southern Sotho'),
                    ('su', 'Sudanese'), ('sv', 'Swedish'), ('sw', 'Swahili'),
                    ('ta', 'Tamil'), ('te', 'Telugu'), ('tg', 'Tajik'),
                    ('th', 'Thai'), ('ti', 'Tigrinya'), ('tk', 'Turkmen'),
                    ('tl', 'Tagalog'), ('tn', 'Tswana'), ('to', 'Tonga'),
                    ('tr', 'Turkish'), ('ts', 'Tsonga'), ('tt', 'Tatar'),
                    ('tw', 'Twi'), ('ug', 'Uyghur'), ('uk', 'Ukrainian'),
                    ('ur', 'Urdu'), ('uz', 'Uzbek'), ('vi', 'Vietnamese'),
                    ('vo', 'Volapuk'), ('wo', 'Wolof'), ('xh', 'Xhosa'),
                    ('yi', 'Yiddish'), ('yo', 'Yoruba'), ('za', 'Zhuang'),
                    ('zh', 'Chinese'), ('zu', 'Zulu'),
                    ('nb_NO', 'Norwegian Bokmal'),
                    ('pt_BR', 'Brazilian Portuguese'),
                    ('es_MX', 'Mexican Spanish'),
                    ('uk_UA', 'Ukrainian'),
                    ('zh_CN', 'Simplified Chinese'),
                    ('zh_TW', 'Traditional Chinese'),
                ],
                default='en',
                help_text="The language the project documentation is rendered in. Note: this affects your project's URL.",
                max_length=20,
                verbose_name='Language',
            ),
        ),
        migrations.AlterField(
            model_name='project',
            name='privacy_level',
            field=models.CharField(
                choices=[
                    ('public', 'Public'),
                    ('protected', 'Protected'),
                    ('private', 'Private'),
                ],
                default='public',
                help_text='Level of privacy that you want on the repository. Protected means public but not in listings.',
                max_length=20,
                verbose_name='Privacy Level',
            ),
        ),
        migrations.AlterField(
            model_name='project',
            name='python_interpreter',
            field=models.CharField(
                choices=[
                    ('python', 'CPython 2.x'),
                    ('python3', 'CPython 3.x'),
                ],
                default='python',
                help_text='The Python interpreter used to create the virtual environment.',
                max_length=20,
                verbose_name='Python Interpreter',
            ),
        ),
        migrations.AlterField(
            model_name='project',
            name='version_privacy_level',
            field=models.CharField(
                choices=[
                    ('public', 'Public'),
                    ('protected', 'Protected'),
                    ('private', 'Private'),
                ],
                default='public',
                help_text='Default level of privacy you want on built versions of documentation.',
                max_length=20,
                verbose_name='Version Privacy Level',
            ),
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
from __future__ import absolute_import | ||
|
||
import datetime | ||
import fnmatch | ||
import hashlib | ||
import json | ||
import logging | ||
|
@@ -29,7 +30,7 @@ | |
|
||
from .constants import LOG_TEMPLATE | ||
from .exceptions import RepositoryError | ||
from .models import ImportedFile, Project, Domain | ||
from .models import ImportedFile, Project, Domain, HTMLFile | ||
from .signals import before_vcs, after_vcs, before_build, after_build, files_changed | ||
from readthedocs.builds.constants import (LATEST, | ||
BUILD_STATE_CLONING, | ||
|
@@ -943,18 +944,24 @@ def _manage_imported_files(version, path, commit): | |
changed_files = set() | ||
for root, __, filenames in os.walk(path): | ||
for filename in filenames: | ||
if fnmatch.fnmatch(filename, '*.html'): | ||
model_class = HTMLFile | ||
else: | ||
model_class = ImportedFile | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't believe this is needed, since it's all the same model in the database. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The problem is actually with the signal manager. I have opened django-es/django-elasticsearch-dsl#111 about this. Until it has been fixed, we need to have a proxy model for this purpose, I believe. |
||
|
||
dirpath = os.path.join(root.replace(path, '').lstrip('/'), | ||
filename.lstrip('/')) | ||
full_path = os.path.join(root, filename) | ||
md5 = hashlib.md5(open(full_path, 'rb').read()).hexdigest() | ||
try: | ||
obj, __ = ImportedFile.objects.get_or_create( | ||
# pylint: disable=unpacking-non-sequence | ||
obj, __ = model_class.objects.get_or_create( | ||
project=version.project, | ||
version=version, | ||
path=dirpath, | ||
name=filename, | ||
) | ||
except ImportedFile.MultipleObjectsReturned: | ||
except model_class.MultipleObjectsReturned: | ||
log.warning('Error creating ImportedFile') | ||
continue | ||
if obj.md5 != md5: | ||
|
@@ -963,6 +970,12 @@ def _manage_imported_files(version, path, commit): | |
if obj.commit != commit: | ||
obj.commit = commit | ||
obj.save() | ||
|
||
# Delete the HTMLFile first from previous versions | ||
HTMLFile.objects.filter(project=version.project, | ||
version=version | ||
).exclude(commit=commit).delete() | ||
|
||
# Delete ImportedFiles from previous versions | ||
ImportedFile.objects.filter(project=version.project, | ||
version=version | ||
|
@@ -1173,7 +1186,6 @@ def sync_callback(_, version_pk, commit, *args, **kwargs): | |
The first argument is the result from previous tasks, which we discard. | ||
""" | ||
fileify(version_pk, commit=commit) | ||
update_search(version_pk, commit=commit) | ||
|
||
|
||
@app.task() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# HTML file names that are left out of the search index.
SEARCH_EXCLUDED_FILE = [
    'search.html',
    'genindex.html',
    'py-modindex.html',
]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
from django.conf import settings | ||
from django_elasticsearch_dsl import DocType, Index, fields | ||
|
||
from readthedocs.projects.models import Project, HTMLFile | ||
from .conf import SEARCH_EXCLUDED_FILE | ||
|
||
from readthedocs.search.faceted_search import ProjectSearch, FileSearch | ||
|
||
def _configured_index(conf):
    """Build an ``Index`` from one ``settings.ES_INDEXES`` entry.

    The entry's ``'name'`` names the index and its ``'settings'`` mapping is
    applied through ``Index.settings``.
    """
    index = Index(conf['name'])
    index.settings(**conf['settings'])
    return index


# Index that the project documents are registered on.
project_conf = settings.ES_INDEXES['project']
project_index = _configured_index(project_conf)

# Index that the page documents are registered on.
page_conf = settings.ES_INDEXES['page']
page_index = _configured_index(page_conf)
|
||
|
||
@project_index.doc_type
class ProjectDocument(DocType):

    """Search document built from ``Project`` model instances."""

    class Meta(object):
        model = Project
        fields = ('name', 'slug', 'description')

    url = fields.TextField(attr='get_absolute_url')
    users = fields.NestedField(properties={
        'username': fields.TextField(),
        'id': fields.IntegerField(),
    })
    language = fields.KeywordField()

    @classmethod
    def faceted_search(cls, query, language=None, using=None, index=None):
        """Return a ``ProjectSearch`` for ``query``.

        :param query: query passed through to ``ProjectSearch``
        :param language: when truthy, added as a ``language`` filter
        :param using: connection to use; falls back to the document's own
        :param index: index to search; falls back to the document's own
        """
        # NOTE(review): a reviewer asked whether assembling the keyword
        # arguments here is required or heavier than needed; the reply is cut
        # off in the discussion -- confirm before simplifying.
        doc_type = cls._doc_type
        search_kwargs = dict(
            using=using or doc_type.using,
            index=index or doc_type.index,
            doc_types=[cls],
            model=doc_type.model,
            query=query,
        )

        # The ``filters`` key is only passed when a language was requested.
        if language:
            search_kwargs['filters'] = {'language': language}

        return ProjectSearch(**search_kwargs)
|
||
|
||
@page_index.doc_type
class PageDocument(DocType):

    """Search document built from ``HTMLFile`` instances.

    Only files selected by :meth:`get_queryset` are indexed.
    """

    class Meta(object):
        model = HTMLFile
        fields = ('commit',)

    project = fields.KeywordField(attr='project.slug')
    version = fields.KeywordField(attr='version.slug')

    title = fields.TextField(attr='processed_json.title')
    headers = fields.TextField(attr='processed_json.headers')
    content = fields.TextField(attr='processed_json.content')
    path = fields.TextField(attr='processed_json.path')

    @classmethod
    def faceted_search(cls, query, projects_list=None, versions_list=None, using=None, index=None):
        """Return a ``FileSearch`` for ``query``.

        :param query: query passed through to ``FileSearch``
        :param projects_list: when truthy, used as the ``project`` filter
        :param versions_list: when truthy, used as the ``version`` filter
        :param using: connection to use; falls back to the document's own
        :param index: index to search; falls back to the document's own
        """
        kwargs = {
            'using': using or cls._doc_type.using,
            'index': index or cls._doc_type.index,
            'doc_types': [cls],
            'model': cls._doc_type.model,
            'query': query
        }
        filters = {}

        if projects_list:
            filters['project'] = projects_list
        if versions_list:
            filters['version'] = versions_list

        # ``filters`` is always passed, even when it is empty.
        kwargs['filters'] = filters

        return FileSearch(**kwargs)

    def get_queryset(self):
        """Return the queryset of files that are allowed into the index."""
        queryset = super(PageDocument, self).get_queryset()

        # Do not index files that belong to non sphinx project
        # Also do not index certain files
        queryset = (queryset.filter(project__documentation_type='sphinx')
                            .exclude(name__in=SEARCH_EXCLUDED_FILE))
        return queryset

    def update(self, thing, refresh=None, action='index', **kwargs):
        """Update the index, skipping instances outside :meth:`get_queryset`.

        :param thing: a model instance, or whatever else the parent
            ``update`` accepts
        :param refresh: forwarded unchanged to the parent ``update``
        :param action: forwarded unchanged to the parent ``update``
        :returns: ``None`` when a single instance is skipped, otherwise the
            parent's return value
        """
        # Object not exist in the provided queryset should not be indexed
        # TODO: remove this overwrite when the issue has been fixed
        # See below link for more information
        # https://github.com/sabricot/django-elasticsearch-dsl/issues/111
        #
        # The membership check is skipped for deletes: by then the row may
        # already be gone from the database, and bailing out here would
        # leave a stale document in the index.
        if isinstance(thing, HTMLFile) and action != 'delete':
            # Its a model instance.
            if not self.get_queryset().filter(pk=thing.pk).exists():
                return None

        # Forward the caller's ``refresh`` and ``action`` instead of
        # hard-coding them, so a delete is not re-sent as an index request.
        return super(PageDocument, self).update(
            thing=thing, refresh=refresh, action=action, **kwargs
        )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting :)