Skip to content

Commit

Permalink
refactor: index mechanism to enhance overall performance (halo-dev#6039)
Browse files Browse the repository at this point in the history
#### What type of PR is this?
/kind improvement
/area core
/milestone 2.17.x

#### What this PR does / why we need it:
重构索引机制的查询和排序以提升整体性能

**how to test it?**
使用 PostgreSQL 数据库,初始化 Halo,然后执行以下脚本创建 30 万篇文章数据进行测试:
<details>
<summary>点击展开查看 SQL</summary>

```sql
-- Seeds a freshly initialised Halo database on PostgreSQL with benchmark data:
-- 3 categories, 3 tags and total_records published posts (one snapshot per post).
-- Every post references all seeded categories and tags, which is what the
-- postCount / visiblePostCount values written below claim.
DO $$
DECLARE
    -- Number of posts (and matching snapshots) to generate.
    total_records integer := 300000;
    post_name varchar;
    snapshot_name varchar;
BEGIN
    -- Categories category-1 .. category-3.
    FOR i IN 1..3 LOOP
        INSERT INTO "public"."extensions" ("name", "data", "version")
        VALUES (
            '/registry/content.halo.run/categories/category-' || i,
            -- convert_to() encodes the JSON text as UTF-8 bytea.
            convert_to(
                jsonb_build_object(
                    'spec', jsonb_build_object(
                        'displayName', '分类-' || i,
                        'slug', 'category-' || i,
                        'description', '测试分类',
                        'cover', '',
                        'template', '',
                        'priority', 0,
                        'children', '[]'::jsonb
                    ),
                    'status', jsonb_build_object(
                        'permalink', '/categories/category-' || i,
                        'postCount', total_records,
                        'visiblePostCount', total_records
                    ),
                    'apiVersion', 'content.halo.run/v1alpha1',
                    'kind', 'Category',
                    'metadata', jsonb_build_object(
                        'finalizers', jsonb_build_array('category-protection'),
                        'name', 'category-' || i,
                        'annotations', jsonb_build_object(
                            'content.halo.run/permalink-pattern', 'categories'
                        ),
                        'version', 0,
                        'creationTimestamp', '2024-06-12T03:56:40.315592Z'
                    )
                )::text, 'UTF8'),
            0
        );
    END LOOP;

    -- Tags tag-1 .. tag-3.
    FOR i IN 1..3 LOOP
        INSERT INTO "public"."extensions" ("name", "data", "version")
        VALUES (
            '/registry/content.halo.run/tags/tag-' || i,
            convert_to(
                jsonb_build_object(
                    'spec', jsonb_build_object(
                        'displayName', 'Halo tag ' || i,
                        'slug', 'tag-' || i,
                        'color', '#ffffff',
                        'cover', ''
                    ),
                    'status', jsonb_build_object(
                        'permalink', '/tags/tag-' || i,
                        'visiblePostCount', total_records,
                        'postCount', total_records,
                        'observedVersion', 0
                    ),
                    'apiVersion', 'content.halo.run/v1alpha1',
                    'kind', 'Tag',
                    'metadata', jsonb_build_object(
                        'finalizers', jsonb_build_array('tag-protection'),
                        'name', 'tag-' || i,
                        'annotations', jsonb_build_object(
                            'content.halo.run/permalink-pattern', 'tags'
                        ),
                        'version', 0,
                        'creationTimestamp', '2024-06-12T03:56:40.406407Z'
                    )
                )::text, 'UTF8'),
            0
        );
    END LOOP;

    -- Posts post-1 .. post-<total_records>, each paired with snapshot-<i>.
    FOR i IN 1..total_records LOOP
        post_name := 'post-' || i;
        snapshot_name := 'snapshot-' || i;

        -- Insert post data.
        INSERT INTO "public"."extensions" ("name", "data", "version")
        VALUES (
            '/registry/content.halo.run/posts/' || post_name,
            convert_to(
                jsonb_build_object(
                    'spec', jsonb_build_object(
                        'title', 'title-' || i,
                        'slug', 'slug-' || i,
                        'releaseSnapshot', snapshot_name,
                        'headSnapshot', snapshot_name,
                        'baseSnapshot', snapshot_name,
                        'owner', 'admin',
                        'template', '',
                        'cover', '',
                        'deleted', false,
                        'publish', true,
                        'pinned', false,
                        'allowComment', true,
                        'visible', 'PUBLIC',
                        'priority', 0,
                        'excerpt', jsonb_build_object(
                            'autoGenerate', true,
                            'raw', ''
                        ),
                        -- Must match the metadata.name values seeded above so that
                        -- spec.categories / spec.tags field selectors find these posts.
                        'categories', ARRAY['category-1', 'category-2', 'category-3'],
                        'tags', ARRAY['tag-1', 'tag-2', 'tag-3'],
                        'htmlMetas', '[]'::jsonb
                    ),
                    'status', jsonb_build_object(
                        'phase', 'PUBLISHED',
                        'conditions', ARRAY[
                            jsonb_build_object(
                                'type', 'PUBLISHED',
                                'status', 'TRUE',
                                'lastTransitionTime', '2024-06-11T10:16:15.617748Z',
                                'message', 'Post published successfully.',
                                'reason', 'Published'
                            ),
                            jsonb_build_object(
                                'type', 'DRAFT',
                                'status', 'TRUE',
                                'lastTransitionTime', '2024-06-11T10:16:15.457668Z',
                                'message', 'Drafted post successfully.',
                                'reason', 'DraftedSuccessfully'
                            )
                        ],
                        'permalink', '/archives/slug-' || i,
                        'excerpt', '如果你看到了这一篇文章,那么证明你已经安装成功了,感谢使用 Halo 进行创作,希望能够使用愉快。',
                        'inProgress', false,
                        'contributors', ARRAY['admin'],
                        'lastModifyTime', '2024-06-11T10:16:15.421467Z',
                        'observedVersion', 0
                    ),
                    'apiVersion', 'content.halo.run/v1alpha1',
                    'kind', 'Post',
                    'metadata', jsonb_build_object(
                        'finalizers', ARRAY['post-protection'],
                        'name', post_name,
                        'labels', jsonb_build_object(
                            'content.halo.run/published', 'true',
                            'content.halo.run/deleted', 'false',
                            'content.halo.run/owner', 'admin',
                            'content.halo.run/visible', 'PUBLIC',
                            'content.halo.run/archive-year', '2024',
                            'content.halo.run/archive-month', '06',
                            'content.halo.run/archive-day', '11'
                        ),
                        'annotations', jsonb_build_object(
                            'content.halo.run/permalink-pattern', '/archives/{slug}',
                            'content.halo.run/last-released-snapshot', snapshot_name,
                            'checksum/config', '73e40d4115f5a7d1e74fcc9228861c53d2ef60468e1e606e367b01efef339309'
                        ),
                        'version', 0,
                        'creationTimestamp', '2024-06-11T05:51:46.059292Z'
                    )
                )::text, 'UTF8'),
            1
        );

        -- Insert content (snapshot) data.
        INSERT INTO "public"."extensions" ("name", "data", "version")
        VALUES (
            '/registry/content.halo.run/snapshots/' || snapshot_name,
            convert_to(
                jsonb_build_object(
                    'spec', jsonb_build_object(
                        'subjectRef', jsonb_build_object(
                            'group', 'content.halo.run',
                            'version', 'v1alpha1',
                            'kind', 'Post',
                            'name', post_name
                        ),
                        'rawType', 'HTML',
                        -- Plain double quotes: in a standard-conforming SQL string a
                        -- backslash is a literal character, and jsonb handles the
                        -- JSON escaping on its own.
                        'rawPatch', '<p style="">测试内容</p>',
                        'contentPatch', '<p style="">测试内容</p>',
                        'lastModifyTime', '2024-06-11T06:01:25.748755Z',
                        'owner', 'admin',
                        'contributors', ARRAY['admin']
                    ),
                    'apiVersion', 'content.halo.run/v1alpha1',
                    'kind', 'Snapshot',
                    'metadata', jsonb_build_object(
                        'name', snapshot_name,
                        'annotations', jsonb_build_object(
                            'content.halo.run/keep-raw', 'true'
                        ),
                        'creationTimestamp', '2024-06-11T06:01:25.748925Z'
                    )
                )::text, 'UTF8'),
            1
        );
    END LOOP;
END $$;
```

</details>

使用以下 API 查询文章
```
curl 'http://localhost:8090/apis/api.console.halo.run/v1alpha1/posts?page=1&size=20&labelSelector=content.halo.run%2Fdeleted%3Dfalse&labelSelector=content.halo.run%2Fpublished%3Dtrue&fieldSelector=spec.categories%3Dcategory-1&fieldSelector=spec.tags%3Dc33ceabb-d8f1-4711-8991-bb8f5c92ad7c&fieldSelector=status.contributors%3Dadmin&fieldSelector=spec.visible%3DPUBLIC' \
--header 'Authorization: Basic YWRtaW46YWRtaW4='
```
Before:

![SCR-20240612-o20](https://github.com/halo-dev/halo/assets/38999863/fc27a265-6571-4361-a707-a683ea040837)
After:

![SCR-20240612-q1c](https://github.com/halo-dev/halo/assets/38999863/c0a241b8-5ed4-4973-8dfc-c260ffccd727)

#### Does this PR introduce a user-facing change?
```release-note
重构索引机制的查询和排序使整体性能提升 50% 以上
```
  • Loading branch information
guqing authored Jun 21, 2024
1 parent 8bdde31 commit c10862d
Show file tree
Hide file tree
Showing 44 changed files with 1,903 additions and 1,148 deletions.
17 changes: 16 additions & 1 deletion api/src/main/java/run/halo/app/extension/ListOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,19 @@
public class ListOptions {
private LabelSelector labelSelector;
private FieldSelector fieldSelector;
}

/**
 * Renders the configured selectors as {@code "fieldSelector: ..., labelSelector: ..."}.
 * A selector that is {@code null} is left out; with neither set the result is empty.
 */
@Override
public String toString() {
    var text = new StringBuilder();
    if (fieldSelector != null) {
        text.append("fieldSelector: ").append(fieldSelector.query());
    }
    if (labelSelector == null) {
        return text.toString();
    }
    // Separate the two parts only when the field selector part was written.
    if (text.length() > 0) {
        text.append(", ");
    }
    return text.append("labelSelector: ").append(labelSelector).toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.Sort;
import run.halo.app.extension.Extension;
import run.halo.app.extension.ExtensionClient;
import run.halo.app.extension.ExtensionMatcher;
Expand Down Expand Up @@ -58,7 +59,7 @@ public void start() {
listOptions.setFieldSelector(listMatcher.getFieldSelector());
listOptions.setLabelSelector(listMatcher.getLabelSelector());
}
indexedQueryEngine.retrieveAll(type, listOptions)
indexedQueryEngine.retrieveAll(type, listOptions, Sort.by("metadata.creationTimestamp"))
.forEach(name -> watcher.onAdd(new Request(name)));
}
client.watch(this.watcher);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public class IndexDescriptor {
private final IndexSpec spec;

/**
* Record whether the index is ready, managed by {@link IndexBuilder}.
* Record whether the index is ready, managed by {@code IndexBuilder}.
*/
private boolean ready;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.NavigableSet;
import run.halo.app.extension.Metadata;

/**
Expand Down Expand Up @@ -34,7 +34,7 @@
public interface IndexEntry {

/**
* Acquires the read lock for reading such as {@link #getByIndexKey(String)},
* Acquires the read lock for reading such as {@link #getObjectNamesBy(String)},
* {@link #entries()}, {@link #indexedKeys()}, because the returned result set of these
* methods is not immutable.
*/
Expand Down Expand Up @@ -87,7 +87,7 @@ public interface IndexEntry {
*
* @return distinct indexed keys of this entry.
*/
Set<String> indexedKeys();
NavigableSet<String> indexedKeys();

/**
* <p>Returns the entries of this entry in order.</p>
Expand All @@ -99,19 +99,34 @@ public interface IndexEntry {
Collection<Map.Entry<String, String>> entries();

/**
* Returns the immutable entries of this entry in order, it is safe to modify the returned
* result, but extra cost is made.
*
* @return immutable entries of this entry.
* <p>Returns the position of the object name in the indexed attribute value mapping for
* sorting.</p>
* For example:
* <pre>
* metadata.name | field1
* ------------- | ------
* foo | 1
* bar | 2
* baz | 2
* </pre>
* "field1" is the indexed attribute, and the position of the object name in the indexed
* attribute
* value mapping for sorting is:
* <pre>
* foo -> 0
* bar -> 1
* baz -> 1
* </pre>
* "bar" and "baz" have the same value, so they have the same position.
*/
Collection<Map.Entry<String, String>> immutableEntries();
Map<String, Integer> getIdPositionMap();

/**
* Returns the object names of this entry in order.
*
* @return object names of this entry.
*/
List<String> getByIndexKey(String indexKey);
List<String> getObjectNamesBy(String indexKey);

void clear();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package run.halo.app.extension.index;

import java.util.Collection;
import java.util.NavigableSet;
import java.util.Set;

/**
* Lookup operations over an index that return the names of the objects recorded under
* the matching indexed keys.
*/
public interface IndexEntryOperator {

/**
* Finds the object names whose indexed key is less than the target key.
*
* @param key target key
* @param orEqual whether object names indexed under the target key itself are included
* @return object names whose indexed key is less than (or, when {@code orEqual} is set,
* equal to) the target key
*/
NavigableSet<String> lessThan(String key, boolean orEqual);

/**
* Finds the object names whose indexed key is greater than the target key.
*
* @param key target key
* @param orEqual whether object names indexed under the target key itself are included
* @return object names whose indexed key is greater than (or, when {@code orEqual} is set,
* equal to) the target key
*/
NavigableSet<String> greaterThan(String key, boolean orEqual);

/**
* Finds the object names whose indexed key lies between {@code start} and {@code end};
* each bound is included or excluded according to its flag.
*
* @param start start key
* @param end end key
* @param startInclusive whether object names indexed under the start key are included
* @param endInclusive whether object names indexed under the end key are included
* @return object names whose indexed key lies within the requested range
*/
NavigableSet<String> range(String start, String end, boolean startInclusive,
boolean endInclusive);

/**
* Finds the object names whose indexed key equals the target key.
*
* @param key target key
* @return object names whose indexed key equals the target key
*/
NavigableSet<String> find(String key);

/**
* Finds the object names whose indexed key equals any of the given keys.
*
* @param keys candidate keys
* @return object names whose indexed key is one of {@code keys}
*/
NavigableSet<String> findIn(Collection<String> keys);

/**
* Gets all values in the index entry.
*
* @return a set of all object names
*/
Set<String> getValues();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package run.halo.app.extension.index;

import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Set;
import java.util.TreeSet;
import org.springframework.util.Assert;

/**
 * {@link IndexEntryOperator} that answers lookups from a single {@link IndexEntry}.
 * Each lookup runs between {@code acquireReadLock()} and {@code releaseReadLock()} on that
 * entry and copies its matches into a newly created collection.
 */
public class IndexEntryOperatorImpl implements IndexEntryOperator {
    private final IndexEntry indexEntry;

    public IndexEntryOperatorImpl(IndexEntry indexEntry) {
        this.indexEntry = indexEntry;
    }

    /** Creates an empty result set ordered by {@code KeyComparator.INSTANCE}. */
    private static NavigableSet<String> newResultSet() {
        return new TreeSet<>(KeyComparator.INSTANCE);
    }

    /**
     * Runs {@code action} between acquiring and releasing the entry's read lock; the lock is
     * released even when the action throws.
     */
    private <T> T readLocked(java.util.function.Supplier<T> action) {
        indexEntry.acquireReadLock();
        try {
            return action.get();
        } finally {
            indexEntry.releaseReadLock();
        }
    }

    @Override
    public NavigableSet<String> lessThan(String key, boolean orEqual) {
        Assert.notNull(key, "Key must not be null.");
        return readLocked(() -> findIn(indexEntry.indexedKeys().headSet(key, orEqual)));
    }

    @Override
    public NavigableSet<String> greaterThan(String key, boolean orEqual) {
        Assert.notNull(key, "Key must not be null.");
        return readLocked(() -> findIn(indexEntry.indexedKeys().tailSet(key, orEqual)));
    }

    @Override
    public NavigableSet<String> range(String start, String end, boolean startInclusive,
        boolean endInclusive) {
        Assert.notNull(start, "The start must not be null.");
        Assert.notNull(end, "The end must not be null.");
        return readLocked(() -> {
            var keysInRange = indexEntry.indexedKeys()
                .subSet(start, startInclusive, end, endInclusive);
            return findIn(keysInRange);
        });
    }

    @Override
    public NavigableSet<String> find(String key) {
        Assert.notNull(key, "The key must not be null.");
        return readLocked(() -> {
            var matched = newResultSet();
            var names = indexEntry.getObjectNamesBy(key);
            // A null lookup result is treated as "no matches".
            if (names != null) {
                matched.addAll(names);
            }
            return matched;
        });
    }

    @Override
    public NavigableSet<String> findIn(Collection<String> keys) {
        if (keys == null || keys.isEmpty()) {
            return newResultSet();
        }
        return readLocked(() -> {
            // Copy the keys into a hash set so each entry is checked with a hash lookup.
            var wanted = new HashSet<>(keys);
            var matched = newResultSet();
            indexEntry.entries().stream()
                .filter(entry -> wanted.contains(entry.getKey()))
                .map(Map.Entry::getValue)
                .forEach(matched::add);
            return matched;
        });
    }

    @Override
    public Set<String> getValues() {
        return readLocked(() -> {
            Set<String> names = new HashSet<>();
            indexEntry.entries().forEach(entry -> names.add(entry.getValue()));
            return names;
        });
    }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package run.halo.app.extension.index;

import java.util.List;
import org.springframework.data.domain.Sort;
import run.halo.app.extension.GroupVersionKind;
import run.halo.app.extension.ListOptions;
import run.halo.app.extension.ListResult;
Expand Down Expand Up @@ -36,7 +37,8 @@ public interface IndexedQueryEngine {
*
* @param type the type of the object must exist in {@link run.halo.app.extension.SchemeManager}
* @param options the list options to use for retrieving the object records
* @param sort the sort to use for retrieving the object records
* @return a collection of {@link Metadata#getName()}
*/
List<String> retrieveAll(GroupVersionKind type, ListOptions options);
List<String> retrieveAll(GroupVersionKind type, ListOptions options, Sort sort);
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.util.Iterator;
import java.util.function.Function;
import org.springframework.lang.NonNull;
import run.halo.app.extension.Extension;

/**
Expand All @@ -20,7 +21,7 @@ public interface Indexer {
/**
* <p>Index the specified {@link Extension} by {@link IndexDescriptor}s.</p>
* <p>First, the {@link Indexer} will index the {@link Extension} by the
* {@link IndexDescriptor}s and record the index entries to {@link IndexerTransaction} and
* {@link IndexDescriptor}s and record the index entries to {@code IndexerTransaction} and
* commit the transaction, if any error occurs, the transaction will be rollback to keep the
* {@link Indexer} consistent.</p>
*
Expand All @@ -33,7 +34,7 @@ public interface Indexer {
* <p>Update indexes for the specified {@link Extension} by {@link IndexDescriptor}s.</p>
* <p>First, the {@link Indexer} will remove the index entries of the {@link Extension} by
* the old {@link IndexDescriptor}s and reindex the {@link Extension} to generate change logs
* to {@link IndexerTransaction} and commit the transaction, if any error occurs, the
* to {@code IndexerTransaction} and commit the transaction, if any error occurs, the
* transaction will be rollback to keep the {@link Indexer} consistent.</p>
*
* @param extension the {@link Extension} to be updated
Expand Down Expand Up @@ -73,19 +74,33 @@ public interface Indexer {
*/
void removeIndexRecords(Function<IndexDescriptor, Boolean> matchFn);

/**
* <p>Get the {@link IndexEntry} by index name if found and ready.</p>
*
* @param name an index name
* @return the {@link IndexEntry} if found
* @throws IllegalArgumentException if the index name is not found or the index is not ready
*/
@NonNull
IndexEntry getIndexEntry(String name);

/**
* <p>Gets an iterator over all the ready {@link IndexEntry}s, in no particular order.</p>
*
* @return an iterator over all the ready {@link IndexEntry}s
* @link {@link IndexDescriptor#isReady()}
* @see IndexDescriptor#isReady()
*/
Iterator<IndexEntry> readyIndexesIterator();

/**
* <p>Gets an iterator over all the {@link IndexEntry}s, in no particular order.</p>
*
* @return an iterator over all the {@link IndexEntry}s
* @link {@link IndexDescriptor#isReady()}
* @see IndexDescriptor#isReady()
*/
Iterator<IndexEntry> allIndexesIterator();

void acquireReadLock();

void releaseReadLock();
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ public All(String fieldName) {

@Override
public NavigableSet<String> matches(QueryIndexView indexView) {
return indexView.getAllIdsForField(fieldName);
return indexView.getIdsForField(fieldName);
}

/**
 * Renders this query as the field name followed by {@code " != null"}.
 */
@Override
public String toString() {
    return String.format("%s != null", fieldName);
}
}
7 changes: 7 additions & 0 deletions api/src/main/java/run/halo/app/extension/index/query/And.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.google.common.collect.Sets;
import java.util.Collection;
import java.util.NavigableSet;
import java.util.stream.Collectors;

public class And extends LogicalQuery {

Expand Down Expand Up @@ -33,4 +34,10 @@ public NavigableSet<String> matches(QueryIndexView indexView) {
}
return resultSet == null ? Sets.newTreeSet() : resultSet;
}

/**
 * Renders the child queries joined by {@code " AND "} and wrapped in parentheses.
 */
@Override
public String toString() {
    return childQueries.stream()
        .map(Query::toString)
        .collect(Collectors.joining(" AND ", "(", ")"));
}
}
Loading

0 comments on commit c10862d

Please sign in to comment.