Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(gms): Write back lineage search results to in-memory cache bound to feature flag #6006

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@
@Data
public class FeatureFlags {
private boolean showSimplifiedHomepageByDefault = false;
private boolean lineageSearchCacheEnabled = false;
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@
public class LineageSearchService {
private final SearchService _searchService;
private final GraphService _graphService;
@Nullable
private final Cache cache;
private final boolean cacheEnabled;

private static final String DEGREE_FILTER = "degree";
private static final String DEGREE_FILTER_INPUT = "degree.keyword";
Expand Down Expand Up @@ -71,10 +73,14 @@ public LineageSearchResult searchAcrossLineage(@Nonnull Urn sourceUrn, @Nonnull
@Nonnull List<String> entities, @Nullable String input, @Nullable Integer maxHops, @Nullable Filter inputFilters,
@Nullable SortCriterion sortCriterion, int from, int size) {
// Cache multihop result for faster performance
EntityLineageResult lineageResult = cache.get(Pair.of(sourceUrn, direction), EntityLineageResult.class);
EntityLineageResult lineageResult = cacheEnabled ? cache.get(Pair.of(sourceUrn, direction), EntityLineageResult.class)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a log.warn here if we found something in the cache and if the insert timestamp is from a long time ago (we can default to hard-coded 1 hour for this PR).

: null;
if (lineageResult == null) {
maxHops = maxHops != null ? maxHops : 1000;
lineageResult = _graphService.getLineage(sourceUrn, direction, 0, MAX_RELATIONSHIPS, maxHops);
if (cacheEnabled) {
cache.put(Pair.of(sourceUrn, direction), lineageResult);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we drop in the current timestamp into the value section?
So maybe

cache.put(Pair.of(sourceUrn,direction), Pair.of(lineageResult, currentTimestamp))

that way we can always know when a cache entry was inserted into the cache, at least for debugging purposes.

}
}

// Filter hopped result based on the set of entities to return and inputFilters before sending to search
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ public void setup() {
_elasticSearchService.configure();
_cacheManager = new ConcurrentMapCacheManager();
_graphService = mock(GraphService.class);
resetService();
resetService(true);
}

private void resetService() {
private void resetService(boolean withCache) {
CachingEntitySearchService cachingEntitySearchService = new CachingEntitySearchService(_cacheManager, _elasticSearchService, 100, true);
_lineageSearchService = new LineageSearchService(
new SearchService(
Expand All @@ -102,7 +102,7 @@ private void resetService() {
100,
true),
new SimpleRanker()),
_graphService, _cacheManager.getCache("test"));
_graphService, _cacheManager.getCache("test"), withCache);
}

@BeforeMethod
Expand All @@ -126,7 +126,7 @@ private ElasticSearchService buildEntitySearchService() {

private void clearCache() {
_cacheManager.getCacheNames().forEach(cache -> _cacheManager.getCache(cache).clear());
resetService();
resetService(true);
}

@AfterClass
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.linkedin.gms.factory.search;

import com.linkedin.gms.factory.common.GraphServiceFactory;
import com.linkedin.gms.factory.config.ConfigurationProvider;
import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
import com.linkedin.metadata.graph.GraphService;
import com.linkedin.metadata.search.LineageSearchService;
Expand All @@ -21,22 +22,13 @@
@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
public class LineageSearchServiceFactory {

@Autowired
@Qualifier("searchService")
private SearchService searchService;

@Autowired
@Qualifier("graphService")
private GraphService graphService;

@Autowired
private CacheManager cacheManager;

@Bean(name = "relationshipSearchService")
@Primary
@Nonnull
protected LineageSearchService getInstance() {
protected LineageSearchService getInstance(CacheManager cacheManager, GraphService graphService,
SearchService searchService, ConfigurationProvider configurationProvider) {
boolean cacheEnabled = configurationProvider.getFeatureFlags().isLineageSearchCacheEnabled();
return new LineageSearchService(searchService, graphService,
cacheManager.getCache("relationshipSearchService"));
cacheEnabled ? cacheManager.getCache("relationshipSearchService") : null, cacheEnabled);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ bootstrap:

featureFlags:
showSimplifiedHomepageByDefault: ${SHOW_SIMPLIFIED_HOMEPAGE_BY_DEFAULT:false} # shows a simplified homepage with just datasets, charts and dashboards by default to users. this can be configured in user settings
lineageSearchCacheEnabled: ${LINEAGE_SEARCH_CACHE_ENABLED:false} # Enables in-memory cache for searchAcrossLineage query, disabled by default to prevent unexpected update delays

entityChangeEvents:
enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true}