Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add combined health check endpoint which can check multiple components #8191

Merged
merged 6 commits into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,9 @@ public class ConfigurationProvider {
* Configuration for caching
*/
private CacheConfiguration cache;

/**
* Configuration for the health check server
*/
private HealthCheckConfiguration healthCheck;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.linkedin.gms.factory.config;

import lombok.Data;


/**
 * Holds the settings for the health check endpoint.
 */
@Data
public class HealthCheckConfiguration {
/**
 * How long, in seconds, a health check result is cached before it is queried again.
 * The application.yml entry {@code healthCheck.cacheDurationSeconds} supplies this value
 * (default 5, overridable via {@code HEALTH_CHECK_CACHE_DURATION_SECONDS}).
 */
private int cacheDurationSeconds;
}
3 changes: 3 additions & 0 deletions metadata-service/factories/src/main/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ systemUpdate:
backOffFactor: ${BOOTSTRAP_SYSTEM_UPDATE_BACK_OFF_FACTOR:2} # Multiplicative factor for back off, default values will result in waiting 5min 15s
waitForSystemUpdate: ${BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE:true}

healthCheck:
cacheDurationSeconds: ${HEALTH_CHECK_CACHE_DURATION_SECONDS:5}

featureFlags:
showSimplifiedHomepageByDefault: ${SHOW_SIMPLIFIED_HOMEPAGE_BY_DEFAULT:false} # shows a simplified homepage with just datasets, charts and dashboards by default to users. this can be configured in user settings
lineageSearchCacheEnabled: ${LINEAGE_SEARCH_CACHE_ENABLED:true} # Enables in-memory cache for searchAcrossLineage query
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
package com.datahub.health.controller;

import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.linkedin.gms.factory.config.ConfigurationProvider;
import io.swagger.v3.oas.annotations.tags.Tag;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.client.RequestOptions;
Expand All @@ -26,11 +32,46 @@ public class HealthCheckController {
@Autowired
@Qualifier("elasticSearchRestHighLevelClient")
private RestHighLevelClient elasticClient;
private Supplier<ResponseEntity<String>> memoizedSupplier;
private final Supplier<ResponseEntity<String>> memoizedSupplier;

public HealthCheckController(ConfigurationProvider config) {

public HealthCheckController() {
this.memoizedSupplier = Suppliers.memoizeWithExpiration(
this::getElasticHealth, 5, TimeUnit.SECONDS);
this::getElasticHealth, config.getHealthCheck().getCacheDurationSeconds(), TimeUnit.SECONDS);
}

/**
* Combined health check endpoint for checking GMS clients.
* For now, just checks the health of the ElasticSearch client
* @return A ResponseEntity with a Map of String (component name) to ResponseEntity (the health check status of
* that component). The status code will be 200 if all components are okay, and 500 if one or more components are not
* healthy.
*/
@GetMapping(path = "/ready", produces = MediaType.APPLICATION_JSON_VALUE)
public ResponseEntity<Map<String, ResponseEntity<String>>> getCombinedHealthCheck(String... checks) {

Map<String, Supplier<ResponseEntity<String>>> healthChecks = new HashMap<>();
healthChecks.put("elasticsearch", this::getElasticHealthWithCache);
// Add new components here

List<String> componentsToCheck = checks != null && checks.length > 0
? Arrays.asList(checks)
: new ArrayList<>(healthChecks.keySet());

Map<String, ResponseEntity<String>> componentHealth = new HashMap<>();
for (String check : componentsToCheck) {
componentHealth.put(check,
healthChecks.getOrDefault(check,
() -> ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("Unrecognized component " + check))
.get());
}


boolean isHealthy = componentHealth.values().stream().allMatch(resp -> resp.getStatusCode() == HttpStatus.OK);
if (isHealthy) {
return ResponseEntity.ok(componentHealth);
}
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(componentHealth);
iprentic marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand All @@ -43,8 +84,8 @@ public ResponseEntity<String> getElasticHealthWithCache() {
}

/**
*
* @return
* Query ElasticSearch health endpoint
* @return A response including the result from ElasticSearch
*/
private ResponseEntity<String> getElasticHealth() {
String responseString = null;
Expand Down