diff --git a/README.md b/README.md index 9c49cffb..6bff30df 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,35 @@ curl -X POST \ } ``` +## Subgraph health check +```bash +curl http://localhost:7600/subgraph/health/QmVhiE4nax9i86UBnBmQCYDzvjWuwHShYh7aspGPQhU5Sj +``` +```json +{ + "health": "healthy" +} +``` +## Subgraph not found +```bash +curl http://localhost:7600/subgraph/health/QmacQnSgia4iDPWHpeY6aWxesRFdb8o5DKZUx96zZqEWrB +``` +```json +{ + "error": "Deployment not found" +} +``` +## Failed subgraph +```bash +curl http://localhost:7600/subgraph/health/QmVGSJyvjEjkk5U9EdxyyB78NCXK3EAoFhrzm6LV7SxxAm +``` +```json +{ + "fatalError": "transaction 21e77ed08fbc9df7be81101e9b03c2616494cee7cac2f6ad4f1ee387cf799e0c: error while executing at wasm backtrace:\t 0: 0x5972 - !mappings/core/handleSwap: Mapping aborted at mappings/core.ts, line 73, column 16, with message: unexpected null in handler `handleSwap` at block #36654250 (5ab4d80c8e2cd628d5bf03abab4c302fd21d25d734e66afddff7a706b804fe13)", + "health": "failed" +} +``` + # Network queries ## Checks for auth and configuration to serve-network-subgraph diff --git a/common/src/indexer_service/http/health.rs b/common/src/indexer_service/http/health.rs new file mode 100644 index 00000000..d6111257 --- /dev/null +++ b/common/src/indexer_service/http/health.rs @@ -0,0 +1,97 @@ +// Copyright 2023-, Edge & Node, GraphOps, and Semiotic Labs. 
+// SPDX-License-Identifier: Apache-2.0 + +use axum::{ + extract::Path, + response::{IntoResponse, Response as AxumResponse}, + Extension, Json, +}; +use graphql_client::GraphQLQuery; +use indexer_config::GraphNodeConfig; +use reqwest::StatusCode; +use serde_json::json; +use thiserror::Error; + +#[derive(GraphQLQuery)] +#[graphql( + schema_path = "../graphql/indexing_status.schema.graphql", + query_path = "../graphql/subgraph_health.query.graphql", + response_derives = "Debug", + variables_derives = "Clone" +)] +pub struct HealthQuery; + +#[derive(Debug, Error)] +pub enum CheckHealthError { + #[error("Failed to send request")] + RequestFailed, + #[error("Failed to decode response")] + BadResponse, + #[error("Deployment not found")] + DeploymentNotFound, + #[error("Invalid health status found")] + InvalidHealthStatus, +} + +impl IntoResponse for CheckHealthError { + fn into_response(self) -> AxumResponse { + let status = match &self { + CheckHealthError::DeploymentNotFound => StatusCode::NOT_FOUND, + CheckHealthError::InvalidHealthStatus | CheckHealthError::BadResponse => { + StatusCode::INTERNAL_SERVER_ERROR + } + CheckHealthError::RequestFailed => StatusCode::BAD_GATEWAY, + }; + let body = serde_json::json!({ + "error": self.to_string(), + }); + (status, Json(body)).into_response() + } +} + +pub async fn health( + Path(deployment_id): Path<String>, + Extension(graph_node): Extension<GraphNodeConfig>, +) -> Result<impl IntoResponse, CheckHealthError> { + let req_body = HealthQuery::build_query(health_query::Variables { + ids: vec![deployment_id], + }); + + let client = reqwest::Client::new(); + let response = client + .post(graph_node.status_url) + .json(&req_body) + .send() + .await + .map_err(|_| CheckHealthError::RequestFailed)?; + + let graphql_response: graphql_client::Response<health_query::ResponseData> = response + .json() + .await + .map_err(|_| CheckHealthError::BadResponse)?; + + let data = match (graphql_response.data, graphql_response.errors) { + (Some(data), None) => data, + _ => return Err(CheckHealthError::BadResponse), + }; + + let 
Some(status) = data.indexing_statuses.first() else { + return Err(CheckHealthError::DeploymentNotFound); + }; + let health_response = match status.health { + health_query::Health::healthy => json!({ "health": status.health }), + health_query::Health::unhealthy => { + let errors: Vec<&String> = status + .non_fatal_errors + .iter() + .map(|msg| &msg.message) + .collect(); + json!({ "health": status.health, "nonFatalErrors": errors }) + } + health_query::Health::failed => { + json!({ "health": status.health, "fatalError": status.fatal_error.as_ref().map(|msg| msg.message.as_str()) }) + } + health_query::Health::Other(_) => return Err(CheckHealthError::InvalidHealthStatus), + }; + Ok(Json(health_response)) +} diff --git a/common/src/indexer_service/http/indexer_service.rs b/common/src/indexer_service/http/indexer_service.rs index 623223ff..e9e64853 100644 --- a/common/src/indexer_service/http/indexer_service.rs +++ b/common/src/indexer_service/http/indexer_service.rs @@ -33,8 +33,10 @@ use tower_http::{cors, cors::CorsLayer, normalize_path::NormalizePath, trace::Tr use tracing::error; use tracing::{info, info_span}; +use super::request_handler::request_handler; use crate::escrow_accounts::EscrowAccounts; use crate::escrow_accounts::EscrowAccountsError; +use crate::indexer_service::http::health::health; use crate::{ address::public_key, indexer_service::http::static_subgraph::static_subgraph_request_handler, @@ -44,8 +46,6 @@ use crate::{ }, tap::IndexerTapContext, }; - -use super::request_handler::request_handler; use indexer_config::Config; pub trait IndexerServiceResponse { @@ -386,7 +386,7 @@ impl IndexerService { .route("/", get("Service is up and running")) .route("/version", get(Json(options.release))) .route("/info", get(operator_address)) - .layer(misc_rate_limiter); + .layer(misc_rate_limiter.clone()); // Rate limits by allowing bursts of 50 requests and requiring 20ms of // time between consecutive requests after that, effectively rate @@ -401,6 +401,12 @@ 
impl IndexerService { ), }; + // Check subgraph health: built on its own router so the rate limiter and Extension wrap only this route + let health_routes = Router::new() + .route("/subgraph/health/:deployment_id", get(health)) + .route_layer(Extension(options.config.graph_node.clone())) + .layer(misc_rate_limiter); + misc_routes = misc_routes.merge(health_routes); if options.config.service.serve_network_subgraph { info!("Serving network subgraph at /network"); diff --git a/common/src/indexer_service/http/mod.rs b/common/src/indexer_service/http/mod.rs index 2c1da686..04baef2e 100644 --- a/common/src/indexer_service/http/mod.rs +++ b/common/src/indexer_service/http/mod.rs @@ -1,6 +1,7 @@ // Copyright 2023-, Edge & Node, GraphOps, and Semiotic Labs. // SPDX-License-Identifier: Apache-2.0 +mod health; mod indexer_service; mod request_handler; mod static_subgraph; diff --git a/graphql/indexing_status.schema.graphql b/graphql/indexing_status.schema.graphql new file mode 100644 index 00000000..e1156d5a --- /dev/null +++ b/graphql/indexing_status.schema.graphql @@ -0,0 +1,182 @@ +schema { + query: Query +} + +type ApiVersion { + """ + Version number in SemVer format + + """ + version: String! +} + +scalar BigInt + +type Block { + hash: Bytes! + number: BigInt! +} + +input BlockInput { + hash: Bytes! + number: BigInt! +} + +scalar Bytes + +type CachedEthereumCall { + idHash: Bytes! + block: Block! + contractAddress: Bytes! + returnValue: Bytes! +} + +interface ChainIndexingStatus { + network: String! + chainHeadBlock: Block + earliestBlock: EarliestBlock + latestBlock: Block + lastHealthyBlock: Block +} + +scalar Date + +type EarliestBlock { + hash: Bytes! + number: BigInt! +} + +type EntityChanges { + updates: [EntityTypeUpdates!]! + deletions: [EntityTypeDeletions!]! +} + +type EntityTypeDeletions { + type: String! + entities: [ID!]! +} + +type EntityTypeUpdates { + type: String! + entities: [JSONObject!]! +} + +type EthereumIndexingStatus implements ChainIndexingStatus { + network: String! 
+ chainHeadBlock: Block + earliestBlock: EarliestBlock + latestBlock: Block + lastHealthyBlock: Block +} + +enum Feature { + nonFatalErrors + grafting + fullTextSearch + ipfsOnEthereumContracts + aggregations + declaredEthCalls + immutableEntities + bytesAsIds +} + +enum Health { + """Subgraph syncing normally""" + healthy + """Subgraph syncing but with errors""" + unhealthy + """Subgraph halted due to errors""" + failed +} + +scalar JSONObject + +type PartialBlock { + hash: Bytes + number: BigInt! +} + +input ProofOfIndexingRequest { + deployment: String! + block: BlockInput! +} + +type ProofOfIndexingResult { + deployment: String! + block: Block! + """ + There may not be a proof of indexing available for the deployment and block + """ + proofOfIndexing: Bytes +} + +input PublicProofOfIndexingRequest { + deployment: String! + blockNumber: BigInt! +} + +type PublicProofOfIndexingResult { + deployment: String! + block: PartialBlock! + proofOfIndexing: Bytes! +} + +type Query { + indexingStatusForCurrentVersion(subgraphName: String!): SubgraphIndexingStatus + indexingStatusForPendingVersion(subgraphName: String!): SubgraphIndexingStatus + indexingStatusesForSubgraphName(subgraphName: String!): [SubgraphIndexingStatus!]! + indexingStatuses(subgraphs: [String!]): [SubgraphIndexingStatus!]! + proofOfIndexing(subgraph: String!, blockNumber: Int!, blockHash: Bytes!, indexer: Bytes): Bytes + """ + Proofs of indexing for several deployments and blocks that can be shared and + compared in public without revealing the _actual_ proof of indexing that every + indexer has in their database + + """ + publicProofsOfIndexing(requests: [PublicProofOfIndexingRequest!]!): [PublicProofOfIndexingResult!]! + subgraphFeatures(subgraphId: String!): SubgraphFeatures! + entityChangesInBlock(subgraphId: String!, blockNumber: Int!): EntityChanges! + blockData(network: String!, blockHash: Bytes!): JSONObject + blockHashFromNumber(network: String!, blockNumber: Int!): Bytes + version: Version! 
+ cachedEthereumCalls(network: String!, blockHash: Bytes!): [CachedEthereumCall!] + apiVersions(subgraphId: String!): [ApiVersion!]! +} + +type SubgraphError { + message: String! + block: Block + handler: String + deterministic: Boolean! +} + +type SubgraphFeatures { + apiVersion: String + specVersion: String! + features: [Feature!]! + dataSources: [String!]! + handlers: [String!]! + network: String +} + +type SubgraphIndexingStatus { + subgraph: String! + synced: Boolean! + health: Health! + """If the subgraph has failed, this is the error caused it""" + fatalError: SubgraphError + """Sorted from first to last, limited to first 1000""" + nonFatalErrors: [SubgraphError!]! + chains: [ChainIndexingStatus!]! + entityCount: BigInt! + """null if deployment is not assigned to an indexing node""" + node: String + """null if deployment is not assigned to an indexing node""" + paused: Boolean + historyBlocks: Int! +} + +type Version { + version: String! + commit: String! +} \ No newline at end of file diff --git a/graphql/subgraph_health.query.graphql b/graphql/subgraph_health.query.graphql new file mode 100644 index 00000000..22ab6cab --- /dev/null +++ b/graphql/subgraph_health.query.graphql @@ -0,0 +1,11 @@ +query HealthQuery($ids: [String!]!) { + indexingStatuses(subgraphs: $ids) { + health + fatalError { + message + } + nonFatalErrors { + message + } + } +} \ No newline at end of file