Skip to content

Commit

Permalink
feat: add subgraph health endpoint (#449)
Browse files Browse the repository at this point in the history
  • Loading branch information
shiyasmohd authored Nov 7, 2024
1 parent 775f2f6 commit bab22af
Show file tree
Hide file tree
Showing 6 changed files with 329 additions and 3 deletions.
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,35 @@ curl -X POST \
}
```

## Subgraph health check
```bash
curl http://localhost:7600/subgraph/health/QmVhiE4nax9i86UBnBmQCYDzvjWuwHShYh7aspGPQhU5Sj
```
```json
{
"health": "healthy"
}
```
## Subgraph not found
```bash
curl http://localhost:7600/subgraph/health/QmacQnSgia4iDPWHpeY6aWxesRFdb8o5DKZUx96zZqEWrB
```
```json
{
"error": "Deployment not found"
}
```
## Failed Subgraph
```bash
curl http://localhost:7600/subgraph/health/QmVGSJyvjEjkk5U9EdxyyB78NCXK3EAoFhrzm6LV7SxxAm
```
```json
{
"fatalError": "transaction 21e77ed08fbc9df7be81101e9b03c2616494cee7cac2f6ad4f1ee387cf799e0c: error while executing at wasm backtrace:\t 0: 0x5972 - <unknown>!mappings/core/handleSwap: Mapping aborted at mappings/core.ts, line 73, column 16, with message: unexpected null in handler `handleSwap` at block #36654250 (5ab4d80c8e2cd628d5bf03abab4c302fd21d25d734e66afddff7a706b804fe13)",
"health": "failed"
}
```

# Network queries
## Checks for auth and configuration to serve-network-subgraph

Expand Down
97 changes: 97 additions & 0 deletions common/src/indexer_service/http/health.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright 2023-, Edge & Node, GraphOps, and Semiotic Labs.
// SPDX-License-Identifier: Apache-2.0

use axum::{
extract::Path,
response::{IntoResponse, Response as AxumResponse},
Extension, Json,
};
use graphql_client::GraphQLQuery;
use indexer_config::GraphNodeConfig;
use reqwest::StatusCode;
use serde_json::json;
use thiserror::Error;

/// Marker type for the subgraph health GraphQL query.
///
/// The `GraphQLQuery` derive reads the schema and query files named in the
/// `#[graphql(...)]` attribute below; the `health_query::Variables` and
/// `health_query::ResponseData` types used by the `health` handler belong to
/// this query.
#[derive(GraphQLQuery)]
#[graphql(
schema_path = "../graphql/indexing_status.schema.graphql",
query_path = "../graphql/subgraph_health.query.graphql",
response_derives = "Debug",
variables_derives = "Clone"
)]
pub struct HealthQuery;

/// Failures that the `health` handler can report to the caller.
///
/// The `#[error(...)]` text of each variant is what ends up in the `"error"`
/// field of the JSON body built by the `IntoResponse` implementation.
#[derive(Debug, Error)]
pub enum CheckHealthError {
/// The HTTP request to the status endpoint could not be sent
/// (answered with `502 Bad Gateway`).
#[error("Failed to send request")]
RequestFailed,
/// The response body could not be decoded as a GraphQL response, or it
/// carried no `data` / carried `errors` (answered with
/// `500 Internal Server Error`).
#[error("Failed to decode response")]
BadResponse,
/// The status endpoint returned no indexing status for the requested
/// deployment (answered with `404 Not Found`).
#[error("Deployment not found")]
DeploymentNotFound,
/// The reported health value is not one of the variants known to the
/// schema (answered with `500 Internal Server Error`).
#[error("Invalid health status found")]
InvalidHealthStatus,
}

/// Converts a [`CheckHealthError`] into an HTTP response with a JSON body of
/// the form `{ "error": "<display message>" }`.
impl IntoResponse for CheckHealthError {
    fn into_response(self) -> AxumResponse {
        // Render the message before `self` is consumed by the match below.
        let payload = Json(serde_json::json!({
            "error": self.to_string(),
        }));

        let code = match self {
            // The upstream status endpoint could not be reached.
            CheckHealthError::RequestFailed => StatusCode::BAD_GATEWAY,
            // The upstream answered, but knows nothing about the deployment.
            CheckHealthError::DeploymentNotFound => StatusCode::NOT_FOUND,
            // The upstream answered with something we cannot interpret.
            CheckHealthError::BadResponse | CheckHealthError::InvalidHealthStatus => {
                StatusCode::INTERNAL_SERVER_ERROR
            }
        };

        (code, payload).into_response()
    }
}

/// Reports the indexing health of a single subgraph deployment.
///
/// Posts [`HealthQuery`] for `deployment_id` to `graph_node.status_url` and
/// turns the first returned indexing status into a JSON body:
///
/// * `healthy`   -> `{ "health": "healthy" }`
/// * `unhealthy` -> `{ "health": "unhealthy", "nonFatalErrors": [<message>, ...] }`
/// * `failed`    -> `{ "health": "failed", "fatalError": <message> | null }`
///
/// # Errors
///
/// * [`CheckHealthError::RequestFailed`] if the request cannot be sent.
/// * [`CheckHealthError::BadResponse`] if the body is not a valid GraphQL
///   response, has no `data`, or carries `errors`.
/// * [`CheckHealthError::DeploymentNotFound`] if no indexing status is
///   returned for the deployment.
/// * [`CheckHealthError::InvalidHealthStatus`] if the health value is not one
///   the schema knows about.
pub async fn health(
    Path(deployment_id): Path<String>,
    Extension(graph_node): Extension<GraphNodeConfig>,
) -> Result<impl IntoResponse, CheckHealthError> {
    let req_body = HealthQuery::build_query(health_query::Variables {
        ids: vec![deployment_id],
    });

    // NOTE(review): a fresh client (and connection pool) is built on every
    // request; consider sharing one through application state.
    let client = reqwest::Client::new();
    let response = client
        .post(graph_node.status_url)
        .json(&req_body)
        .send()
        .await
        .map_err(|_| CheckHealthError::RequestFailed)?;

    let graphql_response: graphql_client::Response<health_query::ResponseData> = response
        .json()
        .await
        .map_err(|_| CheckHealthError::BadResponse)?;

    // Only a response with data and without errors is trusted.
    let data = match (graphql_response.data, graphql_response.errors) {
        (Some(data), None) => data,
        _ => return Err(CheckHealthError::BadResponse),
    };

    // A single id was requested, so only the first status is relevant.
    let Some(status) = data.indexing_statuses.first() else {
        return Err(CheckHealthError::DeploymentNotFound);
    };

    let health_response = match status.health {
        health_query::Health::healthy => json!({ "health": status.health }),
        health_query::Health::unhealthy => {
            let errors: Vec<&str> = status
                .non_fatal_errors
                .iter()
                .map(|err| err.message.as_str())
                .collect();
            json!({ "health": status.health, "nonFatalErrors": errors })
        }
        health_query::Health::failed => {
            // Serialize the optional message directly: a missing fatal error
            // becomes JSON `null` rather than the string "null", which would
            // be indistinguishable from a real error message.
            let fatal_error = status.fatal_error.as_ref().map(|err| err.message.as_str());
            json!({ "health": status.health, "fatalError": fatal_error })
        }
        health_query::Health::Other(_) => return Err(CheckHealthError::InvalidHealthStatus),
    };

    Ok(Json(health_response))
}
12 changes: 9 additions & 3 deletions common/src/indexer_service/http/indexer_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ use tower_http::{cors, cors::CorsLayer, normalize_path::NormalizePath, trace::Tr
use tracing::error;
use tracing::{info, info_span};

use super::request_handler::request_handler;
use crate::escrow_accounts::EscrowAccounts;
use crate::escrow_accounts::EscrowAccountsError;
use crate::indexer_service::http::health::health;
use crate::{
address::public_key,
indexer_service::http::static_subgraph::static_subgraph_request_handler,
Expand All @@ -44,8 +46,6 @@ use crate::{
},
tap::IndexerTapContext,
};

use super::request_handler::request_handler;
use indexer_config::Config;

pub trait IndexerServiceResponse {
Expand Down Expand Up @@ -386,7 +386,7 @@ impl IndexerService {
.route("/", get("Service is up and running"))
.route("/version", get(Json(options.release)))
.route("/info", get(operator_address))
.layer(misc_rate_limiter);
.layer(misc_rate_limiter.clone());

// Rate limits by allowing bursts of 50 requests and requiring 20ms of
// time between consecutive requests after that, effectively rate
Expand All @@ -401,6 +401,12 @@ impl IndexerService {
),
};

// Check subgraph Health
misc_routes = misc_routes
.route("/subgraph/health/:deployment_id", get(health))
.route_layer(Extension(options.config.graph_node.clone()))
.layer(misc_rate_limiter);

if options.config.service.serve_network_subgraph {
info!("Serving network subgraph at /network");

Expand Down
1 change: 1 addition & 0 deletions common/src/indexer_service/http/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright 2023-, Edge & Node, GraphOps, and Semiotic Labs.
// SPDX-License-Identifier: Apache-2.0

mod health;
mod indexer_service;
mod request_handler;
mod static_subgraph;
Expand Down
182 changes: 182 additions & 0 deletions graphql/indexing_status.schema.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
schema {
query: Query
}

type ApiVersion {
"""
Version number in SemVer format
"""
version: String!
}

scalar BigInt

type Block {
hash: Bytes!
number: BigInt!
}

input BlockInput {
hash: Bytes!
number: BigInt!
}

scalar Bytes

type CachedEthereumCall {
idHash: Bytes!
block: Block!
contractAddress: Bytes!
returnValue: Bytes!
}

interface ChainIndexingStatus {
network: String!
chainHeadBlock: Block
earliestBlock: EarliestBlock
latestBlock: Block
lastHealthyBlock: Block
}

scalar Date

type EarliestBlock {
hash: Bytes!
number: BigInt!
}

type EntityChanges {
updates: [EntityTypeUpdates!]!
deletions: [EntityTypeDeletions!]!
}

type EntityTypeDeletions {
type: String!
entities: [ID!]!
}

type EntityTypeUpdates {
type: String!
entities: [JSONObject!]!
}

type EthereumIndexingStatus implements ChainIndexingStatus {
network: String!
chainHeadBlock: Block
earliestBlock: EarliestBlock
latestBlock: Block
lastHealthyBlock: Block
}

enum Feature {
nonFatalErrors
grafting
fullTextSearch
ipfsOnEthereumContracts
aggregations
declaredEthCalls
immutableEntities
bytesAsIds
}

enum Health {
"""Subgraph syncing normally"""
healthy
"""Subgraph syncing but with errors"""
unhealthy
"""Subgraph halted due to errors"""
failed
}

scalar JSONObject

type PartialBlock {
hash: Bytes
number: BigInt!
}

input ProofOfIndexingRequest {
deployment: String!
block: BlockInput!
}

type ProofOfIndexingResult {
deployment: String!
block: Block!
"""
There may not be a proof of indexing available for the deployment and block
"""
proofOfIndexing: Bytes
}

input PublicProofOfIndexingRequest {
deployment: String!
blockNumber: BigInt!
}

type PublicProofOfIndexingResult {
deployment: String!
block: PartialBlock!
proofOfIndexing: Bytes!
}

type Query {
indexingStatusForCurrentVersion(subgraphName: String!): SubgraphIndexingStatus
indexingStatusForPendingVersion(subgraphName: String!): SubgraphIndexingStatus
indexingStatusesForSubgraphName(subgraphName: String!): [SubgraphIndexingStatus!]!
indexingStatuses(subgraphs: [String!]): [SubgraphIndexingStatus!]!
proofOfIndexing(subgraph: String!, blockNumber: Int!, blockHash: Bytes!, indexer: Bytes): Bytes
"""
Proofs of indexing for several deployments and blocks that can be shared and
compared in public without revealing the _actual_ proof of indexing that every
indexer has in their database
"""
publicProofsOfIndexing(requests: [PublicProofOfIndexingRequest!]!): [PublicProofOfIndexingResult!]!
subgraphFeatures(subgraphId: String!): SubgraphFeatures!
entityChangesInBlock(subgraphId: String!, blockNumber: Int!): EntityChanges!
blockData(network: String!, blockHash: Bytes!): JSONObject
blockHashFromNumber(network: String!, blockNumber: Int!): Bytes
version: Version!
cachedEthereumCalls(network: String!, blockHash: Bytes!): [CachedEthereumCall!]
apiVersions(subgraphId: String!): [ApiVersion!]!
}

type SubgraphError {
message: String!
block: Block
handler: String
deterministic: Boolean!
}

type SubgraphFeatures {
apiVersion: String
specVersion: String!
features: [Feature!]!
dataSources: [String!]!
handlers: [String!]!
network: String
}

type SubgraphIndexingStatus {
subgraph: String!
synced: Boolean!
health: Health!
"""If the subgraph has failed, this is the error caused it"""
fatalError: SubgraphError
"""Sorted from first to last, limited to first 1000"""
nonFatalErrors: [SubgraphError!]!
chains: [ChainIndexingStatus!]!
entityCount: BigInt!
"""null if deployment is not assigned to an indexing node"""
node: String
"""null if deployment is not assigned to an indexing node"""
paused: Boolean
historyBlocks: Int!
}

type Version {
version: String!
commit: String!
}
11 changes: 11 additions & 0 deletions graphql/subgraph_health.query.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Health lookup used by the subgraph health endpoint.
# `$ids` is passed through as the `subgraphs` filter of `indexingStatuses`.
# Only the health value and the messages of the fatal / non-fatal errors are
# selected; no other status fields are requested.
query HealthQuery($ids: [String!]!) {
indexingStatuses(subgraphs: $ids) {
health
fatalError {
message
}
nonFatalErrors {
message
}
}
}

0 comments on commit bab22af

Please sign in to comment.