From c0a91b9d9f6dbecaa718a95b2aeb5facbbfdd96f Mon Sep 17 00:00:00 2001 From: Ian Norden Date: Mon, 9 Nov 2020 10:29:04 -0600 Subject: [PATCH 1/2] updated canonical header finder functions --- ...pdated_canonical_hash_finder_functions.sql | 110 ++++++++++ db/schema.sql | 205 ++++++++++++++---- pkg/eth/backend.go | 6 +- pkg/eth/ipld_retriever.go | 4 +- pkg/rpc/check.go | 16 ++ pkg/rpc/http.go | 16 ++ pkg/rpc/ipc.go | 16 ++ pkg/rpc/ws.go | 16 ++ 8 files changed, 339 insertions(+), 50 deletions(-) create mode 100644 db/migrations/00019_updated_canonical_hash_finder_functions.sql diff --git a/db/migrations/00019_updated_canonical_hash_finder_functions.sql b/db/migrations/00019_updated_canonical_hash_finder_functions.sql new file mode 100644 index 000000000..fe7e24597 --- /dev/null +++ b/db/migrations/00019_updated_canonical_hash_finder_functions.sql @@ -0,0 +1,110 @@ +-- +goose Up +ALTER FUNCTION canonical_header RENAME TO canonical_header_id; + +-- +goose StatementBegin +CREATE TYPE child_result AS ( + has_child BOOLEAN, + children eth.header_cids[] +); + +CREATE OR REPLACE FUNCTION has_child(hash VARCHAR(66)) RETURNS child_result AS +$BODY$ +DECLARE + new_child_result child_result; +BEGIN + -- short circuit if there are no children + SELECT exists(SELECT 1 + FROM eth.header_cids + WHERE parent_hash = hash + LIMIT 1) + INTO new_child_result.has_child; + -- collect all the children for this header + IF new_child_result.has_child THEN + SELECT * + INTO new_child_result.children + FROM eth.header_cids + WHERE parent_hash = hash; + ELSE + END IF; + RETURN new_child_result; +END +$BODY$ +LANGUAGE 'plpgsql'; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE OR REPLACE FUNCTION canonical_header_from_set(headers eth.header_cids[]) RETURNS eth.header_cids AS +$BODY$ +DECLARE + canonical_header eth.header_cids; + canonical_child eth.header_cids; + header eth.header_cids; + current_child_result child_result; + child_headers eth.header_cids[]; + current_header_with_child eth.header_cids; + has_children_count INT DEFAULT 0; +BEGIN + -- for each header in the provided set + FOR header IN SELECT * FROM headers + LOOP + -- check if it has any children + SELECT * INTO current_child_result FROM has_child(header.block_hash); + IF current_child_result.has_child THEN + -- if it does, take note + has_children_count = has_children_count + 1; + current_header_with_child = header; + -- and add the children to the growing set of child headers + child_headers = array_cat(child_headers, current_child_result.children); + END IF; + END LOOP; + -- if none of the headers had children, none is more canonical than the other + IF has_children_count = 0 THEN + -- return the first one selected + SELECT * INTO canonical_header FROM headers LIMIT 1; + -- if only one header had children, it can be considered the heaviest/canonical header of the set + ELSIF has_children_count = 1 THEN + -- return the only header with a child + canonical_header = current_header_with_child; + -- if there are multiple headers with children + ELSE + -- find the canonical the canonical header from the child set + canonical_child = canonical_header_from_set(child_headers); + -- the header that is parent to this header, is the canonical header at this level + SELECT * INTO canonical_header FROM headers + WHERE block_hash = canonical_child.parent_hash; + END IF; + RETURN canonical_header; +END +$BODY$ +LANGUAGE 'plpgsql'; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE OR REPLACE FUNCTION canonical_header(height BIGINT) RETURNS eth.header_cids AS +$BODY$ +DECLARE + header_count INT; + headers eth.header_cids[]; +BEGIN + -- collect every header at this height, noting how many are collected + SELECT *, count(*) + INTO headers, header_count + FROM eth.header_cids + WHERE block_number = height; + -- if only one header is present, it can be considered canonical (if no header is present we will throw an error) + IF header_count = 1 THEN + RETURN headers[0]; + END IF; + -- otherwise, if there are multiple headers at this height, we need to determine which is canonical + RETURN canonical_header_from_set(headers); +END +$BODY$ +LANGUAGE 'plpgsql'; +-- +goose StatementEnd + +-- +goose Down +DROP FUNCTION canonical_header; +DROP FUNCTION canonical_header_from_set; +DROP FUNCTION has_child; +DROP TYPE child_result; +ALTER FUNCTION canonical_header_id RENAME TO canonical_header; diff --git a/db/schema.sql b/db/schema.sql index 91be03f02..54b6d0a25 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -2,8 +2,8 @@ -- PostgreSQL database dump -- --- Dumped from database version 12.1 --- Dumped by pg_dump version 12.1 +-- Dumped from database version 12.4 +-- Dumped by pg_dump version 12.4 SET statement_timeout = 0; SET lock_timeout = 0; @@ -23,6 +23,58 @@ SET row_security = off; CREATE SCHEMA eth; +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: header_cids; Type: TABLE; Schema: eth; Owner: - +-- + +CREATE TABLE eth.header_cids ( + id integer NOT NULL, + block_number bigint NOT NULL, + block_hash character varying(66) NOT NULL, + parent_hash character varying(66) NOT NULL, + cid text NOT NULL, + mh_key text NOT NULL, + td numeric NOT NULL, + node_id integer NOT NULL, + reward numeric NOT NULL, + state_root character varying(66) NOT NULL, + tx_root character varying(66) NOT NULL, + receipt_root character varying(66) NOT NULL, + uncle_root character varying(66) NOT NULL, + bloom bytea NOT NULL, + "timestamp" numeric NOT NULL, + times_validated integer DEFAULT 1 NOT NULL +); + + +-- +-- Name: TABLE header_cids; Type: COMMENT; Schema: eth; Owner: - +-- + +COMMENT ON TABLE eth.header_cids IS '@name EthHeaderCids'; + + +-- +-- Name: COLUMN header_cids.node_id; Type: COMMENT; Schema: eth; Owner: - +-- + +COMMENT ON COLUMN eth.header_cids.node_id IS '@name EthNodeID'; + + +-- +-- Name: child_result; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.child_result AS ( + has_child boolean, + children eth.header_cids[] +); + + -- -- Name: graphql_subscription(); Type: FUNCTION; Schema: eth; Owner: - -- @@ -55,7 +107,83 @@ $_$; -- Name: canonical_header(bigint); Type: FUNCTION; Schema: public; Owner: - -- -CREATE FUNCTION public.canonical_header(height bigint) RETURNS integer +CREATE FUNCTION public.canonical_header(height bigint) RETURNS eth.header_cids + LANGUAGE plpgsql + AS $$ +DECLARE + header_count INT; + headers eth.header_cids[]; +BEGIN + -- collect every header at this height, noting how many are collected + SELECT *, count(*) + INTO headers, header_count + FROM eth.header_cids + WHERE block_number = height; + -- if only one header is present, it can be considered canonical (if no header is present we will throw an error) + IF header_count = 1 THEN + RETURN headers[0]; + END IF; + -- otherwise, if there are multiple headers at this height, we need to determine which is canonical + RETURN canonical_header_from_set(headers); +END +$$; + + +-- +-- Name: canonical_header_from_set(eth.header_cids[]); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.canonical_header_from_set(headers eth.header_cids[]) RETURNS eth.header_cids + LANGUAGE plpgsql + AS $$ +DECLARE + canonical_header eth.header_cids; + canonical_child eth.header_cids; + header eth.header_cids; + current_child_result child_result; + child_headers eth.header_cids[]; + current_header_with_child eth.header_cids; + has_children_count INT DEFAULT 0; +BEGIN + -- for each header in the provided set + FOR header IN SELECT * FROM headers + LOOP + -- check if it has any children + SELECT * INTO current_child_result FROM has_child(header.block_hash); + IF current_child_result.has_child THEN + -- if it does, take note + has_children_count = has_children_count + 1; + current_header_with_child = header; + -- and add the children to the growing set of child headers + child_headers = array_cat(child_headers, current_child_result.children); + END IF; + END LOOP; + -- if none of the headers had children, none is more canonical than the other + IF has_children_count = 0 THEN + -- return the first one selected + SELECT * INTO canonical_header FROM headers LIMIT 1; + -- if only one header had children, it can be considered the heaviest/canonical header of the set + ELSIF has_children_count = 1 THEN + -- return the only header with a child + canonical_header = current_header_with_child; + -- if there are multiple headers with children + ELSE + -- find the canonical the canonical header from the child set + canonical_child = canonical_header_from_set(child_headers); + -- the header that is parent to this header, is the canonical header at this level + SELECT * INTO canonical_header FROM headers + WHERE block_hash = canonical_child.parent_hash; + END IF; + RETURN canonical_header; +END +$$; + + +-- +-- Name: canonical_header_id(bigint); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.canonical_header_id(height bigint) RETURNS integer LANGUAGE plpgsql AS $$ DECLARE @@ -78,6 +206,35 @@ END $$; +-- +-- Name: has_child(character varying); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.has_child(hash character varying) RETURNS public.child_result + LANGUAGE plpgsql + AS $$ +DECLARE + new_child_result child_result; +BEGIN + -- short circuit if there are no children + SELECT exists(SELECT 1 + FROM eth.header_cids + WHERE parent_hash = hash + LIMIT 1) + INTO new_child_result.has_child; + -- collect all the children for this header + IF new_child_result.has_child THEN + SELECT * + INTO new_child_result.children + FROM eth.header_cids + WHERE parent_hash = hash; + ELSE + END IF; + RETURN new_child_result; +END +$$; + + -- -- Name: header_weight(character varying); Type: FUNCTION; Schema: public; Owner: - -- @@ -139,48 +296,6 @@ CREATE FUNCTION public.was_storage_removed(path bytea, height bigint, hash chara $$; -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: header_cids; Type: TABLE; Schema: eth; Owner: - --- - -CREATE TABLE eth.header_cids ( - id integer NOT NULL, - block_number bigint NOT NULL, - block_hash character varying(66) NOT NULL, - parent_hash character varying(66) NOT NULL, - cid text NOT NULL, - mh_key text NOT NULL, - td numeric NOT NULL, - node_id integer NOT NULL, - reward numeric NOT NULL, - state_root character varying(66) NOT NULL, - tx_root character varying(66) NOT NULL, - receipt_root character varying(66) NOT NULL, - uncle_root character varying(66) NOT NULL, - bloom bytea NOT NULL, - "timestamp" numeric NOT NULL, - times_validated integer DEFAULT 1 NOT NULL -); - - --- --- Name: TABLE header_cids; Type: COMMENT; Schema: eth; Owner: - --- - -COMMENT ON TABLE eth.header_cids IS '@name EthHeaderCids'; - - --- --- Name: COLUMN header_cids.node_id; Type: COMMENT; Schema: eth; Owner: - --- - -COMMENT ON COLUMN eth.header_cids.node_id IS '@name EthNodeID'; - - -- -- Name: header_cids_id_seq; Type: SEQUENCE; Schema: eth; Owner: - -- diff --git a/pkg/eth/backend.go b/pkg/eth/backend.go index 0cb4c52cf..b5d9f7972 100644 --- a/pkg/eth/backend.go +++ b/pkg/eth/backend.go @@ -55,10 +55,10 @@ var ( const ( RetrieveCanonicalBlockHashByNumber = `SELECT block_hash FROM eth.header_cids INNER JOIN public.blocks ON (header_cids.mh_key = blocks.key) - WHERE id = (SELECT canonical_header($1))` + WHERE id = (SELECT canonical_header_id($1))` RetrieveCanonicalHeaderByNumber = `SELECT cid, data FROM eth.header_cids INNER JOIN public.blocks ON (header_cids.mh_key = blocks.key) - WHERE id = (SELECT canonical_header($1))` + WHERE id = (SELECT canonical_header_id($1))` RetrieveTD = `SELECT td FROM eth.header_cids WHERE header_cids.block_hash = $1` RetrieveRPCTransaction = `SELECT blocks.data, block_hash, block_number, index FROM public.blocks, eth.transaction_cids, eth.header_cids @@ -72,7 +72,7 @@ const ( AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = $2) - AND header_cids.id = (SELECT canonical_header(block_number)) + AND header_cids.id = (SELECT canonical_header_id(block_number)) ORDER BY block_number DESC LIMIT 1` RetrieveCodeByMhKey = `SELECT data FROM public.blocks WHERE key = $1` diff --git a/pkg/eth/ipld_retriever.go b/pkg/eth/ipld_retriever.go index 8418bfe6f..caf5dad59 100644 --- a/pkg/eth/ipld_retriever.go +++ b/pkg/eth/ipld_retriever.go @@ -108,7 +108,7 @@ const ( AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = $2) - AND header_cids.id = (SELECT canonical_header(block_number)) + AND header_cids.id = (SELECT canonical_header_id(block_number)) ORDER BY block_number DESC LIMIT 1` RetrieveAccountByLeafKeyAndBlockNumberPgStr = `SELECT state_cids.cid, @@ -151,7 +151,7 @@ const ( AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = $3) - AND header_cids.id = (SELECT canonical_header(block_number)) + AND header_cids.id = (SELECT canonical_header_id(block_number)) ORDER BY block_number DESC LIMIT 1` ) diff --git a/pkg/rpc/check.go b/pkg/rpc/check.go index 754f752ee..0921568ab 100644 --- a/pkg/rpc/check.go +++ b/pkg/rpc/check.go @@ -1,3 +1,19 @@ +// VulcanizeDB +// Copyright © 2020 Vulcanize + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + package rpc import "github.com/ethereum/go-ethereum/rpc" diff --git a/pkg/rpc/http.go b/pkg/rpc/http.go index ae1ab03e4..5cb85ca00 100644 --- a/pkg/rpc/http.go +++ b/pkg/rpc/http.go @@ -1,3 +1,19 @@ +// VulcanizeDB +// Copyright © 2020 Vulcanize + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + package rpc import ( diff --git a/pkg/rpc/ipc.go b/pkg/rpc/ipc.go index 4e170ebed..7b8e1c280 100644 --- a/pkg/rpc/ipc.go +++ b/pkg/rpc/ipc.go @@ -1,3 +1,19 @@ +// VulcanizeDB +// Copyright © 2020 Vulcanize + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + package rpc import ( diff --git a/pkg/rpc/ws.go b/pkg/rpc/ws.go index cff19ab96..585cab627 100644 --- a/pkg/rpc/ws.go +++ b/pkg/rpc/ws.go @@ -1,3 +1,19 @@ +// VulcanizeDB +// Copyright © 2020 Vulcanize + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + package rpc import ( From b11fb949f5031d6f4cd819c585929cc3df8bb1a4 Mon Sep 17 00:00:00 2001 From: Ian Norden Date: Thu, 12 Nov 2020 15:33:06 -0600 Subject: [PATCH 2/2] final touches, test canonicty functions and get tests passing again --- ...0018_create_check_if_removed_functions.sql | 10 +- ...pdated_canonical_hash_finder_functions.sql | 73 +++++++----- db/schema.sql | 108 ++++++++++-------- pkg/eth/eth_state_test.go | 57 +++++++-- pkg/eth/test_helpers/test_data.go | 27 ++++- 5 files changed, 183 insertions(+), 92 deletions(-) diff --git a/db/migrations/00018_create_check_if_removed_functions.sql b/db/migrations/00018_create_check_if_removed_functions.sql index 9a7e42d31..eff7cc2c9 100644 --- a/db/migrations/00018_create_check_if_removed_functions.sql +++ b/db/migrations/00018_create_check_if_removed_functions.sql @@ -3,7 +3,7 @@ -- returns if a storage node at the provided path was removed in the range > the provided height and <= the provided block hash CREATE OR REPLACE FUNCTION was_storage_removed(path BYTEA, height BIGINT, hash VARCHAR(66)) RETURNS BOOLEAN AS $$ - SELECT exists(SELECT * + SELECT exists(SELECT 1 FROM eth.storage_cids INNER JOIN eth.state_cids ON (storage_cids.state_id = state_cids.id) INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id) @@ -12,7 +12,8 @@ AS $$ AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = hash) - AND storage_cids.node_type = 3); + AND storage_cids.node_type = 3 + LIMIT 1); $$ LANGUAGE SQL; -- +goose StatementEnd @@ -20,7 +21,7 @@ $$ LANGUAGE SQL; -- returns if a state node at the provided path was removed in the range > the provided height and <= the provided block hash CREATE OR REPLACE FUNCTION was_state_removed(path BYTEA, height BIGINT, hash VARCHAR(66)) RETURNS BOOLEAN AS $$ - SELECT exists(SELECT * + SELECT exists(SELECT 1 FROM eth.state_cids INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id) WHERE state_path = path @@ -28,7 +29,8 @@ AS $$ AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = hash) - AND state_cids.node_type = 3); + AND state_cids.node_type = 3 + LIMIT 1); $$ LANGUAGE SQL; -- +goose StatementEnd diff --git a/db/migrations/00019_updated_canonical_hash_finder_functions.sql b/db/migrations/00019_updated_canonical_hash_finder_functions.sql index fe7e24597..5e7db32cd 100644 --- a/db/migrations/00019_updated_canonical_hash_finder_functions.sql +++ b/db/migrations/00019_updated_canonical_hash_finder_functions.sql @@ -1,30 +1,32 @@ -- +goose Up -ALTER FUNCTION canonical_header RENAME TO canonical_header_id; - -- +goose StatementBegin CREATE TYPE child_result AS ( has_child BOOLEAN, children eth.header_cids[] ); -CREATE OR REPLACE FUNCTION has_child(hash VARCHAR(66)) RETURNS child_result AS +CREATE OR REPLACE FUNCTION has_child(hash VARCHAR(66), height BIGINT) RETURNS child_result AS $BODY$ DECLARE + child_height INT; + temp_child eth.header_cids; new_child_result child_result; BEGIN + child_height = height + 1; -- short circuit if there are no children SELECT exists(SELECT 1 FROM eth.header_cids WHERE parent_hash = hash + AND block_number = child_height LIMIT 1) INTO new_child_result.has_child; -- collect all the children for this header IF new_child_result.has_child THEN - SELECT * - INTO new_child_result.children - FROM eth.header_cids - WHERE parent_hash = hash; - ELSE + FOR temp_child IN + SELECT * FROM eth.header_cids WHERE parent_hash = hash AND block_number = child_height + LOOP + new_child_result.children = array_append(new_child_result.children, temp_child); + END LOOP; END IF; RETURN new_child_result; END @@ -33,7 +35,7 @@ LANGUAGE 'plpgsql'; -- +goose StatementEnd -- +goose StatementBegin -CREATE OR REPLACE FUNCTION canonical_header_from_set(headers eth.header_cids[]) RETURNS eth.header_cids AS +CREATE OR REPLACE FUNCTION canonical_header_from_array(headers eth.header_cids[]) RETURNS eth.header_cids AS $BODY$ DECLARE canonical_header eth.header_cids; @@ -45,10 +47,10 @@ DECLARE has_children_count INT DEFAULT 0; BEGIN -- for each header in the provided set - FOR header IN SELECT * FROM headers + FOREACH header IN ARRAY headers LOOP -- check if it has any children - SELECT * INTO current_child_result FROM has_child(header.block_hash); + current_child_result = has_child(header.block_hash, header.block_number); IF current_child_result.has_child THEN -- if it does, take note has_children_count = has_children_count + 1; @@ -60,17 +62,17 @@ BEGIN -- if none of the headers had children, none is more canonical than the other IF has_children_count = 0 THEN -- return the first one selected - SELECT * INTO canonical_header FROM headers LIMIT 1; + SELECT * INTO canonical_header FROM unnest(headers) LIMIT 1; -- if only one header had children, it can be considered the heaviest/canonical header of the set ELSIF has_children_count = 1 THEN -- return the only header with a child canonical_header = current_header_with_child; -- if there are multiple headers with children ELSE - -- find the canonical the canonical header from the child set - canonical_child = canonical_header_from_set(child_headers); + -- find the canonical header from the child set + canonical_child = canonical_header_from_array(child_headers); -- the header that is parent to this header, is the canonical header at this level - SELECT * INTO canonical_header FROM headers + SELECT * INTO canonical_header FROM unnest(headers) WHERE block_hash = canonical_child.parent_hash; END IF; RETURN canonical_header; @@ -80,31 +82,40 @@ LANGUAGE 'plpgsql'; -- +goose StatementEnd -- +goose StatementBegin -CREATE OR REPLACE FUNCTION canonical_header(height BIGINT) RETURNS eth.header_cids AS +CREATE OR REPLACE FUNCTION canonical_header_id(height BIGINT) RETURNS INTEGER AS $BODY$ DECLARE - header_count INT; + canonical_header eth.header_cids; headers eth.header_cids[]; + header_count INT; + temp_header eth.header_cids; BEGIN - -- collect every header at this height, noting how many are collected - SELECT *, count(*) - INTO headers, header_count - FROM eth.header_cids - WHERE block_number = height; - -- if only one header is present, it can be considered canonical (if no header is present we will throw an error) - IF header_count = 1 THEN - RETURN headers[0]; + -- collect all headers at this height + FOR temp_header IN + SELECT * FROM eth.header_cids WHERE block_number = height + LOOP + headers = array_append(headers, temp_header); + END LOOP; + -- count the number of headers collected + header_count = array_length(headers, 1); + -- if we have less than 1 header, return NULL + IF header_count IS NULL OR header_count < 1 THEN + RETURN NULL; + -- if we have one header, return its id + ELSIF header_count = 1 THEN + RETURN headers[1].id; + -- if we have multiple headers we need to determine which one is canonical + ELSE + canonical_header = canonical_header_from_array(headers); + RETURN canonical_header.id; END IF; - -- otherwise, if there are multiple headers at this height, we need to determine which is canonical - RETURN canonical_header_from_set(headers); -END +END; $BODY$ LANGUAGE 'plpgsql'; -- +goose StatementEnd -- +goose Down -DROP FUNCTION canonical_header; -DROP FUNCTION canonical_header_from_set; +DROP FUNCTION canonical_header_id; +DROP FUNCTION canonical_header_from_array; DROP FUNCTION has_child; DROP TYPE child_result; -ALTER FUNCTION canonical_header_id RENAME TO canonical_header; diff --git a/db/schema.sql b/db/schema.sql index 54b6d0a25..d7af0d1c6 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -107,33 +107,34 @@ $_$; -- Name: canonical_header(bigint); Type: FUNCTION; Schema: public; Owner: - -- -CREATE FUNCTION public.canonical_header(height bigint) RETURNS eth.header_cids +CREATE FUNCTION public.canonical_header(height bigint) RETURNS integer LANGUAGE plpgsql AS $$ DECLARE - header_count INT; - headers eth.header_cids[]; + current_weight INT; + heaviest_weight INT DEFAULT 0; + heaviest_id INT; + r eth.header_cids%ROWTYPE; BEGIN - -- collect every header at this height, noting how many are collected - SELECT *, count(*) - INTO headers, header_count - FROM eth.header_cids - WHERE block_number = height; - -- if only one header is present, it can be considered canonical (if no header is present we will throw an error) - IF header_count = 1 THEN - RETURN headers[0]; - END IF; - -- otherwise, if there are multiple headers at this height, we need to determine which is canonical - RETURN canonical_header_from_set(headers); + FOR r IN SELECT * FROM eth.header_cids + WHERE block_number = height + LOOP + SELECT INTO current_weight * FROM header_weight(r.block_hash); + IF current_weight > heaviest_weight THEN + heaviest_weight := current_weight; + heaviest_id := r.id; + END IF; + END LOOP; + RETURN heaviest_id; END $$; -- --- Name: canonical_header_from_set(eth.header_cids[]); Type: FUNCTION; Schema: public; Owner: - +-- Name: canonical_header_from_array(eth.header_cids[]); Type: FUNCTION; Schema: public; Owner: - -- -CREATE FUNCTION public.canonical_header_from_set(headers eth.header_cids[]) RETURNS eth.header_cids +CREATE FUNCTION public.canonical_header_from_array(headers eth.header_cids[]) RETURNS eth.header_cids LANGUAGE plpgsql AS $$ DECLARE @@ -146,10 +147,10 @@ DECLARE has_children_count INT DEFAULT 0; BEGIN -- for each header in the provided set - FOR header IN SELECT * FROM headers + FOREACH header IN ARRAY headers LOOP -- check if it has any children - SELECT * INTO current_child_result FROM has_child(header.block_hash); + current_child_result = has_child(header.block_hash, header.block_number); IF current_child_result.has_child THEN -- if it does, take note has_children_count = has_children_count + 1; @@ -161,17 +162,17 @@ BEGIN -- if none of the headers had children, none is more canonical than the other IF has_children_count = 0 THEN -- return the first one selected - SELECT * INTO canonical_header FROM headers LIMIT 1; + SELECT * INTO canonical_header FROM unnest(headers) LIMIT 1; -- if only one header had children, it can be considered the heaviest/canonical header of the set ELSIF has_children_count = 1 THEN -- return the only header with a child canonical_header = current_header_with_child; -- if there are multiple headers with children ELSE - -- find the canonical the canonical header from the child set - canonical_child = canonical_header_from_set(child_headers); + -- find the canonical header from the child set + canonical_child = canonical_header_from_array(child_headers); -- the header that is parent to this header, is the canonical header at this level - SELECT * INTO canonical_header FROM headers + SELECT * INTO canonical_header FROM unnest(headers) WHERE block_hash = canonical_child.parent_hash; END IF; RETURN canonical_header; @@ -187,48 +188,61 @@ CREATE FUNCTION public.canonical_header_id(height bigint) RETURNS integer LANGUAGE plpgsql AS $$ DECLARE - current_weight INT; - heaviest_weight INT DEFAULT 0; - heaviest_id INT; - r eth.header_cids%ROWTYPE; + canonical_header eth.header_cids; + headers eth.header_cids[]; + header_count INT; + temp_header eth.header_cids; BEGIN - FOR r IN SELECT * FROM eth.header_cids - WHERE block_number = height + -- collect all headers at this height + FOR temp_header IN + SELECT * FROM eth.header_cids WHERE block_number = height LOOP - SELECT INTO current_weight * FROM header_weight(r.block_hash); - IF current_weight > heaviest_weight THEN - heaviest_weight := current_weight; - heaviest_id := r.id; - END IF; + headers = array_append(headers, temp_header); END LOOP; - RETURN heaviest_id; -END + -- count the number of headers collected + header_count = array_length(headers, 1); + -- if we have less than 1 header, return NULL + IF header_count IS NULL OR header_count < 1 THEN + RETURN NULL; + -- if we have one header, return its id + ELSIF header_count = 1 THEN + RETURN headers[1].id; + -- if we have multiple headers we need to determine which one is canonical + ELSE + canonical_header = canonical_header_from_array(headers); + RETURN canonical_header.id; + END IF; +END; $$; -- --- Name: has_child(character varying); Type: FUNCTION; Schema: public; Owner: - +-- Name: has_child(character varying, bigint); Type: FUNCTION; Schema: public; Owner: - -- -CREATE FUNCTION public.has_child(hash character varying) RETURNS public.child_result +CREATE FUNCTION public.has_child(hash character varying, height bigint) RETURNS public.child_result LANGUAGE plpgsql AS $$ DECLARE + child_height INT; + temp_child eth.header_cids; new_child_result child_result; BEGIN + child_height = height + 1; -- short circuit if there are no children SELECT exists(SELECT 1 FROM eth.header_cids WHERE parent_hash = hash + AND block_number = child_height LIMIT 1) INTO new_child_result.has_child; -- collect all the children for this header IF new_child_result.has_child THEN - SELECT * - INTO new_child_result.children - FROM eth.header_cids - WHERE parent_hash = hash; - ELSE + FOR temp_child IN + SELECT * FROM eth.header_cids WHERE parent_hash = hash AND block_number = child_height + LOOP + new_child_result.children = array_append(new_child_result.children, temp_child); + END LOOP; END IF; RETURN new_child_result; END @@ -264,7 +278,7 @@ $$; CREATE FUNCTION public.was_state_removed(path bytea, height bigint, hash character varying) RETURNS boolean LANGUAGE sql AS $$ - SELECT exists(SELECT * + SELECT exists(SELECT 1 FROM eth.state_cids INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id) WHERE state_path = path @@ -272,7 +286,8 @@ CREATE FUNCTION public.was_state_removed(path bytea, height bigint, hash charact AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = hash) - AND state_cids.node_type = 3); + AND state_cids.node_type = 3 + LIMIT 1); $$; @@ -283,7 +298,7 @@ $$; CREATE FUNCTION public.was_storage_removed(path bytea, height bigint, hash character varying) RETURNS boolean LANGUAGE sql AS $$ - SELECT exists(SELECT * + SELECT exists(SELECT 1 FROM eth.storage_cids INNER JOIN eth.state_cids ON (storage_cids.state_id = state_cids.id) INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id) @@ -292,7 +307,8 @@ CREATE FUNCTION public.was_storage_removed(path bytea, height bigint, hash chara AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = hash) - AND storage_cids.node_type = 3); + AND storage_cids.node_type = 3 + LIMIT 1); $$; diff --git a/pkg/eth/eth_state_test.go b/pkg/eth/eth_state_test.go index 58148816d..d8ad427af 100644 --- a/pkg/eth/eth_state_test.go +++ b/pkg/eth/eth_state_test.go @@ -61,14 +61,15 @@ func init() { var _ = Describe("eth state reading tests", func() { var ( - blocks []*types.Block - receipts []types.Receipts - chain *core.BlockChain - db *postgres.DB - api *eth.PublicEthAPI - backend *eth.Backend - chainConfig = params.TestChainConfig - mockTD = big.NewInt(1337) + blocks []*types.Block + receipts []types.Receipts + chain *core.BlockChain + db *postgres.DB + api *eth.PublicEthAPI + backend *eth.Backend + chainConfig = params.TestChainConfig + mockTD = big.NewInt(1337) + expectedCanonicalHeader map[string]interface{} ) It("test init", func() { // db and type initializations @@ -90,6 +91,27 @@ var _ = Describe("eth state reading tests", func() { IntermediateStateNodes: true, IntermediateStorageNodes: true, } + canonicalHeader := blocks[1].Header() + expectedCanonicalHeader = map[string]interface{}{ + "number": (*hexutil.Big)(canonicalHeader.Number), + "hash": canonicalHeader.Hash(), + "parentHash": canonicalHeader.ParentHash, + "nonce": canonicalHeader.Nonce, + "mixHash": canonicalHeader.MixDigest, + "sha3Uncles": canonicalHeader.UncleHash, + "logsBloom": canonicalHeader.Bloom, + "stateRoot": canonicalHeader.Root, + "miner": canonicalHeader.Coinbase, + "difficulty": (*hexutil.Big)(canonicalHeader.Difficulty), + "extraData": hexutil.Bytes([]byte{}), + "size": hexutil.Uint64(canonicalHeader.Size()), + "gasLimit": hexutil.Uint64(canonicalHeader.GasLimit), + "gasUsed": hexutil.Uint64(canonicalHeader.GasUsed), + "timestamp": hexutil.Uint64(canonicalHeader.Time), + "transactionsRoot": canonicalHeader.TxHash, + "receiptsRoot": canonicalHeader.ReceiptHash, + "totalDifficulty": (*hexutil.Big)(mockTD), + } // iterate over the blocks, generating statediff payloads, and transforming the data into Postgres builder := statediff.NewBuilder(chain.StateCache()) for i, block := range blocks { @@ -128,6 +150,17 @@ var _ = Describe("eth state reading tests", func() { _, err = transformer.Transform(0, payload) Expect(err).ToNot(HaveOccurred()) } + + // Insert some non-canonical data into the database so that we test our ability to discern canonicity + indexAndPublisher := eth2.NewIPLDPublisher(db) + api = eth.NewPublicEthAPI(backend, nil) + err = indexAndPublisher.Publish(test_helpers.MockConvertedPayload) + Expect(err).ToNot(HaveOccurred()) + // The non-canonical header has a child + err = indexAndPublisher.Publish(test_helpers.MockConvertedPayloadForChild) + Expect(err).ToNot(HaveOccurred()) + err = publishCode(db, test_helpers.ContractCodeHash, test_helpers.ContractCode) + Expect(err).ToNot(HaveOccurred()) }) defer It("test teardown", func() { eth.TearDownDB(db) @@ -423,4 +456,12 @@ var _ = Describe("eth state reading tests", func() { Expect(val).To(Equal(hexutil.Bytes{})) }) }) + + Describe("eth_getHeaderByNumber", func() { + It("Finds the canonical header based on the header's weight relative to others at the provided height", func() { + header, err := api.GetHeaderByNumber(ctx, number) + Expect(err).ToNot(HaveOccurred()) + Expect(header).To(Equal(expectedCanonicalHeader)) + }) + }) }) diff --git a/pkg/eth/test_helpers/test_data.go b/pkg/eth/test_helpers/test_data.go index c86854d7e..212caf661 100644 --- a/pkg/eth/test_helpers/test_data.go +++ b/pkg/eth/test_helpers/test_data.go @@ -77,9 +77,21 @@ var ( Extra: []byte{}, }, } - ReceiptsRlp, _ = rlp.EncodeToBytes(MockReceipts) - MockBlock = types.NewBlock(&MockHeader, MockTransactions, MockUncles, MockReceipts) - MockHeaderRlp, _ = rlp.EncodeToBytes(MockBlock.Header()) + ReceiptsRlp, _ = rlp.EncodeToBytes(MockReceipts) + MockBlock = types.NewBlock(&MockHeader, MockTransactions, MockUncles, MockReceipts) + MockHeaderRlp, _ = rlp.EncodeToBytes(MockBlock.Header()) + MockChildHeader = types.Header{ + Time: 0, + Number: new(big.Int).Add(BlockNumber, common.Big1), + Root: common.HexToHash("0x0"), + TxHash: common.HexToHash("0x0"), + ReceiptHash: common.HexToHash("0x0"), + Difficulty: big.NewInt(5000001), + Extra: []byte{}, + ParentHash: MockBlock.Header().Hash(), + } + MockChild = types.NewBlock(&MockChildHeader, MockTransactions, MockUncles, MockReceipts) + MockChildRlp, _ = rlp.EncodeToBytes(MockChild.Header()) Address = common.HexToAddress("0xaE9BEa628c4Ce503DcFD7E305CaB4e29E7476592") AnotherAddress = common.HexToAddress("0xaE9BEa628c4Ce503DcFD7E305CaB4e29E7476593") ContractAddress = crypto.CreateAddress(SenderAddr, MockTransactions[2].Nonce()) @@ -351,6 +363,15 @@ var ( StorageNodes: MockStorageNodes, StateNodes: MockStateNodes, } + MockConvertedPayloadForChild = eth.ConvertedPayload{ + TotalDifficulty: MockChild.Difficulty(), + Block: MockChild, + Receipts: MockReceipts, + TxMetaData: MockTrxMeta, + ReceiptMetaData: MockRctMeta, + StorageNodes: MockStorageNodes, + StateNodes: MockStateNodes, + } MockCIDWrapper = ð2.CIDWrapper{ BlockNumber: new(big.Int).Set(BlockNumber),