--
-- Creates Cassandra keyspace with tables for traces and dependencies.
--
-- Required parameters:
--
--   keyspace
--     name of the keyspace
--   replication
--     replication strategy for the keyspace, such as
--       for prod environments
--         {'class': 'NetworkTopologyStrategy', '$datacenter': '${replication_factor}' }
--       for test environments
--         {'class': 'SimpleStrategy', 'replication_factor': '1'}
--   trace_ttl
--     default time to live for trace data, in seconds
--   dependencies_ttl
--     default time to live for dependencies data, in seconds (0 for no TTL)
--
-- Non-configurable settings:
--   gc_grace_seconds is non-zero, see: http://www.uberobert.com/cassandra_gc_grace_disables_hinted_handoff/
--   For TTL of 2 days, compaction window is 1 hour, rule of thumb here: http://thelastpickle.com/blog/2016/12/08/TWCS-part1.html
--
-- This file is the rendered form of the template described above. The values
-- used below are:
--   keyspace         = mykeyspace
--   replication      = {'class': 'SimpleStrategy', 'replication_factor': '1'}
--   trace_ttl        = 604800 seconds (7 days)
--   dependencies_ttl = 0 (no TTL)
--
-- NOTE(review): every table below sets gc_grace_seconds = 0, which contradicts
-- the "gc_grace_seconds is non-zero" statement above and the per-table
-- "3 hours of downtime acceptable" remarks (3 hours = 10800 seconds).
-- The value is left unchanged here; confirm which one is intended.
--
-- NOTE(review): the collection element types (<...>) were reconstructed from
-- the column names and the user-defined types declared in this file; verify
-- against the deployed schema before applying.

CREATE KEYSPACE IF NOT EXISTS mykeyspace
    WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'};

-- Typed key/value pair; value_type names which of the value_* fields is set.
-- NOTE(review): the value_type semantics are presumed from the field names — confirm.
CREATE TYPE IF NOT EXISTS mykeyspace.keyvalue (
    key             text,
    value_type      text,
    value_string    text,
    value_bool      boolean,
    value_long      bigint,
    value_double    double,
    value_binary    blob
);

CREATE TYPE IF NOT EXISTS mykeyspace.log (
    ts      bigint, -- microseconds since epoch
    fields  frozen<list<frozen<keyvalue>>>
);

CREATE TYPE IF NOT EXISTS mykeyspace.span_ref (
    ref_type    text,
    trace_id    blob,
    span_id     bigint
);

CREATE TYPE IF NOT EXISTS mykeyspace.process (
    service_name    text,
    tags            frozen<list<frozen<keyvalue>>>
);

-- Notice we have span_hash. This exists only for zipkin backwards compat. Zipkin allows spans with the same ID.
-- Note: Cassandra re-orders non-PK columns alphabetically, so the table looks differently in CQLSH "describe table".
-- start_time is bigint instead of timestamp as we require microsecond precision
CREATE TABLE IF NOT EXISTS mykeyspace.traces (
    trace_id        blob,
    span_id         bigint,
    span_hash       bigint,
    parent_id       bigint,
    operation_name  text,
    flags           int,
    start_time      bigint, -- microseconds since epoch
    duration        bigint, -- microseconds
    tags            list<frozen<keyvalue>>,
    logs            list<frozen<log>>,
    refs            list<frozen<span_ref>>,
    process         frozen<process>,
    PRIMARY KEY (trace_id, span_id, span_hash)
)
    WITH compaction = {
        'compaction_window_size': '2',
        'compaction_window_unit': 'DAYS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = 604800
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 0; -- NOTE(review): comment said "3 hours of downtime acceptable on nodes" (10800 s) — confirm

CREATE TABLE IF NOT EXISTS mykeyspace.service_names (
    service_name text,
    PRIMARY KEY (service_name)
)
    WITH compaction = {
        'min_threshold': '4',
        'max_threshold': '32',
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'
    }
    AND default_time_to_live = 604800
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 0; -- NOTE(review): comment said "3 hours of downtime acceptable on nodes" (10800 s) — confirm

CREATE TABLE IF NOT EXISTS mykeyspace.operation_names_v2 (
    service_name        text,
    span_kind           text,
    operation_name      text,
    PRIMARY KEY ((service_name), span_kind, operation_name)
)
    WITH compaction = {
        'min_threshold': '4',
        'max_threshold': '32',
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'
    }
    AND default_time_to_live = 604800
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 0; -- NOTE(review): comment said "3 hours of downtime acceptable on nodes" (10800 s) — confirm

-- index of trace IDs by service + operation names, sorted by span start_time.
CREATE TABLE IF NOT EXISTS mykeyspace.service_operation_index (
    service_name        text,
    operation_name      text,
    start_time          bigint, -- microseconds since epoch
    trace_id            blob,
    PRIMARY KEY ((service_name, operation_name), start_time)
) WITH CLUSTERING ORDER BY (start_time DESC)
    AND compaction = {
        'compaction_window_size': '1',
        'compaction_window_unit': 'HOURS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = 604800
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 0; -- NOTE(review): comment said "3 hours of downtime acceptable on nodes" (10800 s) — confirm

CREATE TABLE IF NOT EXISTS mykeyspace.service_name_index (
    service_name      text,
    bucket            int,
    start_time        bigint, -- microseconds since epoch
    trace_id          blob,
    PRIMARY KEY ((service_name, bucket), start_time)
) WITH CLUSTERING ORDER BY (start_time DESC)
    AND compaction = {
        'compaction_window_size': '1',
        'compaction_window_unit': 'HOURS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = 604800
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 0; -- NOTE(review): comment said "3 hours of downtime acceptable on nodes" (10800 s) — confirm

CREATE TABLE IF NOT EXISTS mykeyspace.duration_index (
    service_name    text,      -- service name
    operation_name  text,      -- operation name, or blank for queries without span name
    bucket          timestamp, -- time bucket, - the start_time of the given span rounded to an hour
    duration        bigint,    -- span duration, in microseconds
    start_time      bigint,    -- microseconds since epoch
    trace_id        blob,
    PRIMARY KEY ((service_name, operation_name, bucket), duration, start_time, trace_id)
) WITH CLUSTERING ORDER BY (duration DESC, start_time DESC)
    AND compaction = {
        'compaction_window_size': '1',
        'compaction_window_unit': 'HOURS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = 604800
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 0; -- NOTE(review): comment said "3 hours of downtime acceptable on nodes" (10800 s) — confirm

-- a bucketing strategy may have to be added for tag queries
-- we can make this table even better by adding a timestamp to it
CREATE TABLE IF NOT EXISTS mykeyspace.tag_index (
    service_name    text,
    tag_key         text,
    tag_value       text,
    start_time      bigint, -- microseconds since epoch
    trace_id        blob,
    span_id         bigint,
    PRIMARY KEY ((service_name, tag_key, tag_value), start_time, trace_id, span_id)
)
    WITH CLUSTERING ORDER BY (start_time DESC)
    AND compaction = {
        'compaction_window_size': '1',
        'compaction_window_unit': 'HOURS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = 604800
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 0; -- NOTE(review): comment said "3 hours of downtime acceptable on nodes" (10800 s) — confirm

CREATE TYPE IF NOT EXISTS mykeyspace.dependency (
    parent          text,
    child           text,
    call_count      bigint,
    source          text
);

-- compaction strategy is intentionally different as compared to other tables due to the size of dependencies data
CREATE TABLE IF NOT EXISTS mykeyspace.dependencies_v2 (
    ts_bucket    timestamp,
    ts           timestamp,
    dependencies list<frozen<dependency>>,
    PRIMARY KEY (ts_bucket, ts)
) WITH CLUSTERING ORDER BY (ts DESC)
    AND compaction = {
        'min_threshold': '4',
        'max_threshold': '32',
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'
    }
    AND default_time_to_live = 0;

-- adaptive sampling tables
-- ./plugin/storage/cassandra/samplingstore/storage.go
CREATE TABLE IF NOT EXISTS mykeyspace.operation_throughput (
    bucket        int,
    ts            timeuuid,
    throughput    text,
    PRIMARY KEY (bucket, ts)
) WITH CLUSTERING ORDER BY (ts DESC);

CREATE TABLE IF NOT EXISTS mykeyspace.sampling_probabilities (
    bucket        int,
    ts            timeuuid,
    hostname      text,
    probabilities text,
    PRIMARY KEY (bucket, ts)
) WITH CLUSTERING ORDER BY (ts DESC);

-- distributed lock
-- ./plugin/pkg/distributedlock/cassandra/lock.go
CREATE TABLE IF NOT EXISTS mykeyspace.leases (
    name  text,
    owner text,
    PRIMARY KEY (name)
);