-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12710 from influxdata/sgc/data-gen
Add data generation subcommand to influxd
- Loading branch information
Showing
40 changed files
with
4,631 additions
and
514 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
package generate | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"os" | ||
"time" | ||
|
||
"github.com/influxdata/influxdb/bolt" | ||
"github.com/influxdata/influxdb/cmd/influxd/internal/profile" | ||
"github.com/influxdata/influxdb/internal/fs" | ||
"github.com/influxdata/influxdb/pkg/data/gen" | ||
"github.com/spf13/cobra" | ||
) | ||
|
||
// Command is the cobra command for the "generate" subcommand. It requires
// exactly one positional argument, the path to a TOML schema file, and
// delegates execution to generateFE.
var Command = &cobra.Command{ | ||
Use: "generate <schema.toml>", | ||
Short: "Generate time series data sets using TOML schema", | ||
Long: ` | ||
This command will generate time series data direct to disk using schema | ||
defined in a TOML file. Use the help-schema subcommand to produce a TOML | ||
file to STDOUT, which includes documentation describing the available options. | ||
NOTES: | ||
* The influxd server should not be running when using the generate tool | ||
as it modifies the index and TSM data. | ||
* This tool is intended for development and testing purposes only and | ||
SHOULD NOT be run on a production server. | ||
`, | ||
Args: cobra.ExactArgs(1), | ||
RunE: generateFE, | ||
} | ||
|
||
var flags struct { | ||
printOnly bool | ||
storageSpec StorageSpec | ||
profile profile.Config | ||
} | ||
|
||
func init() { | ||
Command.Flags().SortFlags = false | ||
|
||
pfs := Command.PersistentFlags() | ||
pfs.SortFlags = false | ||
pfs.BoolVar(&flags.printOnly, "print", false, "Print data spec and exit") | ||
|
||
flags.storageSpec.AddFlags(Command, pfs) | ||
|
||
pfs.StringVar(&flags.profile.CPU, "cpuprofile", "", "Collect a CPU profile") | ||
pfs.StringVar(&flags.profile.Memory, "memprofile", "", "Collect a memory profile") | ||
} | ||
|
||
func generateFE(_ *cobra.Command, args []string) error { | ||
storagePlan, err := flags.storageSpec.Plan() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
storagePlan.PrintPlan(os.Stdout) | ||
|
||
spec, err := gen.NewSpecFromPath(args[0]) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if err = assignOrgBucket(spec); err != nil { | ||
return err | ||
} | ||
|
||
if flags.printOnly { | ||
return nil | ||
} | ||
|
||
return exec(storagePlan, spec) | ||
} | ||
|
||
func assignOrgBucket(spec *gen.Spec) error { | ||
boltFile, err := fs.BoltFile() | ||
if err != nil { | ||
return err | ||
} | ||
c := bolt.NewClient() | ||
c.Path = boltFile | ||
if err := c.Open(context.Background()); err != nil { | ||
return err | ||
} | ||
|
||
org, err := c.FindOrganizationByName(context.Background(), flags.storageSpec.Organization) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
bucket, err := c.FindBucketByName(context.Background(), org.ID, flags.storageSpec.Bucket) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
c.Close() | ||
|
||
spec.OrgID = org.ID | ||
spec.BucketID = bucket.ID | ||
|
||
return nil | ||
} | ||
|
||
func exec(storagePlan *StoragePlan, spec *gen.Spec) error { | ||
tr := gen.TimeRange{ | ||
Start: storagePlan.StartTime, | ||
End: storagePlan.EndTime, | ||
} | ||
sg := gen.NewSeriesGeneratorFromSpec(spec, tr) | ||
|
||
stop := flags.profile.Start() | ||
defer stop() | ||
|
||
var files []string | ||
start := time.Now().UTC() | ||
defer func() { | ||
elapsed := time.Since(start) | ||
fmt.Println() | ||
fmt.Println("Generated:") | ||
for _, f := range files { | ||
fmt.Println(f) | ||
} | ||
fmt.Println() | ||
fmt.Printf("Total time: %0.1f seconds\n", elapsed.Seconds()) | ||
}() | ||
|
||
path, err := fs.InfluxDir() | ||
if err != nil { | ||
return err | ||
} | ||
g := &Generator{Clean: storagePlan.Clean} | ||
files, err = g.Run(context.Background(), path, sg) | ||
return err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
package generate | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/spf13/cobra" | ||
) | ||
|
||
var helpSchemaCommand = &cobra.Command{ | ||
Use: "help-schema", | ||
Short: "Print a documented TOML schema to STDOUT", | ||
Run: func(cmd *cobra.Command, args []string) { | ||
fmt.Print(documentedSchema) | ||
}, | ||
} | ||
|
||
// init registers helpSchemaCommand as a subcommand of Command.
func init() { | ||
Command.AddCommand(helpSchemaCommand) | ||
} | ||
|
||
const documentedSchema = `title = "Documented schema" | ||
# limit the maximum number of series generated across all measurements | ||
# | ||
# series-limit: integer, optional (default: unlimited) | ||
[[measurements]] | ||
# name of measurement | ||
# | ||
# NOTE: | ||
# Multiple definitions of the same measurement name are allowed and | ||
# will be merged together. | ||
name = "cpu" | ||
# sample: float; where 0 < sample ≤ 1.0 (default: 0.5) | ||
# sample a subset of the tag set | ||
# | ||
# sample 25% of the tags | ||
# | ||
sample = 0.25 | ||
# Keys for defining a tag | ||
# | ||
# name: string, required | ||
# Name of tag | ||
# | ||
# source: array<string> or object | ||
# | ||
# A literal array of string values defines the tag values. | ||
# | ||
# An object defines more complex generators. The type key determines the | ||
# type of generator. | ||
# | ||
# source types: | ||
# | ||
# type: "sequence" | ||
# generate a sequence of tag values | ||
# | ||
# format: string | ||
# a format string for the values (default: "value%s") | ||
# start: int (default: 0) | ||
# beginning value | ||
# count: int, required | ||
# ending value | ||
# | ||
# type: "file" | ||
# generate a sequence of tag values from a file source. | ||
# The data in the file is sorted, deduplicated and verified to be valid UTF-8 | ||
# | ||
# path: string | ||
# absolute path or relative path to current toml file | ||
tags = [ | ||
# example sequence tag source. The range of values are automatically | ||
# prefixed with 0s | ||
# to ensure correct sort behavior. | ||
{ name = "host", source = { type = "sequence", format = "host-%s", start = 0, count = 5 } }, | ||
# tags can also be sourced from a file. The path is relative to the | ||
# schema.toml. | ||
# Each value must be on a new line. The file is also sorted, deduplicated | ||
# and UTF-8 validated. | ||
{ name = "rack", source = { type = "file", path = "files/racks.txt" } }, | ||
# Example string array source, which is also deduplicated and sorted | ||
{ name = "region", source = ["us-west-01","us-west-02","us-east"] }, | ||
] | ||
# Keys for defining a field | ||
# | ||
# name: string, required | ||
# Name of field | ||
# | ||
# count: int, required | ||
# The maximum number of values to generate. When multiple fields | ||
# have the same count and time-spec, they will share timestamps. | ||
# | ||
# A time-spec can be either time-precision or time-interval, which | ||
# determines how timestamps are generated and may also influence | ||
# the time range and number of values generated. | ||
# | ||
# time-precision: string [ns, us, ms, s, m, h] (default: ms) | ||
# Specifies the precision (rounding) for generated timestamps. | ||
# | ||
# If the precision results in fewer than "count" intervals for the | ||
# given time range the number of values will be reduced. | ||
# | ||
# Example: | ||
# count = 1000, start = 0s, end = 100s, time-precision = s | ||
# 100 values will be generated at [0s, 1s, 2s, ..., 99s] | ||
# | ||
# If the precision results in greater than "count" intervals for the | ||
# given time range, the interval will be rounded to the nearest multiple of | ||
# time-precision. | ||
# | ||
# Example: | ||
# count = 10, start = 0s, end = 100s, time-precision = s | ||
# 10 values will be generated at [0s, 10s, 20s, ..., 90s] | ||
# | ||
# time-interval: Go duration string (eg 90s, 1h30m) | ||
# Specifies the delta between generated timestamps. | ||
# | ||
# If the delta results in fewer than "count" intervals for the | ||
# given time range the number of values will be reduced. | ||
# | ||
# Example: | ||
# count = 100, start = 0s, end = 100s, time-interval = 10s | ||
# 10 values will be generated at [0s, 10s, 20s, ..., 90s] | ||
# | ||
# If the delta results in greater than "count" intervals for the | ||
# given time range, the start-time will be adjusted to ensure "count" values. | ||
# | ||
# Example: | ||
# count = 20, start = 0s, end = 1000s, time-interval = 10s | ||
# 20 values will be generated at [800s, 810s, ..., 900s, ..., 990s] | ||
# | ||
# source: int, float, boolean, string, array or object | ||
# | ||
# A literal int, float, boolean or string will produce | ||
# a constant value of the same data type. | ||
# | ||
# A literal array of homogeneous values will generate a repeating | ||
# sequence. | ||
# | ||
# An object defines more complex generators. The type key determines the | ||
# type of generator. | ||
# | ||
# source types: | ||
# | ||
# type: "rand<float>" | ||
# generate random float values | ||
# seed: seed to random number generator (default: 0) | ||
# min: minimum value (default: 0.0) | ||
# max: maximum value (default: 1.0) | ||
# | ||
# type: "zipf<integer>" | ||
# generate random integer values using a Zipf distribution | ||
# The generator generates values k ∈ [0, imax] such that P(k) | ||
# is proportional to (v + k) ** (-s). Requirements: s > 1 and v ≥ 1. | ||
# See https://golang.org/pkg/math/rand/#NewZipf for more information. | ||
# | ||
# seed: seed to random number generator (default: 0) | ||
# s: float > 1 (required) | ||
# v: float ≥ 1 (required) | ||
# imax: integer (required) | ||
# | ||
fields = [ | ||
# Example constant float | ||
{ name = "system", count = 5000, source = 2.5 }, | ||
# Example random floats | ||
{ name = "user", count = 5000, source = { type = "rand<float>", seed = 10, min = 0.0, max = 1.0 } }, | ||
] | ||
# Multiple measurements may be defined. | ||
[[measurements]] | ||
name = "mem" | ||
tags = [ | ||
{ name = "host", source = { type = "sequence", format = "host-%s", start = 0, count = 5 } }, | ||
{ name = "region", source = ["us-west-01","us-west-02","us-east"] }, | ||
] | ||
fields = [ | ||
# An example of a sequence of integer values | ||
{ name = "free", count = 100, source = [10,15,20,25,30,35,30], time-precision = "ms" }, | ||
{ name = "low_mem", count = 100, source = [false,true,true], time-precision = "ms" }, | ||
] | ||
` |
Oops, something went wrong.