Skip to content

Commit

Permalink
Merge pull request #12710 from influxdata/sgc/data-gen
Browse files Browse the repository at this point in the history
Add data generation subcommand to influxd
  • Loading branch information
stuartcarnie authored Mar 20, 2019
2 parents 47882c8 + 5b2e467 commit 8abb76c
Show file tree
Hide file tree
Showing 40 changed files with 4,631 additions and 514 deletions.
139 changes: 139 additions & 0 deletions cmd/influxd/generate/command.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package generate

import (
"context"
"fmt"
"os"
"time"

"github.com/influxdata/influxdb/bolt"
"github.com/influxdata/influxdb/cmd/influxd/internal/profile"
"github.com/influxdata/influxdb/internal/fs"
"github.com/influxdata/influxdb/pkg/data/gen"
"github.com/spf13/cobra"
)

// Command is the cobra command for the "generate" subcommand. It requires
// exactly one positional argument (the path to a TOML schema) and delegates
// to generateFE. Flags and subcommands are attached to it by the init
// functions in this package.
var Command = &cobra.Command{
Use: "generate <schema.toml>",
Short: "Generate time series data sets using TOML schema",
Long: `
This command will generate time series data direct to disk using schema
defined in a TOML file. Use the help-schema subcommand to produce a TOML
file to STDOUT, which includes documentation describing the available options.
NOTES:
* The influxd server should not be running when using the generate tool
as it modifies the index and TSM data.
* This tool is intended for development and testing purposes only and
SHOULD NOT be run on a production server.
`,
// Exactly one argument is accepted: the schema file path read by generateFE.
Args: cobra.ExactArgs(1),
RunE: generateFE,
}

// flags holds the command-line options for Command. The fields are bound to
// their flags in init and read by generateFE, assignOrgBucket and exec.
var flags struct {
// printOnly is set by --print; when true generateFE returns before
// generating any data.
printOnly bool
// storageSpec registers its own flags via AddFlags and supplies the
// storage plan plus the organization and bucket names.
storageSpec StorageSpec
// profile carries the --cpuprofile and --memprofile settings.
profile profile.Config
}

// init wires up the command-line flags for Command. Sorting is disabled on
// both flag sets, so the registration order below is significant and must
// stay: --print, the storage spec flags, then the profiling flags.
func init() {
	persistent := Command.PersistentFlags()

	// Turn off sorting for the local and the persistent flag sets.
	Command.Flags().SortFlags = false
	persistent.SortFlags = false

	persistent.BoolVar(&flags.printOnly, "print", false, "Print data spec and exit")

	// The storage spec contributes its own group of flags.
	flags.storageSpec.AddFlags(Command, persistent)

	// Optional profiling outputs.
	persistent.StringVar(&flags.profile.CPU, "cpuprofile", "", "Collect a CPU profile")
	persistent.StringVar(&flags.profile.Memory, "memprofile", "", "Collect a memory profile")
}

// generateFE is the RunE handler for Command.
//
// It builds the storage plan from the command-line flags and prints it to
// STDOUT, loads the generator spec from the TOML file named by args[0], and
// resolves the organization and bucket IDs into that spec. When --print was
// given it stops there; otherwise it hands the plan and spec to exec.
//
// The first error encountered in any step is returned unchanged.
func generateFE(_ *cobra.Command, args []string) error {
	plan, planErr := flags.storageSpec.Plan()
	if planErr != nil {
		return planErr
	}
	plan.PrintPlan(os.Stdout)

	schema, loadErr := gen.NewSpecFromPath(args[0])
	if loadErr != nil {
		return loadErr
	}

	if lookupErr := assignOrgBucket(schema); lookupErr != nil {
		return lookupErr
	}

	// With --print the plan has been shown and the inputs validated, so
	// generation is skipped.
	if !flags.printOnly {
		return exec(plan, schema)
	}
	return nil
}

// assignOrgBucket looks up the organization and bucket named by the storage
// flags in the bolt metadata store and records their IDs on spec.
//
// The bolt file path comes from fs.BoltFile. spec is only modified when both
// lookups succeed; any error from locating the file, opening the store, or
// either lookup is returned unchanged.
func assignOrgBucket(spec *gen.Spec) error {
	boltFile, err := fs.BoltFile()
	if err != nil {
		return err
	}

	c := bolt.NewClient()
	c.Path = boltFile

	ctx := context.Background()
	if err := c.Open(ctx); err != nil {
		return err
	}
	// Close the client on every exit path. Previously it was only closed
	// after both lookups succeeded, so a missing organization or bucket
	// returned with the bolt store still open. The result of Close is not
	// inspected, matching the earlier behaviour.
	defer c.Close()

	org, err := c.FindOrganizationByName(ctx, flags.storageSpec.Organization)
	if err != nil {
		return err
	}

	bucket, err := c.FindBucketByName(ctx, org.ID, flags.storageSpec.Bucket)
	if err != nil {
		return err
	}

	spec.OrgID = org.ID
	spec.BucketID = bucket.ID

	return nil
}

// exec generates the data described by spec over the time range taken from
// storagePlan and writes it beneath the directory returned by fs.InfluxDir.
//
// Profiling configured through the command-line flags is started before
// generation and stopped when exec returns. A summary listing the files
// returned by the generator and the total elapsed time is always printed to
// STDOUT on return, including when an error occurs (in which case the file
// list may be empty).
//
// It returns the error from fs.InfluxDir or from the generator run, if any.
func exec(storagePlan *StoragePlan, spec *gen.Spec) error {
	tr := gen.TimeRange{
		Start: storagePlan.StartTime,
		End:   storagePlan.EndTime,
	}
	sg := gen.NewSeriesGeneratorFromSpec(spec, tr)

	// Deferred calls run in reverse order: the summary below is printed
	// first, then profiling is stopped.
	stop := flags.profile.Start()
	defer stop()

	var files []string
	// Keep the monotonic clock reading on start: calling UTC() would strip
	// it, making time.Since fall back to wall-clock subtraction, which is
	// skewed by any clock adjustment that happens during a long run.
	start := time.Now()
	defer func() {
		elapsed := time.Since(start)
		fmt.Println()
		fmt.Println("Generated:")
		for _, f := range files {
			fmt.Println(f)
		}
		fmt.Println()
		fmt.Printf("Total time: %0.1f seconds\n", elapsed.Seconds())
	}()

	path, err := fs.InfluxDir()
	if err != nil {
		return err
	}

	g := &Generator{Clean: storagePlan.Clean}
	// Assign to the outer files variable (no :=) so the deferred summary
	// sees the generated file list.
	files, err = g.Run(context.Background(), path, sg)
	return err
}
187 changes: 187 additions & 0 deletions cmd/influxd/generate/command_helpschema.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
package generate

import (
"fmt"

"github.com/spf13/cobra"
)

// helpSchemaCommand is the "help-schema" subcommand. It writes the
// documentedSchema constant to STDOUT and ignores its arguments.
var helpSchemaCommand = &cobra.Command{
Use: "help-schema",
Short: "Print a documented TOML schema to STDOUT",
Run: func(cmd *cobra.Command, args []string) {
// fmt.Print is used so the schema is emitted verbatim, with no extra
// trailing newline.
fmt.Print(documentedSchema)
},
}

// init registers help-schema as a subcommand of the generate Command.
func init() {
Command.AddCommand(helpSchemaCommand)
}

// documentedSchema is the annotated example TOML schema printed verbatim by
// the help-schema subcommand. It is user-facing documentation: the comments
// inside the literal describe the keys accepted in a schema file, so key names
// and example results shown there must match what the keys are really called
// and what the examples really produce.
const documentedSchema = `title = "Documented schema"
# limit the maximum number of series generated across all measurements
#
# series-limit: integer, optional (default: unlimited)
[[measurements]]
# name of measurement
#
# NOTE:
# Multiple definitions of the same measurement name are allowed and
# will be merged together.
name = "cpu"
# sample: float; where 0 < sample ≤ 1.0 (default: 0.5)
# sample a subset of the tag set
#
# sample 25% of the tags
#
sample = 0.25
# Keys for defining a tag
#
# name: string, required
# Name of tag
#
# source: array<string> or object
#
# A literal array of string values defines the tag values.
#
# An object defines more complex generators. The type key determines the
# type of generator.
#
# source types:
#
# type: "sequence"
# generate a sequence of tag values
#
# format: string
# a format string for the values (default: "value%s")
# start: int (default: 0)
# beginning value
# count: int, required
# ending value
#
# type: "file"
# generate a sequence of tag values from a file source.
# The data in the file is sorted, deduplicated and verified to be valid UTF-8
#
# path: string
# absolute path or relative path to current toml file
tags = [
# example sequence tag source. The range of values are automatically
# prefixed with 0s
# to ensure correct sort behavior.
{ name = "host", source = { type = "sequence", format = "host-%s", start = 0, count = 5 } },
# tags can also be sourced from a file. The path is relative to the
# schema.toml.
# Each value must be on a new line. The file is also sorted, deduplicated
# and UTF-8 validated.
{ name = "rack", source = { type = "file", path = "files/racks.txt" } },
# Example string array source, which is also deduplicated and sorted
{ name = "region", source = ["us-west-01","us-west-02","us-east"] },
]
# Keys for defining a field
#
# name: string, required
# Name of field
#
# count: int, required
# The maximum number of values to generate. When multiple fields
# have the same count and time-spec, they will share timestamps.
#
# A time-spec can be either time-precision or time-interval, which
# determines how timestamps are generated and may also influence
# the time range and number of values generated.
#
# time-precision: string [ns, us, ms, s, m, h] (default: ms)
# Specifies the precision (rounding) for generated timestamps.
#
# If the precision results in fewer than "count" intervals for the
# given time range the number of values will be reduced.
#
# Example:
# count = 1000, start = 0s, end = 100s, time-precision = s
# 100 values will be generated at [0s, 1s, 2s, ..., 99s]
#
# If the precision results in greater than "count" intervals for the
# given time range, the interval will be rounded to the nearest multiple of
# time-precision.
#
# Example:
# count = 10, start = 0s, end = 100s, time-precision = s
# 10 values will be generated at [0s, 10s, 20s, ..., 90s]
#
# time-interval: Go duration string (eg 90s, 1h30m)
# Specifies the delta between generated timestamps.
#
# If the delta results in fewer than "count" intervals for the
# given time range the number of values will be reduced.
#
# Example:
# count = 100, start = 0s, end = 100s, time-interval = 10s
# 10 values will be generated at [0s, 10s, 20s, ..., 90s]
#
# If the delta results in greater than "count" intervals for the
# given time range, the start-time will be adjusted to ensure "count" values.
#
# Example:
# count = 20, start = 0s, end = 1000s, time-interval = 10s
# 20 values will be generated at [800s, 810s, ..., 900s, ..., 990s]
#
# source: int, float, boolean, string, array or object
#
# A literal int, float, boolean or string will produce
# a constant value of the same data type.
#
# A literal array of homogeneous values will generate a repeating
# sequence.
#
# An object defines more complex generators. The type key determines the
# type of generator.
#
# source types:
#
# type: "rand<float>"
# generate random float values
# seed: seed to random number generator (default: 0)
# min: minimum value (default: 0.0)
# max: maximum value (default: 1.0)
#
# type: "zipf<integer>"
# generate random integer values using a Zipf distribution
# The generator generates values k ∈ [0, imax] such that P(k)
# is proportional to (v + k) ** (-s). Requirements: s > 1 and v ≥ 1.
# See https://golang.org/pkg/math/rand/#NewZipf for more information.
#
# seed: seed to random number generator (default: 0)
# s: float > 1 (required)
# v: float ≥ 1 (required)
# imax: integer (required)
#
fields = [
# Example constant float
{ name = "system", count = 5000, source = 2.5 },
# Example random floats
{ name = "user", count = 5000, source = { type = "rand<float>", seed = 10, min = 0.0, max = 1.0 } },
]
# Multiple measurements may be defined.
[[measurements]]
name = "mem"
tags = [
{ name = "host", source = { type = "sequence", format = "host-%s", start = 0, count = 5 } },
{ name = "region", source = ["us-west-01","us-west-02","us-east"] },
]
fields = [
# An example of a sequence of integer values
{ name = "free", count = 100, source = [10,15,20,25,30,35,30], time-precision = "ms" },
{ name = "low_mem", count = 100, source = [false,true,true], time-precision = "ms" },
]
`
Loading

0 comments on commit 8abb76c

Please sign in to comment.