Skip to content

Commit

Permalink
Merge pull request #9793 from influxdata/bj-delete-nonprintable-unicode
Browse files Browse the repository at this point in the history
Add key sanitization to deletetsm
  • Loading branch information
benbjohnson authored Apr 30, 2018
2 parents f459d87 + 4cb36d5 commit 2e47097
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 5 deletions.
14 changes: 9 additions & 5 deletions cmd/influx_inspect/deletetsm/deletetsm.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type Command struct {
Stdout io.Writer

measurement string // measurement to delete
sanitize bool // remove all keys with non-printable unicode
verbose bool // verbose logging
}

Expand All @@ -36,6 +37,7 @@ func NewCommand() *Command {
func (cmd *Command) Run(args ...string) (err error) {
fs := flag.NewFlagSet("deletetsm", flag.ExitOnError)
fs.StringVar(&cmd.measurement, "measurement", "", "")
fs.BoolVar(&cmd.sanitize, "sanitize", false, "")
fs.BoolVar(&cmd.verbose, "v", false, "")
fs.SetOutput(cmd.Stdout)
fs.Usage = cmd.printUsage
Expand All @@ -51,9 +53,9 @@ func (cmd *Command) Run(args ...string) (err error) {
log.SetOutput(ioutil.Discard)
}

// Validate measurement flag.
if cmd.measurement == "" {
return fmt.Errorf("measurement name required")
// Validate measurement or sanitize flag.
if cmd.measurement == "" && !cmd.sanitize {
return fmt.Errorf("-measurement or -sanitize flag required")
}

// Process each TSM file.
Expand Down Expand Up @@ -113,8 +115,8 @@ func (cmd *Command) process(path string) error {

// Skip block if this is the measurement and time range we are deleting.
series, _ := tsm1.SeriesAndFieldFromCompositeKey(key)
measurement, _ := models.ParseKey(series)
if string(measurement) == cmd.measurement {
measurement, tags := models.ParseKey(series)
if string(measurement) == cmd.measurement || (cmd.sanitize && !models.ValidKeyTokens(measurement, tags)) {
log.Printf("deleting block: %s (%s-%s) sz=%d",
key,
time.Unix(0, minTime).UTC().Format(time.RFC3339Nano),
Expand Down Expand Up @@ -147,6 +149,8 @@ Usage: influx_inspect deletetsm [flags] path...
-measurement NAME
The name of the measurement to remove.
-sanitize
Remove all keys with non-printable unicode characters.
-v
Enable verbose logging.`)
}
29 changes: 29 additions & 0 deletions models/points.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"strconv"
"strings"
"time"
"unicode"
"unicode/utf8"

"github.com/influxdata/influxdb/pkg/escape"
)
Expand Down Expand Up @@ -2399,3 +2401,30 @@ func appendField(b []byte, k string, v interface{}) []byte {

return b
}

// ValidKeyToken reports whether s is usable as a measurement name, tag key, or
// tag value: it must be well-formed UTF-8 made up solely of printable runes,
// and it must not contain the Unicode replacement character (U+FFFD).
// The empty string is considered valid.
func ValidKeyToken(s string) bool {
	for pos := 0; pos < len(s); {
		// Malformed input decodes as utf8.RuneError, which is the same code
		// point as unicode.ReplacementChar, so a single comparison rejects
		// both invalid encodings and literal replacement characters.
		ch, width := utf8.DecodeRuneInString(s[pos:])
		if ch == unicode.ReplacementChar || !unicode.IsPrint(ch) {
			return false
		}
		pos += width
	}
	return true
}

// ValidKeyTokens reports whether the measurement name and every tag key and
// tag value pass ValidKeyToken. It returns false as soon as one token fails.
func ValidKeyTokens(name string, tags Tags) bool {
	if !ValidKeyToken(name) {
		return false
	}
	for _, kv := range tags {
		// Both halves of the pair must be valid for the tag to be accepted.
		if ValidKeyToken(string(kv.Key)) && ValidKeyToken(string(kv.Value)) {
			continue
		}
		return false
	}
	return true
}

0 comments on commit 2e47097

Please sign in to comment.