Skip to content

Commit

Permalink
fix(mock): global seed flag is ignored (#376)
Browse files Browse the repository at this point in the history
* fix(mock): global seed flag is ignored

* fix: misc, readme, tests, schema

* fix: missing flag buffer-size in commands

* refactor: flags

* refactor: flags catchErrors, config

* refactor: flags caches, emptyInput

* refactor: flags pprof, mask

* refactor: flags repeat, repeat-until, repeat-while

* refactor: flags seed, serve

* refactor: flags fix writing flag

* refactor: flags skips

* refactor: flags stats, xmlsubscriber

* fix: add missing short command descriptions

* fix: flags
  • Loading branch information
adrienaury authored Dec 18, 2024
1 parent 8d6f426 commit 0e2e9ea
Show file tree
Hide file tree
Showing 11 changed files with 361 additions and 71 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ Types of changes
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [1.29.1]

- `Fixed` mock command ignores global seed flag
- `Fixed` missing flag `buffer-size` in `mock`, `xml` and `play` commands
- `Fixed` missing flag `load-cache` in `mock` command
- `Fixed` remove unused flags from `mock`, `xml`, `parquet`, `jsonschema`, `flow` and `play` commands

## [1.29.0]

- `Added` mask `apply` to externalize masks
Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ You can use [LINO](https://github.com/CGI-FR/LINO) to extract sample data from a
You can also generate data with a simple yaml configuration file.

**Capabilities**

- credibility : generated data is not distinguishable from real data
- data synthesis : generate data from nothing
- data masking, including
Expand Down Expand Up @@ -163,6 +164,7 @@ The following types of masks can be used :
* [`fluxUri`](#fluxuri) is to replace by a sequence of values defined in an external resource.
* [`replacement`](#replacement) is to mask a data with another data from the jsonline.
* [`pipe`](#pipe) is a mask to handle complex nested array structures, it can read an array as an object stream and process it with a sub-pipeline.
* [`apply`](#apply) processes selected data with a sub-pipeline.
* [`luhn`](#luhn) can generate valid numbers using the Luhn algorithm (e.g. french SIRET or SIREN).
* [`markov`](#markov) can generate pseudo text based on a sample text.
* [`findInCSV`](#findincsv) get one or multiple csv lines which matched with Json entry value from CSV files.
Expand Down Expand Up @@ -928,6 +930,24 @@ Be sure to check [demo](demo/demo8) to get more details about this mask.

[Return to list of masks](#possible-masks)

### Apply

[![Try it](https://img.shields.io/badge/-Try%20it%20in%20PIMO%20Play-brightgreen)](https://cgi-fr.github.io/pimo-play/#c=G4UwTgzglg9gdgLgAQCICMKBQBbAhhAayjgHMFMkkBaJCEAGxAGMAXGMcyrpAKwngAOuFgAtkKKACNccLNzyFO3JLgED6ATyXKkAVzBRxAOgD09KWFxgNJhUVJUpMoxuz0sQA&i=N4KABGBECWBGCGA7SAuKkQF8g)

This mask helps you organize your masking configuration in different files, enabling reuse and sharing of masks.

```yaml
version: "1"
masking:
- selector:
jsonpath: "iban"
mask:
apply:
uri: "./library/masking-iban.yml" # list of mask to apply on iban is declared in an external masking file
```

[Return to list of masks](#possible-masks)

### Luhn

[![Try it](https://img.shields.io/badge/-Try%20it%20in%20PIMO%20Play-brightgreen)](https://cgi-fr.github.io/pimo-play/#c=G4UwTgzglg9gdgLgAQCICMKBQEQgCbIAsATJgLYCGEA1lHAOYKZJIC0SOANiAMYAuMMExYikAKwjwADhT4ALZCmhgQfLKMo1hopJwCucxEgDeAXyA&i=N4KABGBEDOCWBOBTALpAXFAjAJgMwBYBWANgHYAOATgAYddIQBfIA)
Expand Down
122 changes: 122 additions & 0 deletions cmd/pimo/flags.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Copyright (C) 2024 CGI France
//
// This file is part of PIMO.
//
// PIMO is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// PIMO is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with PIMO. If not, see <http://www.gnu.org/licenses/>.

package main

import (
"os"

"github.com/spf13/cobra"
)

// flag describes one command-line option: its long and short names, the
// variable it is bound to and its help text. T is the type of that variable.
type flag[T any] struct {
	// name is the long name of the flag.
	name string
	// shorthand is the optional short name of the flag.
	shorthand string
	// variable points to the variable bound to the flag.
	variable *T
	// usage is the description of the flag.
	usage string
}

// Variables bound to the command-line flags, initialised with the value each
// flag takes when it is not given on the command line.
//
//nolint:gochecknoglobals
var (
	// Input handling.
	maxBufferCapacity = 64
	emptyInput        = false

	// Configuration sources.
	maskingFile     = "masking.yml"
	mockConfigFile  = "routes.yaml"
	maskingOneLiner = make([]string, 0)

	// Cache files to dump and to load.
	cachesToDump = make(map[string]string)
	cachesToLoad = make(map[string]string)

	// Error handling.
	catchErrors      = ""
	skipLineOnError  = false
	skipFieldOnError = false
	skipLogFile      = ""

	// Repetition and seeding.
	iteration   = 1
	repeatUntil = ""
	repeatWhile = ""
	seedValue   = int64(0)

	// Profiling and serving.
	profiling = ""
	serve     = ""

	// Statistics: the defaults are read from the environment.
	statisticsDestination = os.Getenv("PIMO_STATS_URL")
	statsTemplate         = os.Getenv("PIMO_STATS_TEMPLATE")

	// XML subscribers.
	xmlSubscriberName = make(map[string]string)
)

// Flag descriptors, one per command-line option, each bound to its variable.
// flagConfigMasking and flagConfigRoute deliberately share the same name and
// shorthand while pointing at different variables.
//
//nolint:gochecknoglobals
var (
	flagBufferSize = flag[int]{
		name:     "buffer-size",
		variable: &maxBufferCapacity,
		usage:    "buffer size in kB to load data from uri for each line",
	}

	flagCatchErrors = flag[string]{
		name:      "catch-errors",
		shorthand: "e",
		variable:  &catchErrors,
		usage:     "catch errors and write line in file, same as using skip-field-on-error + skip-log-file",
	}

	flagConfigMasking = flag[string]{
		name:      "config",
		shorthand: "c",
		variable:  &maskingFile,
		usage:     "name and location of the masking config file",
	}

	flagConfigRoute = flag[string]{
		name:      "config",
		shorthand: "c",
		variable:  &mockConfigFile,
		usage:     "name and location of the routes config file",
	}

	flagCachesToDump = flag[map[string]string]{
		name:     "dump-cache",
		variable: &cachesToDump,
		usage:    "path for dumping cache into file",
	}

	flagCachesToLoad = flag[map[string]string]{
		name:     "load-cache",
		variable: &cachesToLoad,
		usage:    "path for loading cache from file",
	}

	flagEmptyInput = flag[bool]{
		name:     "empty-input",
		variable: &emptyInput,
		usage:    "generate data without any input, to use with repeat flag",
	}

	flagMaskOneLiner = flag[[]string]{
		name:      "mask",
		shorthand: "m",
		variable:  &maskingOneLiner,
		usage:     "one liner masking",
	}

	flagProfiling = flag[string]{
		name:     "pprof",
		variable: &profiling,
		usage:    "create a pprof file - use 'cpu' to create a CPU pprof file or 'mem' to create an memory pprof file",
	}

	flagRepeat = flag[int]{
		name:      "repeat",
		shorthand: "r",
		variable:  &iteration,
		usage:     "number of iteration to mask each input",
	}

	flagRepeatUntil = flag[string]{
		name:     "repeat-until",
		variable: &repeatUntil,
		usage:    "mask each input repeatedly until the given condition is met",
	}

	flagRepeatWhile = flag[string]{
		name:     "repeat-while",
		variable: &repeatWhile,
		usage:    "mask each input repeatedly while the given condition is met",
	}

	flagSeed = flag[int64]{
		name:      "seed",
		shorthand: "s",
		variable:  &seedValue,
		usage:     "set global seed",
	}

	flagServe = flag[string]{
		name:     "serve",
		variable: &serve,
		usage:    "listen/respond to HTTP interface and port instead of stdin/stdout, <ip>:<port> or :<port> to listen to all local networks",
	}

	flagSkipLineOnError = flag[bool]{
		name:     "skip-line-on-error",
		variable: &skipLineOnError,
		usage:    "skip a line if an error occurs while masking a field",
	}

	flagSkipFieldOnError = flag[bool]{
		name:     "skip-field-on-error",
		variable: &skipFieldOnError,
		usage:    "remove a field if an error occurs while masking this field",
	}

	flagSkipLogFile = flag[string]{
		name:     "skip-log-file",
		variable: &skipLogFile,
		usage:    "skipped lines will be written to this log file",
	}

	flagStatsDestination = flag[string]{
		name:     "stats",
		variable: &statisticsDestination,
		usage:    "generate execution statistics in the specified dump file",
	}

	flagStatsTemplate = flag[string]{
		name:     "statsTemplate",
		variable: &statsTemplate,
		usage:    "template string to format stats (to include them you have to specify them as `{{ .Stats }}` like `{\"software\":\"PIMO\",\"stats\":{{ .Stats }}}`)",
	}

	flagXMLSubscriberName = flag[map[string]string]{
		name:     "subscriber",
		variable: &xmlSubscriberName,
		usage:    "name of element to mask",
	}
)

// addFlag registers flag on cmd.Flags(), binding it to flag.variable.
//
// The value held by flag.variable at registration time becomes the default
// value of the flag. Supported variable types are int, int64, bool, string,
// map[string]string and []string. Any other type is a programming error:
// addFlag panics rather than silently leaving the flag unregistered.
func addFlag[T any](cmd *cobra.Command, flag flag[T]) {
	flags := cmd.Flags()

	// The *VarP variants accept an empty shorthand, which is what the non-P
	// variants pass internally, so one call per type covers both cases.
	switch variable := any(flag.variable).(type) {
	case *int:
		flags.IntVarP(variable, flag.name, flag.shorthand, *variable, flag.usage)
	case *bool:
		flags.BoolVarP(variable, flag.name, flag.shorthand, *variable, flag.usage)
	case *string:
		flags.StringVarP(variable, flag.name, flag.shorthand, *variable, flag.usage)
	case *int64:
		flags.Int64VarP(variable, flag.name, flag.shorthand, *variable, flag.usage)
	case *map[string]string:
		flags.StringToStringVarP(variable, flag.name, flag.shorthand, *variable, flag.usage)
	case *[]string:
		flags.StringArrayVarP(variable, flag.name, flag.shorthand, *variable, flag.usage)
	default:
		// A descriptor with an unsupported type would otherwise be dropped
		// without any feedback and the option would be missing at runtime.
		panic("addFlag: unsupported variable type for flag --" + flag.name)
	}
}
128 changes: 73 additions & 55 deletions cmd/pimo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,40 +42,21 @@ import (
)

// Provisioned by ldflags
// nolint: gochecknoglobals
//
//nolint:gochecknoglobals
var (
version string
commit string
buildDate string
builtBy string

verbosity string
debug bool
jsonlog bool
colormode string
iteration int
emptyInput bool
maskingFile string
cachesToDump map[string]string
cachesToLoad map[string]string
skipLineOnError bool
skipFieldOnError bool
skipLogFile string
catchErrors string
seedValue int64
maskingOneLiner []string
repeatUntil string
repeatWhile string
statisticsDestination string
statsTemplate string
statsDestinationEnv = os.Getenv("PIMO_STATS_URL")
statsTemplateEnv = os.Getenv("PIMO_STATS_TEMPLATE")
xmlSubscriberName map[string]string
serve string
maxBufferCapacity int
profiling string
parquetInput string
parquetOutput string
verbosity string
debug bool
jsonlog bool
colormode string

parquetInput string
parquetOutput string
)

func main() {
Expand All @@ -102,28 +83,31 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
rootCmd.PersistentFlags().BoolVar(&debug, "debug", false, "add debug information to logs (very slow)")
rootCmd.PersistentFlags().BoolVar(&jsonlog, "log-json", false, "output logs in JSON format")
rootCmd.PersistentFlags().StringVar(&colormode, "color", "auto", "use colors in log outputs : yes, no or auto")
rootCmd.PersistentFlags().IntVarP(&iteration, "repeat", "r", 1, "number of iteration to mask each input")
rootCmd.PersistentFlags().BoolVar(&emptyInput, "empty-input", false, "generate data without any input, to use with repeat flag")
rootCmd.PersistentFlags().StringVarP(&maskingFile, "config", "c", "masking.yml", "name and location of the masking-config file")
rootCmd.PersistentFlags().StringToStringVar(&cachesToDump, "dump-cache", map[string]string{}, "path for dumping cache into file")
rootCmd.PersistentFlags().StringToStringVar(&cachesToLoad, "load-cache", map[string]string{}, "path for loading cache from file")
rootCmd.PersistentFlags().BoolVar(&skipLineOnError, "skip-line-on-error", false, "skip a line if an error occurs while masking a field")
rootCmd.PersistentFlags().BoolVar(&skipFieldOnError, "skip-field-on-error", false, "remove a field if an error occurs while masking this field")
rootCmd.PersistentFlags().StringVar(&skipLogFile, "skip-log-file", "", "skipped lines will be written to this log file")
rootCmd.PersistentFlags().StringVarP(&catchErrors, "catch-errors", "e", "", "catch errors and write line in file, same as using skip-field-on-error + skip-log-file")
rootCmd.Flags().Int64VarP(&seedValue, "seed", "s", 0, "set seed")
rootCmd.PersistentFlags().StringArrayVarP(&maskingOneLiner, "mask", "m", []string{}, "one liner masking")
rootCmd.PersistentFlags().StringVar(&repeatUntil, "repeat-until", "", "mask each input repeatedly until the given condition is met")
rootCmd.PersistentFlags().StringVar(&repeatWhile, "repeat-while", "", "mask each input repeatedly while the given condition is met")
rootCmd.PersistentFlags().StringVar(&statisticsDestination, "stats", statsDestinationEnv, "generate execution statistics in the specified dump file")
rootCmd.PersistentFlags().StringVar(&statsTemplate, "statsTemplate", statsTemplateEnv, "template string to format stats (to include them you have to specify them as `{{ .Stats }}` like `{\"software\":\"PIMO\",\"stats\":{{ .Stats }}}`)")
rootCmd.Flags().StringVar(&serve, "serve", "", "listen/respond to HTTP interface and port instead of stdin/stdout, <ip>:<port> or :<port> to listen to all local networks")
rootCmd.Flags().IntVar(&maxBufferCapacity, "buffer-size", 64, "buffer size in kB to load data from uri for each line")
rootCmd.Flags().StringVar(&profiling, "pprof", "", "create a pprof file - use 'cpu' to create a CPU pprof file or 'mem' to create an memory pprof file")

addFlag(rootCmd, flagBufferSize)
addFlag(rootCmd, flagCatchErrors)
addFlag(rootCmd, flagConfigMasking)
addFlag(rootCmd, flagCachesToDump)
addFlag(rootCmd, flagCachesToLoad)
addFlag(rootCmd, flagEmptyInput)
addFlag(rootCmd, flagMaskOneLiner)
addFlag(rootCmd, flagProfiling)
addFlag(rootCmd, flagRepeat)
addFlag(rootCmd, flagRepeatUntil)
addFlag(rootCmd, flagRepeatWhile)
addFlag(rootCmd, flagSeed)
addFlag(rootCmd, flagServe)
addFlag(rootCmd, flagSkipFieldOnError)
addFlag(rootCmd, flagSkipLineOnError)
addFlag(rootCmd, flagSkipLogFile)
addFlag(rootCmd, flagStatsDestination)
addFlag(rootCmd, flagStatsTemplate)

rootCmd.AddCommand(&cobra.Command{
Use: "jsonschema",
Use: "jsonschema",
Short: "Export schema of masking configuration",
Run: func(cmd *cobra.Command, args []string) {
initLog()
jsonschema, err := pimo.GetJsonSchema()
if err != nil {
os.Exit(8)
Expand All @@ -135,8 +119,11 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
xmlCmd := &cobra.Command{
Use: "xml",
Short: "Parsing and masking XML file",
Run: func(cmd *cobra.Command, args []string) {
Run: func(cmd *cobra.Command, _ []string) {
initLog()
if maxBufferCapacity > 0 {
uri.MaxCapacityForEachLine = maxBufferCapacity * 1024
}
if len(catchErrors) > 0 {
skipLineOnError = true
skipLogFile = catchErrors
Expand Down Expand Up @@ -185,8 +172,18 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
}
},
}
xmlCmd.Flags().StringToStringVar(&xmlSubscriberName, "subscriber", map[string]string{}, "name of element to mask")
xmlCmd.Flags().Int64VarP(&seedValue, "seed", "s", 0, "set seed")
addFlag(xmlCmd, flagBufferSize)
addFlag(xmlCmd, flagCatchErrors)
addFlag(xmlCmd, flagCachesToDump)
addFlag(xmlCmd, flagCachesToLoad)
// addFlag(xmlCmd, flagProfiling) //could use
addFlag(xmlCmd, flagSeed)
addFlag(xmlCmd, flagSkipFieldOnError)
addFlag(xmlCmd, flagSkipLineOnError)
addFlag(xmlCmd, flagSkipLogFile)
// addFlag(xmlCmd, flagStatsDestination) // could use
// addFlag(xmlCmd, flagStatsTemplate) // could use
addFlag(xmlCmd, flagXMLSubscriberName)
rootCmd.AddCommand(xmlCmd)

// Add command for parquet transformer
Expand All @@ -206,12 +203,26 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
run(cmd)
},
}
parquetCmd.Flags().Int64VarP(&seedValue, "seed", "s", 0, "set seed")
addFlag(parquetCmd, flagBufferSize)
addFlag(parquetCmd, flagCatchErrors)
addFlag(parquetCmd, flagConfigMasking)
addFlag(parquetCmd, flagCachesToDump)
addFlag(parquetCmd, flagCachesToLoad)
addFlag(parquetCmd, flagMaskOneLiner)
addFlag(parquetCmd, flagProfiling)
addFlag(parquetCmd, flagSeed)
addFlag(parquetCmd, flagSkipFieldOnError)
addFlag(parquetCmd, flagSkipLineOnError)
addFlag(parquetCmd, flagSkipLogFile)
addFlag(parquetCmd, flagStatsDestination)
addFlag(parquetCmd, flagStatsTemplate)
rootCmd.AddCommand(parquetCmd)

rootCmd.AddCommand(&cobra.Command{
Use: "flow",
flowCmd := &cobra.Command{
Use: "flow",
Short: "Export masking configuration as graphviz diagram",
Run: func(cmd *cobra.Command, args []string) {
initLog()
pdef, err := model.LoadPipelineDefinitionFromFile(maskingFile)
if err != nil {
log.Err(err).Msg("Cannot load pipeline definition from file")
Expand All @@ -224,15 +235,21 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
}
fmt.Println(flow)
},
})
}
rootCmd.AddCommand(flowCmd)

playPort := 3010
playSecure := false
playCmd := &cobra.Command{
Use: "play",
Use: "play",
Short: "Start local website to play with PIMO",
Run: func(cmd *cobra.Command, args []string) {
initLog()

if maxBufferCapacity > 0 {
uri.MaxCapacityForEachLine = maxBufferCapacity * 1024
}

router := pimo.Play(playSecure)
port := fmt.Sprintf("0.0.0.0:%d", playPort)

Expand All @@ -243,6 +260,7 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
}
playCmd.PersistentFlags().IntVarP(&playPort, "port", "p", 3010, "port number")
playCmd.PersistentFlags().BoolVarP(&playSecure, "secure", "s", false, "enable security features (use this flag if PIMO Play is publicly exposed)")
addFlag(playCmd, flagBufferSize)
rootCmd.AddCommand(playCmd)

setupMockCommand(rootCmd)
Expand Down
Loading

0 comments on commit 0e2e9ea

Please sign in to comment.