Skip to content

Commit

Permalink
Allow strict usage of utf8.
Browse files Browse the repository at this point in the history
  • Loading branch information
mattdurham committed Oct 17, 2023
1 parent 8ee2874 commit 219dbea
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 10 deletions.
6 changes: 5 additions & 1 deletion cmd/internal/flowmode/cmd_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ func loadFlowSource(path string, converterSourceFormat string, converterBypassEr
bb, err := os.ReadFile(curPath)
// Retain the original error and don't try to convert if an error occurred.
if err != nil {
bb, err = config.EnsureUTF8(bb)
bb, err = config.EnsureUTF8(bb, true)
}
sources[curPath] = bb
return err
Expand All @@ -402,6 +402,10 @@ func loadFlowSource(path string, converterSourceFormat string, converterBypassEr
if err != nil {
return nil, err
}
bb, err = config.EnsureUTF8(bb, true)
if err != nil {
return nil, err
}
if converterSourceFormat != "flow" {
var diags convert_diag.Diagnostics
bb, diags = converter.Convert(bb, converter.Input(converterSourceFormat))
Expand Down
22 changes: 13 additions & 9 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"github.com/stretchr/testify/require"
"golang.org/x/text/encoding"
uni "golang.org/x/text/encoding/unicode"
utf32 "golang.org/x/text/encoding/unicode/utf32"
"golang.org/x/text/encoding/unicode/utf32"
"gopkg.in/yaml.v2"
)

Expand Down Expand Up @@ -344,26 +344,26 @@ func LoadRemote(url string, expandEnvVars bool, c *Config) error {
}

// performEnvVarExpansion prepares a raw config buffer for unmarshalling.
//
// It first passes buf through EnsureUTF8 with strict mode disabled and returns
// that error unchanged if the call fails. When expandEnvVars is true, the text
// is then run through envsubst.Eval using getenv as the variable lookup, and
// the substituted result is returned; a substitution failure is wrapped with
// context. When expandEnvVars is false, no substitution is attempted.
//
// NOTE(review): this listing appears to be a diff rendering in which both the
// removed and the added form of two statements are shown back to back (the
// envsubst.Eval call and the final return) — confirm against the real file
// which of each pair is live.
func performEnvVarExpansion(buf []byte, expandEnvVars bool) ([]byte, error) {
// Non-strict call (second argument false).
utf8Buf, err := EnsureUTF8(buf, false)
if err != nil {
return nil, err
}
// (Optionally) expand with environment variables
if expandEnvVars {
s, err := envsubst.Eval(string(buf), getenv)
s, err := envsubst.Eval(string(utf8Buf), getenv)
if err != nil {
return nil, fmt.Errorf("unable to substitute config with environment variables: %w", err)
}
return []byte(s), nil
}
return buf, nil
return utf8Buf, nil
}

// LoadBytes unmarshals a config from a buffer. Defaults are not
// applied to the file and must be done manually if LoadBytes
// is called directly.
func LoadBytes(buf []byte, expandEnvVars bool, c *Config) error {
utf8Buf, err := EnsureUTF8(buf)
if err != nil {
return err
}
expandedBuf, err := performEnvVarExpansion(utf8Buf, expandEnvVars)
expandedBuf, err := performEnvVarExpansion(buf, expandEnvVars)
if err != nil {
return err
}
Expand All @@ -372,7 +372,8 @@ func LoadBytes(buf []byte, expandEnvVars bool, c *Config) error {
}

// EnsureUTF8 will convert from the most common encodings to UTF8.
func EnsureUTF8(config []byte) ([]byte, error) {
// If useStrictUTF8 is true, an error is returned when the input is not already UTF-8 encoded.
func EnsureUTF8(config []byte, useStrictUTF8 bool) ([]byte, error) {
buffer := bytes.NewBuffer(config)
src, enc := utfbom.Skip(buffer)
var converted []byte
Expand All @@ -399,6 +400,9 @@ func EnsureUTF8(config []byte) ([]byte, error) {
}
return nil, fmt.Errorf("unknown encoding for config")
}
if useStrictUTF8 {
return nil, fmt.Errorf("configuration is encoded with %s but must be utf8", enc.String())
}
decoder := encoder.NewDecoder()
converted, err = decoder.Bytes(skippedBytes)
return converted, err
Expand Down
9 changes: 9 additions & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -578,3 +578,12 @@ func TestConfigEncoding(t *testing.T) {
})
}
}

// TestConfigEncodingStruct exercises both modes of EnsureUTF8 against the
// test_encoding_utf16le.txt fixture (presumably UTF-16LE encoded, judging by
// its name): the non-strict call must succeed, the strict call must fail.
func TestConfigEncodingStruct(t *testing.T) {
	fixture := path.Join("encoding_configs", "test_encoding_utf16le.txt")
	raw, readErr := os.ReadFile(fixture)
	require.NoError(t, readErr)

	// Non-strict mode: the call is expected to return without error.
	_, lenientErr := EnsureUTF8(raw, false)
	require.NoError(t, lenientErr)

	// Strict mode: the same bytes are expected to be rejected.
	_, strictErr := EnsureUTF8(raw, true)
	require.Error(t, strictErr)
}

0 comments on commit 219dbea

Please sign in to comment.