Skip to content

Commit

Permalink
added utf8 fix (#77)
Browse files Browse the repository at this point in the history
* added utf8 fix?

* interface should be pointer to string

* fix comments

* check for UTF8 when decoding or encoding

* remove wrapping

* added tests

* update changelog, prep for release
  • Loading branch information
kristinapathak authored May 18, 2022
1 parent d39a7ef commit 840a19e
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 5 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [v3.1.3]
- Fix `500 Invalid WRP content type` for invalid `Accept` headers [#74](https://github.com/xmidt-org/wrp-go/pull/74)
- Added helper function that errors if a wrp message contains non-UTF-8 strings. [#77](https://github.com/xmidt-org/wrp-go/pull/77)

## [v3.1.2]
- Move ParseID func and relevant consts from webpa-common to wrp-go. [#75](https://github.com/xmidt-org/wrp-go/pull/75)
Expand Down Expand Up @@ -70,7 +73,8 @@ All changes included in [#47](https://github.com/xmidt-org/wrp-go/pull/47)
## [1.0.0]
- This release is exactly the same as the last version from github.com/xmidt-org/webpa-common/wrp

[Unreleased]: https://github.com/xmidt-org/wrp-go/compare/v3.1.2...HEAD
[Unreleased]: https://github.com/xmidt-org/wrp-go/compare/v3.1.3...HEAD
[v3.1.3]: https://github.com/xmidt-org/wrp-go/compare/v3.1.2...v3.1.3
[v3.1.2]: https://github.com/xmidt-org/wrp-go/compare/v3.1.1...v3.1.2
[v3.1.1]: https://github.com/xmidt-org/wrp-go/compare/v3.1.0...v3.1.1
[v3.1.0]: https://github.com/xmidt-org/wrp-go/compare/v3.0.2...v3.1.0
Expand Down
63 changes: 63 additions & 0 deletions utf8.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/**
* Copyright 2022 Comcast Cable Communications Management, LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package wrp

import (
"errors"
"fmt"
"reflect"
"unicode/utf8"
)

var (
	// ErrNotUTF8 is wrapped by the error UTF8 returns when an exported string
	// field holds bytes that are not valid UTF-8.
	ErrNotUTF8 = errors.New("field contains non-utf-8 characters")

	// ErrUnexpectedKind is wrapped by the error UTF8 returns when its argument
	// is neither a struct nor a non-nil pointer to a struct.
	ErrUnexpectedKind = errors.New("A struct or non-nil pointer to struct is required")
)

// UTF8 verifies that every exported, non-embedded string field of v holds
// valid UTF-8.
//
// v must be a struct or a non-nil pointer to a struct; anything else yields an
// error wrapping ErrUnexpectedKind. Only the top-level fields are inspected:
// unexported fields, embedded fields, and fields whose value is not a plain
// string are skipped. The first invalid field found yields an error wrapping
// ErrNotUTF8 that names the field. UTF8 returns nil when no invalid string is
// found.
func UTF8(v interface{}) error {
	value := reflect.ValueOf(v)
	if value.Kind() == reflect.Ptr && !value.IsNil() {
		value = value.Elem()
	}

	if value.Kind() != reflect.Struct {
		return fmt.Errorf("%w: %s", ErrUnexpectedKind, value.Kind())
	}

	structType := value.Type()
	for i := 0; i < value.NumField(); i++ {
		ft := structType.Field(i)
		if len(ft.PkgPath) > 0 || ft.Anonymous {
			continue // skip embedded or unexported fields
		}

		f := value.Field(i)
		if !f.CanInterface() {
			continue // this should never happen, but ... you never know
		}

		s, ok := f.Interface().(string)
		if !ok || utf8.ValidString(s) {
			continue
		}

		// %q escapes the invalid bytes so the error message itself stays
		// valid UTF-8 and is safe to log or serialize.
		return fmt.Errorf("%w: '%s:%q'", ErrNotUTF8, ft.Name, s)
	}

	return nil
}
60 changes: 56 additions & 4 deletions utf8_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"github.com/stretchr/testify/assert"
)

func TestInvalidUtf8(t *testing.T) {
func TestInvalidUtf8Decoding(t *testing.T) {
assert := assert.New(t)

/*
Expand All @@ -48,10 +48,62 @@ func TestInvalidUtf8(t *testing.T) {
decoder := NewDecoderBytes(invalid, Msgpack)
msg := new(Message)
err := decoder.Decode(msg)

assert.Nil(err)
assert.True(utf8.ValidString(msg.Source))

/* This fails. */
assert.True(utf8.ValidString(msg.Destination))
assert.False(utf8.ValidString(msg.Destination))
err = UTF8(msg)
assert.ErrorIs(err, ErrNotUTF8)
}

// TestUTF8 runs UTF8 against a table of inputs: a struct value, a pointer to
// that struct, a non-struct value, and a struct whose exported string field is
// not valid UTF-8. Each case checks for either no error or the expected
// sentinel.
func TestUTF8(t *testing.T) {
	type Test struct {
		unexported string
		Name       string
		Age        int
	}

	valid := Test{
		unexported: "this shouldn't be output",
		Name:       "Joe Schmoe",
		Age:        415,
	}
	invalid := Test{
		Name: string([]byte{0xbf}),
	}

	cases := []struct {
		description string
		value       interface{}
		expectedErr error
	}{
		{description: "Success", value: valid},
		{description: "Pointer success", value: &valid},
		{description: "Non struct error", value: 5, expectedErr: ErrUnexpectedKind},
		{description: "UTF8 error", value: invalid, expectedErr: ErrNotUTF8},
	}

	for _, c := range cases {
		t.Run(c.description, func(t *testing.T) {
			a := assert.New(t)
			got := UTF8(c.value)
			if c.expectedErr != nil {
				a.ErrorIs(got, c.expectedErr)
				return
			}
			a.NoError(got)
		})
	}
}

0 comments on commit 840a19e

Please sign in to comment.