From 6077a84cc2aee7a73dd501f80c4d54b4b72d9901 Mon Sep 17 00:00:00 2001 From: Neil Pankey Date: Wed, 16 Sep 2020 19:47:44 -0700 Subject: [PATCH 1/4] test: build on go 1.11 --- main_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main_test.go b/main_test.go index a2883d1..b4475ff 100644 --- a/main_test.go +++ b/main_test.go @@ -69,10 +69,10 @@ func TestMain(t *testing.T) { } for _, tt := range tests { - in := strings.ReplaceAll(tt.in, "/", string(filepath.Separator)) + in := strings.Replace(tt.in, "/", string(filepath.Separator), -1) sort.Strings(tt.out) out := strings.Join(tt.out, "\n") - out = strings.ReplaceAll(out, "/", string(filepath.Separator)) + out = strings.Replace(out, "/", string(filepath.Separator), -1) t.Run(in, func(t *testing.T) { var w strings.Builder From 6f93deba88eba7463860d4c854aa7ac5f08e9140 Mon Sep 17 00:00:00 2001 From: Neil Pankey Date: Thu, 17 Sep 2020 19:37:28 -0700 Subject: [PATCH 2/4] test: generate utf-16 testdata and bom variants --- gen_testdata.go | 57 +++++++++++++++++++++++++ main_test.go | 59 +++++++++++++------------- testdata/utf-16be/data-error.json | Bin 0 -> 30 bytes testdata/utf-16be/data-error.yml | Bin 0 -> 108 bytes testdata/utf-16be/data-fail.json | Bin 0 -> 58 bytes testdata/utf-16be/data-fail.yml | Bin 0 -> 42 bytes testdata/utf-16be/data-pass.json | Bin 0 -> 82 bytes testdata/utf-16be/data-pass.yml | Bin 0 -> 48 bytes testdata/utf-16be/schema.json | Bin 0 -> 220 bytes testdata/utf-16be/schema.yml | Bin 0 -> 156 bytes testdata/utf-16be_bom/data-error.json | Bin 0 -> 32 bytes testdata/utf-16be_bom/data-error.yml | Bin 0 -> 110 bytes testdata/utf-16be_bom/data-fail.json | Bin 0 -> 60 bytes testdata/utf-16be_bom/data-fail.yml | Bin 0 -> 44 bytes testdata/utf-16be_bom/data-pass.json | Bin 0 -> 84 bytes testdata/utf-16be_bom/data-pass.yml | Bin 0 -> 50 bytes testdata/utf-16be_bom/schema.json | Bin 0 -> 222 bytes testdata/utf-16be_bom/schema.yml | Bin 0 -> 158 bytes 
testdata/utf-16le/data-error.json | Bin 0 -> 30 bytes testdata/utf-16le/data-error.yml | Bin 0 -> 108 bytes testdata/utf-16le/data-fail.json | Bin 0 -> 58 bytes testdata/utf-16le/data-fail.yml | Bin 0 -> 42 bytes testdata/utf-16le/data-pass.json | Bin 0 -> 82 bytes testdata/utf-16le/data-pass.yml | Bin 0 -> 48 bytes testdata/utf-16le/schema.json | Bin 0 -> 220 bytes testdata/utf-16le/schema.yml | Bin 0 -> 156 bytes testdata/utf-16le_bom/data-error.json | Bin 0 -> 32 bytes testdata/utf-16le_bom/data-error.yml | Bin 0 -> 110 bytes testdata/utf-16le_bom/data-fail.json | Bin 0 -> 60 bytes testdata/utf-16le_bom/data-fail.yml | Bin 0 -> 44 bytes testdata/utf-16le_bom/data-pass.json | Bin 0 -> 84 bytes testdata/utf-16le_bom/data-pass.yml | Bin 0 -> 50 bytes testdata/utf-16le_bom/schema.json | Bin 0 -> 222 bytes testdata/utf-16le_bom/schema.yml | Bin 0 -> 158 bytes testdata/{ => utf-8}/data-error.json | 0 testdata/{ => utf-8}/data-error.yml | 0 testdata/{ => utf-8}/data-fail.json | 0 testdata/{ => utf-8}/data-fail.yml | 0 testdata/{ => utf-8}/data-pass.json | 0 testdata/{ => utf-8}/data-pass.yml | 0 testdata/{ => utf-8}/schema.json | 0 testdata/{ => utf-8}/schema.yml | 0 testdata/utf-8_bom/data-error.json | 1 + testdata/utf-8_bom/data-error.yml | 1 + testdata/utf-8_bom/data-fail.json | 3 ++ testdata/utf-8_bom/data-fail.yml | 2 + testdata/utf-8_bom/data-pass.json | 4 ++ testdata/utf-8_bom/data-pass.yml | 3 ++ testdata/utf-8_bom/schema.json | 7 +++ testdata/utf-8_bom/schema.yml | 7 +++ 50 files changed, 114 insertions(+), 30 deletions(-) create mode 100644 gen_testdata.go create mode 100644 testdata/utf-16be/data-error.json create mode 100644 testdata/utf-16be/data-error.yml create mode 100644 testdata/utf-16be/data-fail.json create mode 100644 testdata/utf-16be/data-fail.yml create mode 100644 testdata/utf-16be/data-pass.json create mode 100644 testdata/utf-16be/data-pass.yml create mode 100644 testdata/utf-16be/schema.json create mode 100644 testdata/utf-16be/schema.yml 
create mode 100644 testdata/utf-16be_bom/data-error.json create mode 100644 testdata/utf-16be_bom/data-error.yml create mode 100644 testdata/utf-16be_bom/data-fail.json create mode 100644 testdata/utf-16be_bom/data-fail.yml create mode 100644 testdata/utf-16be_bom/data-pass.json create mode 100644 testdata/utf-16be_bom/data-pass.yml create mode 100644 testdata/utf-16be_bom/schema.json create mode 100644 testdata/utf-16be_bom/schema.yml create mode 100644 testdata/utf-16le/data-error.json create mode 100644 testdata/utf-16le/data-error.yml create mode 100644 testdata/utf-16le/data-fail.json create mode 100644 testdata/utf-16le/data-fail.yml create mode 100644 testdata/utf-16le/data-pass.json create mode 100644 testdata/utf-16le/data-pass.yml create mode 100644 testdata/utf-16le/schema.json create mode 100644 testdata/utf-16le/schema.yml create mode 100644 testdata/utf-16le_bom/data-error.json create mode 100644 testdata/utf-16le_bom/data-error.yml create mode 100644 testdata/utf-16le_bom/data-fail.json create mode 100644 testdata/utf-16le_bom/data-fail.yml create mode 100644 testdata/utf-16le_bom/data-pass.json create mode 100644 testdata/utf-16le_bom/data-pass.yml create mode 100644 testdata/utf-16le_bom/schema.json create mode 100644 testdata/utf-16le_bom/schema.yml rename testdata/{ => utf-8}/data-error.json (100%) rename testdata/{ => utf-8}/data-error.yml (100%) rename testdata/{ => utf-8}/data-fail.json (100%) rename testdata/{ => utf-8}/data-fail.yml (100%) rename testdata/{ => utf-8}/data-pass.json (100%) rename testdata/{ => utf-8}/data-pass.yml (100%) rename testdata/{ => utf-8}/schema.json (100%) rename testdata/{ => utf-8}/schema.yml (100%) create mode 100644 testdata/utf-8_bom/data-error.json create mode 100644 testdata/utf-8_bom/data-error.yml create mode 100644 testdata/utf-8_bom/data-fail.json create mode 100644 testdata/utf-8_bom/data-fail.yml create mode 100644 testdata/utf-8_bom/data-pass.json create mode 100644 testdata/utf-8_bom/data-pass.yml 
create mode 100644 testdata/utf-8_bom/schema.json create mode 100644 testdata/utf-8_bom/schema.yml diff --git a/gen_testdata.go b/gen_testdata.go new file mode 100644 index 0000000..062497b --- /dev/null +++ b/gen_testdata.go @@ -0,0 +1,57 @@ +// +build ignore + +// generates clones the utf-8 tests data to the other +// unicode encodings and adds BOM variants of each. +package main + +import ( + "io/ioutil" + "log" + "os" + "path/filepath" + + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/unicode" +) + + +func main() { + var xforms = []struct { + dir, bom string + enc encoding.Encoding + } { + { "testdata/utf-16be", "\xFE\xFF", unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) }, + { "testdata/utf-16le", "\xFF\xFE", unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) }, + } + + paths, _ := filepath.Glob("testdata/utf-8/*") + for _, p := range paths { + src, err := ioutil.ReadFile(p) + if err != nil { + log.Fatal(err) + } + + write("testdata/utf-8_bom", p, "\xEF\xBB\xBF", src) + for _, xform := range xforms { + dst, err := xform.enc.NewEncoder().Bytes(src) + if err != nil { + log.Fatal(err) + } + write(xform.dir, p, "", dst) + write(xform.dir + "_bom", p, xform.bom, dst) + } + } +} + +func write(dir, orig, bom string, buf []byte) { + f, err := os.Create(filepath.Join(dir, filepath.Base(orig))) + if err != nil { + log.Fatal(err) + } + if _, err = f.Write([]byte(bom)); err != nil { + log.Fatal(err) + } + if _, err = f.Write(buf); err != nil { + log.Fatal(err) + } +} diff --git a/main_test.go b/main_test.go index b4475ff..878fe8b 100644 --- a/main_test.go +++ b/main_test.go @@ -9,61 +9,61 @@ import ( func TestMain(t *testing.T) { tests := []struct { - in string - out []string + in string + out []string exit int - } { + }{ { - "-s testdata/schema.yml testdata/data-pass.yml", - []string{"testdata/data-pass.yml: pass"}, + "-s testdata/utf-8/schema.yml testdata/utf-8/data-pass.yml", + []string{"testdata/utf-8/data-pass.yml: pass"}, 0, }, { - "-s 
testdata/schema.json testdata/data-pass.yml", - []string{"testdata/data-pass.yml: pass"}, + "-s testdata/utf-8/schema.json testdata/utf-8/data-pass.yml", + []string{"testdata/utf-8/data-pass.yml: pass"}, 0, }, { - "-s testdata/schema.json testdata/data-pass.json", - []string{"testdata/data-pass.json: pass"}, + "-s testdata/utf-8/schema.json testdata/utf-8/data-pass.json", + []string{"testdata/utf-8/data-pass.json: pass"}, 0, }, { - "-s testdata/schema.yml testdata/data-pass.json", - []string{"testdata/data-pass.json: pass"}, + "-s testdata/utf-8/schema.yml testdata/utf-8/data-pass.json", + []string{"testdata/utf-8/data-pass.json: pass"}, 0, }, { - "-q -s testdata/schema.yml testdata/data-fail.yml", - []string{"testdata/data-fail.yml: fail: (root): foo is required"}, + "-q -s testdata/utf-8/schema.yml testdata/utf-8/data-fail.yml", + []string{"testdata/utf-8/data-fail.yml: fail: (root): foo is required"}, 1, }, { - "-q -s testdata/schema.json testdata/data-fail.yml", - []string{"testdata/data-fail.yml: fail: (root): foo is required"}, + "-q -s testdata/utf-8/schema.json testdata/utf-8/data-fail.yml", + []string{"testdata/utf-8/data-fail.yml: fail: (root): foo is required"}, 1, }, { - "-q -s testdata/schema.json testdata/data-fail.json", - []string{"testdata/data-fail.json: fail: (root): foo is required"}, + "-q -s testdata/utf-8/schema.json testdata/utf-8/data-fail.json", + []string{"testdata/utf-8/data-fail.json: fail: (root): foo is required"}, 1, }, { - "-q -s testdata/schema.yml testdata/data-fail.json", - []string{"testdata/data-fail.json: fail: (root): foo is required"}, + "-q -s testdata/utf-8/schema.yml testdata/utf-8/data-fail.json", + []string{"testdata/utf-8/data-fail.json: fail: (root): foo is required"}, 1, }, { - "-q -s testdata/schema.json testdata/data-error.json", - []string{"testdata/data-error.json: error: validate: invalid character 'o' in literal null (expecting 'u')"}, + "-q -s testdata/utf-8/schema.json testdata/utf-8/data-error.json", + 
[]string{"testdata/utf-8/data-error.json: error: validate: invalid character 'o' in literal null (expecting 'u')"}, 2, }, { - "-q -s testdata/schema.yml testdata/data-error.yml", - []string{"testdata/data-error.yml: error: load doc: yaml: found unexpected end of stream"}, + "-q -s testdata/utf-8/schema.yml testdata/utf-8/data-error.yml", + []string{"testdata/utf-8/data-error.yml: error: load doc: yaml: found unexpected end of stream"}, 2, }, { - "-q -s testdata/schema.json testdata/data-*.json", + "-q -s testdata/utf-8/schema.json testdata/utf-8/data-*.json", []string{ - "testdata/data-fail.json: fail: (root): foo is required", - "testdata/data-error.json: error: validate: invalid character 'o' in literal null (expecting 'u')", + "testdata/utf-8/data-fail.json: fail: (root): foo is required", + "testdata/utf-8/data-error.json: error: validate: invalid character 'o' in literal null (expecting 'u')", }, 3, }, { - "-q -s testdata/schema.yml testdata/data-*.yml", + "-q -s testdata/utf-8/schema.yml testdata/utf-8/data-*.yml", []string{ - "testdata/data-error.yml: error: load doc: yaml: found unexpected end of stream", - "testdata/data-fail.yml: fail: (root): foo is required", + "testdata/utf-8/data-error.yml: error: load doc: yaml: found unexpected end of stream", + "testdata/utf-8/data-fail.yml: fail: (root): foo is required", }, 3, }, } @@ -89,4 +89,3 @@ func TestMain(t *testing.T) { }) } } - diff --git a/testdata/utf-16be/data-error.json b/testdata/utf-16be/data-error.json new file mode 100644 index 0000000000000000000000000000000000000000..74920f2f29711d04beca75a244f0ce3310c1ea19 GIT binary patch literal 30 icmZR`W5{PHVNhTwV@PDkVaQ}i0kX3gih-he3|s(Jg9T;) literal 0 HcmV?d00001 diff --git a/testdata/utf-16be/data-error.yml b/testdata/utf-16be/data-error.yml new file mode 100644 index 0000000000000000000000000000000000000000..b0c91f19b96f9d6bd513c0b368e800395b6cbec0 GIT binary patch literal 108 zcmYj}K?;B{37ovw(76Dt4! 
literal 0 HcmV?d00001 diff --git a/testdata/utf-16be/data-fail.json b/testdata/utf-16be/data-fail.json new file mode 100644 index 0000000000000000000000000000000000000000..646c6c55c3ae37730d6f7c019adec120c90d43b0 GIT binary patch literal 58 zcmZRmX5eB_fI=mPB!)zWA_gS}E1-xHLoP!mLopC$GUPF&14Ysp@_|qZsH2vF3jo?B B2n+xK literal 0 HcmV?d00001 diff --git a/testdata/utf-16be/data-fail.yml b/testdata/utf-16be/data-fail.yml new file mode 100644 index 0000000000000000000000000000000000000000..b0dc72605189e83b51522bfe72e89229e0528011 GIT binary patch literal 42 qcmZS3WdK1gh9rhWh9U+l1_g#(hD?THAk1XQV@L<`(-`uBkP85Q%m#@7 literal 0 HcmV?d00001 diff --git a/testdata/utf-16be/data-pass.json b/testdata/utf-16be/data-pass.json new file mode 100644 index 0000000000000000000000000000000000000000..b0d7261160409b9cd2670c27a276460b2f5a08d5 GIT binary patch literal 82 zcmZRmX5eB_fI=mPG=_X2RAR6KiYPH8G88kU0L7FTbWl_!0cDEds;U?&7?K&vfP5~7 HS_UoviNy)? literal 0 HcmV?d00001 diff --git a/testdata/utf-16be/data-pass.yml b/testdata/utf-16be/data-pass.yml new file mode 100644 index 0000000000000000000000000000000000000000..80b8d780aced3b101b73f3712b377d7872341f1e GIT binary patch literal 48 tcmZS3WdK1ghBStJAhcpoU`S*rW=H|@xfqgw>>{vO6+;C>GD8^y7XYFo2Rr}( literal 0 HcmV?d00001 diff --git a/testdata/utf-16be/schema.json b/testdata/utf-16be/schema.json new file mode 100644 index 0000000000000000000000000000000000000000..ed742390b4f5d72837cf95dcf3d3826140eafba5 GIT binary patch literal 220 zcmY+8I|_g>5Cy096d|=I@G4@V_`$|+EJVD!PmLN0WEXbk?XnSJ{Id*9^!h7RN2@wx zNi+|+kFQ1dFJaU<*2}*wE8i)rrK8c*uoDLxzodd(+_P&Bn7?5~Rg5gkJiF36sp@l0 HCR+&yJ4hT^ literal 0 HcmV?d00001 diff --git a/testdata/utf-16be/schema.yml b/testdata/utf-16be/schema.yml new file mode 100644 index 0000000000000000000000000000000000000000..420e8ba4bb742f8abc77d5a95bfce16379e9040f GIT binary patch literal 156 zcmZS3WdK1gh608nhI}BI3ZzRIGJ$k4gB4I*0SeQAviT??FqsmDO0YgFpvq#P-Xfsx oJce{6y-5s-Kv9rya;G|Awwxp9f*~JVyZ4s6~rzs0HmfBxc~qF literal 0 HcmV?d00001 diff --git 
a/testdata/utf-16be_bom/data-error.json b/testdata/utf-16be_bom/data-error.json new file mode 100644 index 0000000000000000000000000000000000000000..240156890c7692c7f7d9e5df49eeb34c01bb5a8e GIT binary patch literal 32 lcmezOpCOMSpP__7fuW2cks*g6lOYAj&SEGAismtJ0RWUH2V(#L literal 0 HcmV?d00001 diff --git a/testdata/utf-16be_bom/data-error.yml b/testdata/utf-16be_bom/data-error.yml new file mode 100644 index 0000000000000000000000000000000000000000..499fc64eb61610a5b5c666a0cabe8ee5847a4843 GIT binary patch literal 110 zcmYj}K?;B{3UQ)19TQI!OgDT1r2VyIw9W+(&l Jxfp60xBxHP3-AB{ literal 0 HcmV?d00001 diff --git a/testdata/utf-16be_bom/data-pass.yml b/testdata/utf-16be_bom/data-pass.yml new file mode 100644 index 0000000000000000000000000000000000000000..277d96d955afa1c42549643f587d3d60e098e880 GIT binary patch literal 50 ucmezOpFtN4xfs$I@`2EbL4hHWp_m~B$me260bG{KSQbS681iej&NoX^tIk4#+h9rhWh9U+l1_g#(hD?THAk1XQV@L<`(-`uBkc$BTg;WNK literal 0 HcmV?d00001 diff --git a/testdata/utf-16le/data-pass.json b/testdata/utf-16le/data-pass.json new file mode 100644 index 0000000000000000000000000000000000000000..28357344bee9e5cc940a51cc7e9d7e36ba644573 GIT binary patch literal 82 zcmbj%#grIyP*f!WWs2acsu(I5k{QZ?d@hDs GAO--Fw+ZtA literal 0 HcmV?d00001 diff --git a/testdata/utf-16le/data-pass.yml b/testdata/utf-16le/data-pass.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebb8581e4f4f41e334045d3dbe11af0809d2bd3d GIT binary patch literal 48 tcmdO8&;>#+hBStJAhcpoU`S*rW=H|@xfqgw>>{vO6+;C>GD8_q3;?NK2Rr}( literal 0 HcmV?d00001 diff --git a/testdata/utf-16le/schema.json b/testdata/utf-16le/schema.json new file mode 100644 index 0000000000000000000000000000000000000000..2b84fea7e19373707542feccfccd560ad2166318 GIT binary patch literal 220 zcmZvWO$vY@6ohBpDa3Ur=qk~o$ihwbV~Y^)-dI`&Avh1-_l-If5b;RNSYdc8riEs1 zkf^wK8Tax7Y4Z?*lf`lQw#+h608nhI}BI3ZzRIGJ$k4gB4I*0SeQAviT??FqsmDO0YgFpvq#P-Xfsx pJce{6y-5s-Kv9rya;G|Awwxp9f*~JVyZ4s6~rzs1^~KV6}bQa literal 0 HcmV?d00001 diff --git 
a/testdata/utf-16le_bom/data-error.json b/testdata/utf-16le_bom/data-error.json new file mode 100644 index 0000000000000000000000000000000000000000..4f103343818acdbcda160e0dda735ab53391ded3 GIT binary patch literal 32 lcmezWFOMOgp@cz!p^PDsA%`K8AqB|JVkicR<}q+F005bc2V(#L literal 0 HcmV?d00001 diff --git a/testdata/utf-16le_bom/data-error.yml b/testdata/utf-16le_bom/data-error.yml new file mode 100644 index 0000000000000000000000000000000000000000..838a4a95543495aefd1a483dd541179c6ae2def2 GIT binary patch literal 110 zcmYj}K@LDr5JTTlCN}QiEH>i*$RrH%k+?j{%4XV^c6whIH-PS+ueDU!0 literal 0 HcmV?d00001 diff --git a/testdata/utf-16le_bom/data-fail.json b/testdata/utf-16le_bom/data-fail.json new file mode 100644 index 0000000000000000000000000000000000000000..f5030d96f83b7edb35e8132b448b90acfef2c645 GIT binary patch literal 60 zcmezWubP32K>-St7?Kzg8HyN`7_5LIN({LSnGD51n8}dGkPZ|{W5@?WC7_O4AO-+f Cj|vF@ literal 0 HcmV?d00001 diff --git a/testdata/utf-16le_bom/data-fail.yml b/testdata/utf-16le_bom/data-fail.yml new file mode 100644 index 0000000000000000000000000000000000000000..90d4179e98f5ddfa8b6138fd2f654542461cc7cb GIT binary patch literal 44 scmezWPnQ7%xfqfd5*dmZtQZs+av3riih(ebA&(&)$WLR)2SP3e0Nd0Eh5!Hn literal 0 HcmV?d00001 diff --git a/testdata/utf-16le_bom/data-pass.json b/testdata/utf-16le_bom/data-pass.json new file mode 100644 index 0000000000000000000000000000000000000000..04c3ad0db6025affd7921db24988c2f593fc7938 GIT binary patch literal 84 zcmezWubP32K>-St7}6N>fl!IT3Mit)kjPNXkOCA_V$eZRl?0S2f~%@xs9;EDC4glXKy@Hi3W}+^KvfXCxEKKQmlw7G literal 0 HcmV?d00001 diff --git a/testdata/data-error.json b/testdata/utf-8/data-error.json similarity index 100% rename from testdata/data-error.json rename to testdata/utf-8/data-error.json diff --git a/testdata/data-error.yml b/testdata/utf-8/data-error.yml similarity index 100% rename from testdata/data-error.yml rename to testdata/utf-8/data-error.yml diff --git a/testdata/data-fail.json b/testdata/utf-8/data-fail.json similarity index 
100% rename from testdata/data-fail.json rename to testdata/utf-8/data-fail.json diff --git a/testdata/data-fail.yml b/testdata/utf-8/data-fail.yml similarity index 100% rename from testdata/data-fail.yml rename to testdata/utf-8/data-fail.yml diff --git a/testdata/data-pass.json b/testdata/utf-8/data-pass.json similarity index 100% rename from testdata/data-pass.json rename to testdata/utf-8/data-pass.json diff --git a/testdata/data-pass.yml b/testdata/utf-8/data-pass.yml similarity index 100% rename from testdata/data-pass.yml rename to testdata/utf-8/data-pass.yml diff --git a/testdata/schema.json b/testdata/utf-8/schema.json similarity index 100% rename from testdata/schema.json rename to testdata/utf-8/schema.json diff --git a/testdata/schema.yml b/testdata/utf-8/schema.yml similarity index 100% rename from testdata/schema.yml rename to testdata/utf-8/schema.yml diff --git a/testdata/utf-8_bom/data-error.json b/testdata/utf-8_bom/data-error.json new file mode 100644 index 0000000..6e61837 --- /dev/null +++ b/testdata/utf-8_bom/data-error.json @@ -0,0 +1 @@ +not valid json diff --git a/testdata/utf-8_bom/data-error.yml b/testdata/utf-8_bom/data-error.yml new file mode 100644 index 0000000..f9b03bc --- /dev/null +++ b/testdata/utf-8_bom/data-error.yml @@ -0,0 +1 @@ +invalid: "an escaped \' single quote is not valid yaml \ No newline at end of file diff --git a/testdata/utf-8_bom/data-fail.json b/testdata/utf-8_bom/data-fail.json new file mode 100644 index 0000000..053cc03 --- /dev/null +++ b/testdata/utf-8_bom/data-fail.json @@ -0,0 +1,3 @@ +{ + "bar": "missing foo" +} diff --git a/testdata/utf-8_bom/data-fail.yml b/testdata/utf-8_bom/data-fail.yml new file mode 100644 index 0000000..b07635a --- /dev/null +++ b/testdata/utf-8_bom/data-fail.yml @@ -0,0 +1,2 @@ +--- +bar: missing foo diff --git a/testdata/utf-8_bom/data-pass.json b/testdata/utf-8_bom/data-pass.json new file mode 100644 index 0000000..e091c96 --- /dev/null +++ b/testdata/utf-8_bom/data-pass.json @@ 
-0,0 +1,4 @@ +{ + "foo": "asdf", + "bar": "zxcv" +} diff --git a/testdata/utf-8_bom/data-pass.yml b/testdata/utf-8_bom/data-pass.yml new file mode 100644 index 0000000..d7e16a7 --- /dev/null +++ b/testdata/utf-8_bom/data-pass.yml @@ -0,0 +1,3 @@ +--- +foo: asdf +bar: zxcv diff --git a/testdata/utf-8_bom/schema.json b/testdata/utf-8_bom/schema.json new file mode 100644 index 0000000..5e26417 --- /dev/null +++ b/testdata/utf-8_bom/schema.json @@ -0,0 +1,7 @@ +{ + "properties": { + "foo": { "type": "string" }, + "bar": {} + }, + "required": ["foo"] +} diff --git a/testdata/utf-8_bom/schema.yml b/testdata/utf-8_bom/schema.yml new file mode 100644 index 0000000..f2c99d5 --- /dev/null +++ b/testdata/utf-8_bom/schema.yml @@ -0,0 +1,7 @@ +--- +properties: + foo: + type: string + bar: {} +required: + - foo From 806c9cd19a24e27e91c31a1f56f140cbcae32c02 Mon Sep 17 00:00:00 2001 From: Neil Pankey Date: Thu, 17 Sep 2020 21:08:40 -0700 Subject: [PATCH 3/4] yajsv: UTF-16 and BOM handling --- main.go | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 8ee1767..766b97e 100644 --- a/main.go +++ b/main.go @@ -2,8 +2,11 @@ // a provided JSON Schema - https://json-schema.org/ package main +//go:generate go run gen_testdata.go + import ( "bufio" + "bytes" "flag" "fmt" "io" @@ -15,21 +18,37 @@ import ( "strings" "sync" + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/unicode" + "github.com/ghodss/yaml" "github.com/mitchellh/go-homedir" "github.com/xeipuuv/gojsonschema" ) var ( - version = "v1.3.0-dev" + version = "v1.4.0-dev" schemaFlag = flag.String("s", "", "primary JSON schema to validate against, required") quietFlag = flag.Bool("q", false, "quiet, only print validation failures and errors") versionFlag = flag.Bool("v", false, "print version and exit") + bomFlag = flag.Bool("b", false, "allow BOM in JSON files, error if seen and unset") listFlags stringFlags refFlags 
stringFlags ) +// https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding +const ( + bomUTF8 = "\xEF\xBB\xBF" + bomUTF16BE = "\xFE\xFF" + bomUTF16LE = "\xFF\xFE" +) + +var ( + encUTF16BE = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) + encUTF16LE = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) +) + func init() { flag.Var(&listFlags, "l", "validate JSON documents from newline separated paths and/or globs in a text file (relative to the basename of the file itself)") flag.Var(&refFlags, "r", "referenced schema(s), can be globs and/or used multiple times") @@ -131,7 +150,6 @@ func realMain(args []string, w io.Writer) int { sem <- 0 defer func() { <-sem }() - loader, err := jsonLoader(path) if err != nil { msg := fmt.Sprintf("%s: error: load doc: %s", path, err) @@ -190,14 +208,57 @@ func jsonLoader(path string) (gojsonschema.JSONLoader, error) { } switch filepath.Ext(path) { case ".yml", ".yaml": + // TODO YAML requires the presence of a BOM to detect UTF-16 + // text. Is there a decent heuristic to detect UTF-16 text + // missing a BOM so we can provide a better error message? buf, err = yaml.YAMLToJSON(buf) + default: + buf, err = jsonDecodeCharset(buf) } if err != nil { return nil, err } + // TODO What if we have an empty document? return gojsonschema.NewBytesLoader(buf), nil } +// jsonDecodeCharset attempts to detect UTF-16 (LE or BE) JSON text and +// decode as appropriate. It also skips a BOM at the start of the buffer +// if `-b` was specified. Presence of a BOM is an error otherwise. 
+func jsonDecodeCharset(buf []byte) ([]byte, error) { + if len(buf) < 2 { // UTF-8 + return buf, nil + } + + bom := "" + var enc encoding.Encoding + switch { + case bytes.HasPrefix(buf, []byte(bomUTF8)): + bom = bomUTF8 + case bytes.HasPrefix(buf, []byte(bomUTF16BE)): + bom = bomUTF16BE + enc = encUTF16BE + case bytes.HasPrefix(buf, []byte(bomUTF16LE)): + bom = bomUTF16LE + enc = encUTF16LE + case buf[0] == 0: + enc = encUTF16BE + case buf[1] == 0: + enc = encUTF16LE + } + + if bom != "" { + if !*bomFlag { + return nil, fmt.Errorf("unexpected BOM, see `-b` flag") + } + buf = buf[len(bom):] + } + if enc != nil { + return enc.NewDecoder().Bytes(buf) + } + return buf, nil +} + func printUsage() { fmt.Fprintf(os.Stderr, `Usage: %s -s schema.(json|yml) [options] document.(json|yml) ... From 87a159c4587a194fa9386fb340980fec53bd61cf Mon Sep 17 00:00:00 2001 From: Neil Pankey Date: Tue, 6 Oct 2020 16:03:49 -0700 Subject: [PATCH 4/4] utf16: Schema errors and encoding tests --- gen_testdata.go | 85 ++++++++++++++++++++++----------------------- main.go | 28 +++++++++------ main_test.go | 92 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 54 deletions(-) diff --git a/gen_testdata.go b/gen_testdata.go index 062497b..fed4aaf 100644 --- a/gen_testdata.go +++ b/gen_testdata.go @@ -1,57 +1,56 @@ // +build ignore -// generates clones the utf-8 tests data to the other +// gen_testdata clones the utf-8 tests data to the other // unicode encodings and adds BOM variants of each. 
package main import ( - "io/ioutil" - "log" - "os" - "path/filepath" + "io/ioutil" + "log" + "os" + "path/filepath" - "golang.org/x/text/encoding" - "golang.org/x/text/encoding/unicode" + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/unicode" ) - func main() { - var xforms = []struct { - dir, bom string - enc encoding.Encoding - } { - { "testdata/utf-16be", "\xFE\xFF", unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) }, - { "testdata/utf-16le", "\xFF\xFE", unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) }, - } - - paths, _ := filepath.Glob("testdata/utf-8/*") - for _, p := range paths { - src, err := ioutil.ReadFile(p) - if err != nil { - log.Fatal(err) - } - - write("testdata/utf-8_bom", p, "\xEF\xBB\xBF", src) - for _, xform := range xforms { - dst, err := xform.enc.NewEncoder().Bytes(src) - if err != nil { - log.Fatal(err) - } - write(xform.dir, p, "", dst) - write(xform.dir + "_bom", p, xform.bom, dst) - } - } + var xforms = []struct { + dir, bom string + enc encoding.Encoding + }{ + {"testdata/utf-16be", "\xFE\xFF", unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)}, + {"testdata/utf-16le", "\xFF\xFE", unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)}, + } + + paths, _ := filepath.Glob("testdata/utf-8/*") + for _, p := range paths { + src, err := ioutil.ReadFile(p) + if err != nil { + log.Fatal(err) + } + + write("testdata/utf-8_bom", p, "\xEF\xBB\xBF", src) + for _, xform := range xforms { + dst, err := xform.enc.NewEncoder().Bytes(src) + if err != nil { + log.Fatal(err) + } + write(xform.dir, p, "", dst) + write(xform.dir+"_bom", p, xform.bom, dst) + } + } } func write(dir, orig, bom string, buf []byte) { - f, err := os.Create(filepath.Join(dir, filepath.Base(orig))) - if err != nil { - log.Fatal(err) - } - if _, err = f.Write([]byte(bom)); err != nil { - log.Fatal(err) - } - if _, err = f.Write(buf); err != nil { - log.Fatal(err) - } + f, err := os.Create(filepath.Join(dir, filepath.Base(orig))) + if err != nil { + 
log.Fatal(err) + } + if _, err = f.Write([]byte(bom)); err != nil { + log.Fatal(err) + } + if _, err = f.Write(buf); err != nil { + log.Fatal(err) + } } diff --git a/main.go b/main.go index 766b97e..bf33441 100644 --- a/main.go +++ b/main.go @@ -79,7 +79,7 @@ func realMain(args []string, w io.Writer) int { dir := filepath.Dir(list) f, err := os.Open(list) if err != nil { - log.Fatalf("%s: %s\n", list, err) + return schemaError("%s: %s", list, err) } defer f.Close() @@ -93,7 +93,7 @@ func realMain(args []string, w io.Writer) int { docs = append(docs, glob(pattern)...) } if err := scanner.Err(); err != nil { - log.Fatalf("%s: invalid file list: %s\n", list, err) + return schemaError("%s: invalid file list: %s", list, err) } } if len(docs) == 0 { @@ -104,13 +104,13 @@ func realMain(args []string, w io.Writer) int { sl := gojsonschema.NewSchemaLoader() schemaPath, err := filepath.Abs(*schemaFlag) if err != nil { - log.Fatalf("%s: unable to convert to absolute path: %s\n", *schemaFlag, err) + return schemaError("%s: unable to convert to absolute path: %s", *schemaFlag, err) } for _, ref := range refFlags { for _, p := range glob(ref) { absPath, err := filepath.Abs(p) if err != nil { - log.Fatalf("%s: unable to convert to absolute path: %s\n", absPath, err) + return schemaError("%s: unable to convert to absolute path: %s", absPath, err) } if absPath == schemaPath { @@ -119,22 +119,22 @@ func realMain(args []string, w io.Writer) int { loader, err := jsonLoader(absPath) if err != nil { - log.Fatalf("%s: unable to load schema ref: %s\n", *schemaFlag, err) + return schemaError("%s: unable to load schema ref: %s", *schemaFlag, err) } if err := sl.AddSchemas(loader); err != nil { - log.Fatalf("%s: invalid schema: %s\n", p, err) + return schemaError("%s: invalid schema: %s", p, err) } } } schemaLoader, err := jsonLoader(schemaPath) if err != nil { - log.Fatalf("%s: unable to load schema: %s\n", *schemaFlag, err) + return schemaError("%s: unable to load schema: %s", *schemaFlag, 
err) } schema, err := sl.Compile(schemaLoader) if err != nil { - log.Fatalf("%s: invalid schema: %s\n", *schemaFlag, err) + return schemaError("%s: invalid schema: %s", *schemaFlag, err) } // Validate the schema against each doc in parallel, limiting simultaneous @@ -262,8 +262,8 @@ func jsonDecodeCharset(buf []byte) ([]byte, error) { func printUsage() { fmt.Fprintf(os.Stderr, `Usage: %s -s schema.(json|yml) [options] document.(json|yml) ... - yajsv validates JSON and YAML document(s) against a schema. One of three statuses are - reported per document: + yajsv validates JSON and YAML document(s) against a schema. One of three status + results are reported per document: pass: Document is valid relative to the schema fail: Document is invalid relative to the schema @@ -273,7 +273,8 @@ func printUsage() { schema validation failure. Sets the exit code to 1 on any failures, 2 on any errors, 3 on both, 4 on - invalid usage. Otherwise, 0 is returned if everything passes validation. + invalid usage, 5 on schema definition or file-list errors. Otherwise, 0 is + returned if everything passes validation. Options: @@ -288,6 +289,11 @@ func usageError(msg string) int { return 4 } +func schemaError(format string, args ...interface{}) int { + fmt.Fprintf(os.Stderr, format+"\n", args...) 
+ return 5 +} + // glob is a wrapper that also resolves `~` since we may be skipping // the shell expansion when single-quoting globs at the command line func glob(pattern string) []string { diff --git a/main_test.go b/main_test.go index 878fe8b..42d0413 100644 --- a/main_test.go +++ b/main_test.go @@ -1,12 +1,23 @@ package main import ( + "fmt" + "os" "path/filepath" "sort" "strings" "testing" ) +func init() { + // TODO: Cleanup this global monkey-patching + devnull, err := os.Open(os.DevNull) + if err != nil { + panic(err) + } + os.Stderr = devnull +} + func TestMain(t *testing.T) { tests := []struct { in string @@ -14,6 +25,10 @@ func TestMain(t *testing.T) { exit int }{ { + "-s testdata/utf-16be_bom/schema.json testdata/utf-16le_bom/data-fail.yml", + []string{}, + 5, + }, { "-s testdata/utf-8/schema.yml testdata/utf-8/data-pass.yml", []string{"testdata/utf-8/data-pass.yml: pass"}, 0, @@ -89,3 +104,80 @@ func TestMain(t *testing.T) { }) } } + +func TestMatrix(t *testing.T) { + // schema.{format} {encoding}{_bom}/data-{expect}.{format} + type testcase struct { + schemaEnc, schemaFmt string + dataEnc, dataFmt, dataRes string + allowBOM bool + } + + encodings := []string{"utf-8", "utf-16be", "utf-16le", "utf-8_bom", "utf-16be_bom", "utf-16le_bom"} + formats := []string{"json", "yml"} + results := []string{"pass", "fail", "error"} + tests := []testcase{} + + // poor man's cartesian product + for _, senc := range encodings { + for _, sfmt := range formats { + for _, denc := range encodings { + for _, dfmt := range formats { + for _, dres := range results { + tests = append(tests, testcase{senc, sfmt, denc, dfmt, dres, false}) + tests = append(tests, testcase{senc, sfmt, denc, dfmt, dres, true}) + } + } + } + } + } + + for _, tt := range tests { + schemaBOM := strings.HasSuffix(tt.schemaEnc, "_bom") + schema16 := strings.HasPrefix(tt.schemaEnc, "utf-16") + dataBOM := strings.HasSuffix(tt.dataEnc, "_bom") + data16 := strings.HasPrefix(tt.dataEnc, "utf-16") + + schema := 
fmt.Sprintf("testdata/%s/schema.%s", tt.schemaEnc, tt.schemaFmt) + data := fmt.Sprintf("testdata/%s/data-%s.%s", tt.dataEnc, tt.dataRes, tt.dataFmt) + cmd := fmt.Sprintf("-s %s %s", schema, data) + if tt.allowBOM { + cmd = "-b " + cmd + } + + t.Run(cmd, func(t *testing.T) { + want := 0 + switch { + // Schema Errors (exit = 5) + // - YAML w/out BOM for UTF-16 + // - JSON w/ BOM but missing allowBOM flag + case tt.schemaFmt == "yml" && !schemaBOM && schema16: + want = 5 + case tt.schemaFmt == "json" && schemaBOM && !tt.allowBOM: + want = 5 + // Data Errors (exit = 2) + // - YAML w/out BOM for UTF-16 + // - JSON w/ BOM but missing allowBOM flag + // - standard malformed files (e.g. data-error) + case tt.dataFmt == "yml" && !dataBOM && data16: + want = 2 + case tt.dataFmt == "json" && dataBOM && !tt.allowBOM: + want = 2 + case tt.dataRes == "error": + want = 2 + // Data Failures + case tt.dataRes == "fail": + want = 1 + } + + // TODO: Cleanup this global monkey-patching + *bomFlag = tt.allowBOM + + var w strings.Builder + got := realMain(strings.Split(cmd, " "), &w) + if got != want { + t.Errorf("got(%d) != want(%d) bomflag %t", got, want, *bomFlag) + } + }) + } +}