Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate content of folder items #41

Merged
merged 14 commits into from
Sep 1, 2020
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion code/go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ require (
github.com/pkg/errors v0.9.1
github.com/rakyll/statik v0.1.7
github.com/stretchr/testify v1.6.1
github.com/xeipuuv/gojsonschema v1.2.0
golang.org/x/lint v0.0.0-20200302205851-738671d3881b // indirect
golang.org/x/tools v0.0.0-20200826040757-bc8aaaa29e06 // indirect
golang.org/x/tools v0.0.0-20200828161849-5deb26317202 // indirect
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c
)
11 changes: 11 additions & 0 deletions code/go/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,15 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
github.com/rakyll/statik v0.1.7 h1:OF3QCZUuyPxuGEP7B4ypUa7sB/iHtqOTDYZXGM8KOdQ=
github.com/rakyll/statik v0.1.7/go.mod h1:AlZONWzMtEnMs7W4e/1LURLiI49pIMmp6V9Unghqrcc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
Expand All @@ -35,6 +42,10 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7 h1:EBZoQjiKKPaLbPrbpssUfuH
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200826040757-bc8aaaa29e06 h1:ChBCbOHeLqK+j+znGPlWCcvx/t2PdxmyPBheVZxXbcc=
golang.org/x/tools v0.0.0-20200826040757-bc8aaaa29e06/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200827010519-17fd2f27a9e3 h1:r3P/5xOq/dK1991B65Oy6E1fRF/2d/fSYZJ/fXGVfJc=
golang.org/x/tools v0.0.0-20200827010519-17fd2f27a9e3/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200828161849-5deb26317202 h1:DrWbY9UUFi/sl/3HkNVoBjDbGfIPZZfgoGsGxOL1EU8=
golang.org/x/tools v0.0.0-20200828161849-5deb26317202/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Expand Down
2 changes: 1 addition & 1 deletion code/go/internal/spec/statik.go

Large diffs are not rendered by default.

31 changes: 31 additions & 0 deletions code/go/internal/validator/common_spec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package validator

import (
"github.com/creasty/defaults"
"github.com/pkg/errors"
)

// commonSpec groups the content-related settings that are shared by folder
// specs and folder item specs; it is embedded in both of them.
type commonSpec struct {
// AdditionalContents is read from the "additionalContents" YAML key.
// NOTE(review): presumably controls whether items not listed in Contents
// are tolerated - confirm against the folder validation code.
AdditionalContents bool `yaml:"additionalContents"`
// Contents lists the nested item specs, read from the "contents" YAML key.
Contents []folderItemSpec `yaml:"contents"`
}


// setDefaultValues fills in the tag-declared default values on spec and then
// recurses into the commonSpec embedded in every entry of spec.Contents, so
// the whole nested tree of item specs ends up with defaults applied.
//
// It returns a wrapped error when the defaults library rejects spec; errors
// coming from nested levels are passed through as they are.
func setDefaultValues(spec *commonSpec) error {
	if err := defaults.Set(spec); err != nil {
		return errors.Wrap(err, "could not set default values")
	}

	// Ranging over an empty (or nil) Contents slice simply does nothing, so
	// no explicit emptiness check is needed before descending.
	for idx := range spec.Contents {
		nested := &spec.Contents[idx].commonSpec
		if err := setDefaultValues(nested); err != nil {
			return err
		}
	}
	return nil
}
166 changes: 166 additions & 0 deletions code/go/internal/validator/folder_item_spec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
package validator

import (
"encoding/json"
"fmt"
"gopkg.in/yaml.v3"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"regexp"

"github.com/pkg/errors"
"github.com/xeipuuv/gojsonschema"
)

// folderItemSpec describes a single item (file or folder) listed in the
// contents of a folder spec. All fields are populated from YAML.
type folderItemSpec struct {
// Description is the free-text "description" of the item.
Description string `yaml:"description"`
// ItemType is the "type" of the item; isSameType compares it against
// itemTypeFile and itemTypeFolder.
ItemType string `yaml:"type"`
// ContentMediaType selects how the item content is loaded before schema
// validation; loadItemContent accepts "application/x-yaml" and
// "application/json".
ContentMediaType string `yaml:"contentMediaType"`
// Name is the exact item name to match. When set it takes precedence over
// Pattern in matchingFileExists.
Name string `yaml:"name"`
// Pattern is a regular expression matched against item names when Name is
// empty.
Pattern string `yaml:"pattern"`
// Required is read from the "required" YAML key.
// NOTE(review): presumably marks the item as mandatory - confirm in the
// folder validation code.
Required bool `yaml:"required"`
// Ref is the "$ref" path to the item's schema file, resolved relative to
// the directory of the folder spec. When empty, validate skips schema
// validation.
Ref string `yaml:"$ref"`
// Visibility defaults to "public" when not provided.
Visibility string `yaml:"visibility" default:"public"`
// commonSpec is inlined so nested contents can be declared directly on
// the item.
commonSpec `yaml:",inline"`
}

// itemSchemaSpec is the envelope of an item schema file: the schema itself
// lives under the top-level "spec" key. loadItemSchema unmarshals the YAML
// file into this type and re-encodes Spec as JSON.
type itemSchemaSpec struct {
// Spec holds the schema document as a generic map.
Spec map[string]interface{} `json:"spec" yaml:"spec"`
}

// matchingFileExists reports whether files contains an entry that satisfies
// this item spec.
//
// When Name is set, the entry with exactly that name is looked up and the
// result is whether its kind (file or folder) agrees with ItemType. Otherwise,
// when Pattern is set, the result is true as soon as any entry both matches
// the regular expression and has the expected kind; an entry that matches the
// pattern but has the wrong kind no longer hides a later valid entry. When
// neither Name nor Pattern is set the result is false.
//
// The pattern is compiled once rather than once per entry. An error is
// returned when Pattern is not a valid regular expression, and this is now
// reported even if files is empty.
func (s *folderItemSpec) matchingFileExists(files []os.FileInfo) (bool, error) {
	if s.Name != "" {
		// Names are compared literally; at most one entry can match.
		for _, file := range files {
			if file.Name() == s.Name {
				return s.isSameType(file), nil
			}
		}
		return false, nil
	}

	if s.Pattern == "" {
		return false, nil
	}

	pattern, err := regexp.Compile(s.Pattern)
	if err != nil {
		return false, errors.Wrap(err, "invalid folder item spec pattern")
	}

	for _, file := range files {
		// Keep scanning on a kind mismatch: several entries may match the
		// same pattern and only one of them needs to be of the right kind.
		if pattern.MatchString(file.Name()) && s.isSameType(file) {
			return true, nil
		}
	}
	return false, nil
}

// isSameType reports whether the kind of file (regular entry vs. directory)
// agrees with the ItemType declared by the spec. Any ItemType other than
// itemTypeFile or itemTypeFolder yields false.
func (s *folderItemSpec) isSameType(file os.FileInfo) bool {
	if s.ItemType == itemTypeFile {
		return !file.IsDir()
	}
	return s.ItemType == itemTypeFolder && file.IsDir()
}

func (s *folderItemSpec) validate(fs http.FileSystem, folderSpecPath string, itemPath string) ValidationErrors {
if s.Ref == "" {
return nil // no item's schema defined
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the schema is defined inline and not referenced via $ref?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if I follow you...
this might be bad wording, should be: schema reference not provided. Does it sound better?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I was referring to needing something like this: https://github.com/elastic/package-spec/blob/master/code/go/internal/validator/folder.go#L125-L141. Maybe this is not the right place for it but then it should be added where this method is called from.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, now I see... I totally missed the case where schema is defined inline, didn't know about this. I will improve the implementation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ycombinator Hm.. I dived into this topic and got a bit confused. Do we really need it? What would be a use case for this? This $ref refers to a place in folder schema in which we refer to item's schema. It's not part of the JSON-schema, so I believe we can enforce keeping it in a separate file (to separate concerns and prevent creating huge all-in-one files).

tl;dr $ref in folder schema is always external.

WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So in the folder schema, there can be two types of items: folder (for describing the structure of a sub-folder within the folder) and file (for describing the structure of a file within the folder).

For type: folder, we already have a couple of places in the spec where we use $ref (example) and a couple of other places where we inline the definition instead of using $ref (example).

For type: file, I checked, and at the moment we only have places where we use $ref (example). We do not yet have any places where we are inlining the spec (aka schema) for the file itself. However, I can see this being useful for simple file specs (example).

So I think there is a use case for it and we should support it.

Also, if we allow inline specs for sub-folder items but not for file items, we are introducing an inconsistency. We could, of course, resolve this inconsistency by going the other way: that is, we can say that we only allow $refs for item specs, regardless of whether those are folder items or file items; that we do not accept inline specs for either item type. I think this is slightly inconvenient when the item has a small spec but I'd be okay with this approach too. If you prefer this, then let's make a separate PR to first remove support for inline specs for type: folder items and adjust the spec files accordingly.

}

schemaPath := filepath.Join(filepath.Dir(folderSpecPath), s.Ref)
schemaData, err := loadItemSchema(fs, schemaPath)
if err != nil {
return ValidationErrors{errors.Wrapf(err, "loading item schema failed (path %s)", schemaPath)}
}

// loading item content
itemData, err := loadItemContent(itemPath, s.ContentMediaType)
if err != nil {
return ValidationErrors{errors.Wrapf(err, "loading item content failed (path %s)", itemPath)}
}

// validation with schema
errs := validateData(schemaData, itemData)
if errs != nil {
return errs
}
return nil
}

func loadItemSchema(fs http.FileSystem, itemSchemaPath string) ([]byte, error) {
itemSchemaFile, err := fs.Open(itemSchemaPath)
if err != nil {
return nil, errors.Wrap(err, "opening schema file failed")
}
defer itemSchemaFile.Close()

itemSchemaData, err := ioutil.ReadAll(itemSchemaFile)
ycombinator marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, errors.Wrap(err, "reading schema file failed")
}

if len(itemSchemaData) == 0 {
return nil, errors.New("schema file is empty")
}

var schema itemSchemaSpec
err = yaml.Unmarshal(itemSchemaData, &schema)
if err != nil {
return nil, errors.Wrapf(err, "schema unmarshalling failed (path: %s)", itemSchemaPath)
}

schemaData, err := json.Marshal(&schema.Spec)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for this PR but we could probably convert the schema YAML files to JSON as part of the make update step so we don't have to do this yaml.Unmarshal + json.Marshal dance every time at run time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this could be an improvement in the future (conversion is always error-prone), although I admit that YAML here is much more human-readable and easier to interact with.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, which is why we'd keep the source spec files in YAML but only convert them to JSON when make update is run.

if err != nil {
return nil, errors.Wrapf(err, "marshalling schema to JSON format failed")
}
return schemaData, nil
}

// loadItemContent reads the item at itemPath and returns its content as JSON
// bytes, ready to be validated against a JSON schema.
//
// mediaType selects the handling of the raw bytes:
//   - "application/json": the bytes are returned unchanged;
//   - "application/x-yaml": the document is decoded and re-encoded as JSON;
//   - anything else: an "unsupported media type" error.
//
// An error is also returned when the file cannot be read or is empty; both
// checks happen before the media type is examined.
func loadItemContent(itemPath, mediaType string) ([]byte, error) {
	raw, err := ioutil.ReadFile(itemPath)
	if err != nil {
		return nil, errors.Wrap(err, "reading item file failed")
	}
	if len(raw) == 0 {
		return nil, errors.New("file is empty")
	}

	// JSON needs no conversion; every other type except YAML is rejected.
	if mediaType == "application/json" {
		return raw, nil
	}
	if mediaType != "application/x-yaml" {
		return nil, fmt.Errorf("unsupported media type (%s)", mediaType)
	}

	var parsed interface{}
	if err := yaml.Unmarshal(raw, &parsed); err != nil {
		return nil, errors.Wrapf(err, "unmarshalling YAML file failed (path: %s)", itemPath)
	}

	converted, err := json.Marshal(&parsed)
	if err != nil {
		return nil, errors.Wrapf(err, "converting YAML file to JSON failed (path: %s)", itemPath)
	}
	return converted, nil
}

// validateData checks the JSON document in itemData against the JSON schema
// in schemaData.
//
// It returns nil when the document is valid. When the validator itself fails
// (for example because the schema or the document cannot be loaded) the
// single underlying error is returned. Otherwise one error per reported
// violation is returned, formatted as "field <name>: <description>".
func validateData(schemaData, itemData []byte) ValidationErrors {
	result, err := gojsonschema.Validate(
		gojsonschema.NewBytesLoader(schemaData),
		gojsonschema.NewBytesLoader(itemData),
	)
	if err != nil {
		return ValidationErrors{err}
	}
	if result.Valid() {
		// Item content is valid according to the loaded schema.
		return nil
	}

	// Deliberately left as a nil slice until something is appended: callers
	// compare the result against nil.
	var violations ValidationErrors
	for _, resultErr := range result.Errors() {
		violations = append(violations, fmt.Errorf("field %s: %s", resultErr.Field(), resultErr.Description()))
	}
	return violations
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@ import (
"fmt"
"io/ioutil"
"net/http"
"os"
"path"
"path/filepath"
"regexp"

"github.com/creasty/defaults"
"github.com/pkg/errors"
"gopkg.in/yaml.v3"
)
Expand All @@ -28,23 +26,6 @@ type folderSpec struct {
commonSpec
}

type folderItemSpec struct {
Description string `yaml:"description"`
ItemType string `yaml:"type"`
ContentMediaType string `yaml:"contentMediaType"`
Name string `yaml:"name"`
Pattern string `yaml:"pattern"`
Required bool `yaml:"required"`
Ref string `yaml:"$ref"`
Visibility string `yaml:"visibility" default:"public"`
commonSpec `yaml:",inline"`
}

type commonSpec struct {
AdditionalContents bool `yaml:"additionalContents"`
Contents []folderItemSpec `yaml:"contents"`
}

func newFolderSpec(fs http.FileSystem, specPath string) (*folderSpec, error) {
specFile, err := fs.Open(specPath)
if err != nil {
Expand Down Expand Up @@ -151,7 +132,14 @@ func (s *folderSpec) validate(folderPath string) ValidationErrors {
errs = append(errs, fmt.Errorf("[%s] is a file but is expected to be a folder", fileName))
continue
}
// TODO: more validation for file item

itemPath := filepath.Join(folderPath, file.Name())
itemValidationErrs := itemSpec.validate(s.fs, s.specPath, itemPath)
if itemValidationErrs != nil {
for _, ive := range itemValidationErrs {
errs = append(errs, errors.Wrapf(ive, "file \"%s\" is invalid", itemPath))
}
}
}
}

Expand Down Expand Up @@ -198,56 +186,4 @@ func (s *folderSpec) findItemSpec(folderItemName string) (*folderItemSpec, error

// No item spec found
return nil, nil
}

func (s *folderItemSpec) matchingFileExists(files []os.FileInfo) (bool, error) {
if s.Name != "" {
for _, file := range files {
if file.Name() == s.Name {
return s.isSameType(file), nil
}
}
} else if s.Pattern != "" {
for _, file := range files {
isMatch, err := regexp.MatchString(s.Pattern, file.Name())
if err != nil {
return false, errors.Wrap(err, "invalid folder item spec pattern")
}
if isMatch {
return s.isSameType(file), nil
}
}
}

return false, nil
}

func (s *folderItemSpec) isSameType(file os.FileInfo) bool {
switch s.ItemType {
case itemTypeFile:
return !file.IsDir()
case itemTypeFolder:
return file.IsDir()
}

return false
}

func setDefaultValues(spec *commonSpec) error {
err := defaults.Set(spec)
if err != nil {
return errors.Wrap(err, "could not set default values")
}

if len(spec.Contents) == 0 {
return nil
}

for i := range spec.Contents {
err = setDefaultValues(&spec.Contents[i].commonSpec)
if err != nil {
return err
}
}
return nil
}
}
Empty file.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"1expected": [
ycombinator marked this conversation as resolved.
Show resolved Hide resolved
"expected": [
{
"event.category": [
"web"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
- name: source
title: Source
group: 2
type: group
fields:
- name: geo.city_name
level: core
type: keyword
description: City name.
ignore_above: 1024
- name: geo.location
level: core
type: geo_point
description: Longitude and latitude.
- name: geo.region_iso_code
level: core
type: keyword
description: Region ISO code.
ignore_above: 1024
- name: geo.region_name
level: core
type: keyword
description: Region name.
ignore_above: 1024
Loading