Skip to content

Commit

Permalink
Modify document for elasticsearch migration.
Browse files Browse the repository at this point in the history
  • Loading branch information
damienr74 committed Aug 16, 2019
1 parent e0d388c commit df779fd
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 226 deletions.
196 changes: 71 additions & 125 deletions internal/search/doc/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,117 +6,53 @@ import (
"time"

"sigs.k8s.io/yaml"

"google.golang.org/appengine/search"
)

// Field names used for the search.Field values that Save emits and that
// Load matches against when populating a KustomizationDocument.
const (
identifierStr = "identifier" // one field per entry in the document's identifiers
documentStr = "document" // carries DocumentData
repoURLStr = "repo_url" // carries RepositoryURL
filePathStr = "file_path" // carries FilePath
creationTimeStr = "creation_time" // carries CreationTime
)

// Atom represents an unbreakable character stream.
type Atom = search.Atom

// Implements search.FieldLoadSaver in order to index this representation of a kustomization.yaml
// file.
// This document is meant to be used as the elasticsearch document type.
// Fields are serialized as-is to elasticsearch, where indices are built
// to facilitate text search queries. Identifiers, Values, FilePath,
// RepositoryURL and DocumentData are meant to be searched for text queries
// directly, while the other fields can either be used as a filter, or as
// additional metadata displayed in the UI.
//
// The fields of the document and their purpose are listed below:
// - DocumentData contains the contents of the kustomization file.
// - Kinds represents the kubernetes Kinds that are in this file.
// - Identifiers are a list of (partial and full) identifier paths that can be
// found by users. Each part of a path is delimited by ":" e.g. spec:replicas.
// - Values are a list of identifier paths and their values that can be found by
// search queries. The path is delimited by ":" and the value follows the "="
// symbol e.g. spec:replicas=4.
// - FilePath is the path of the file.
// - RepositoryURL is the URL of the source repository.
// - CreationTime is the time at which the file was created.
//
// Representing each Identifier and Value as a flat string representation
// facilitates the use of complex text search features from elasticsearch such
// as fuzzy searching, regex, wildcards, etc.
type KustomizationDocument struct {
// NOTE(review): the untagged fields directly below and the json-tagged
// fields after them repeat the names DocumentData, FilePath,
// RepositoryURL and CreationTime. This looks like removed and added
// diff lines shown together — confirm which set is current.
identifiers []Atom
FilePath Atom
RepositoryURL Atom
DocumentData string
CreationTime time.Time
// The json tags below give the key each field is serialized under;
// omitempty is set on every one of them.
DocumentData string `json:"document,omitempty"`
Kinds []string `json:"kinds,omitempty"`
Identifiers []string `json:"identifiers,omitempty"`
Values []string `json:"values,omitempty"`
FilePath string `json:"filePath,omitempty"`
RepositoryURL string `json:"repositoryUrl,omitempty"`
CreationTime time.Time `json:"creationTime,omitempty"`
}

// Load partially implements search.FieldLoadSaver.
//
// It resets the stored identifiers to an empty slice and then walks fields
// in order, copying each recognized field into the document. An error is
// returned as soon as a field name is not recognized or a field's value
// does not have the type expected for that name; fields processed before
// the failing one remain applied.
func (k *KustomizationDocument) Load(fields []search.Field, metadata *search.DocumentMetadata) error {
	k.identifiers = []search.Atom{}

	for _, field := range fields {
		var (
			// want holds a zero value of the type the field must carry;
			// it is only used to report the expected type on a mismatch.
			want interface{}
			ok   bool
		)

		switch field.Name {
		case identifierStr:
			var atom search.Atom
			if atom, ok = field.Value.(search.Atom); ok {
				k.identifiers = append(k.identifiers, atom)
			}
			want = atom

		case documentStr:
			var text string
			if text, ok = field.Value.(string); ok {
				k.DocumentData = text
			}
			want = text

		case filePathStr:
			var path search.Atom
			if path, ok = field.Value.(search.Atom); ok {
				k.FilePath = path
			}
			want = path

		case repoURLStr:
			var repo search.Atom
			if repo, ok = field.Value.(search.Atom); ok {
				k.RepositoryURL = repo
			}
			want = repo

		case creationTimeStr:
			var created time.Time
			if created, ok = field.Value.(time.Time); ok {
				k.CreationTime = created
			}
			want = created

		default:
			return fmt.Errorf("KustomizationDocument field %s not recognized", field.Name)
		}

		if !ok {
			return fmt.Errorf("%s expects type %T, found %#v", field.Name, want, field.Value)
		}
	}

	return nil
}

// Save partially implements search.FieldLoadSaver.
//
// It first calls ParseYAML and returns its error, if any. On success it
// returns one identifier field per stored identifier, followed by the
// document, file path, repository URL and creation time fields, in that
// order. The returned metadata is always nil.
func (k *KustomizationDocument) Save() ([]search.Field, *search.DocumentMetadata, error) {
	if err := k.ParseYAML(); err != nil {
		return nil, nil, err
	}

	// Number of non-identifier fields appended after the identifiers.
	const extraFieldCount = 4

	fields := make([]search.Field, 0, len(k.identifiers)+extraFieldCount)
	for i := range k.identifiers {
		fields = append(fields, search.Field{Name: identifierStr, Value: k.identifiers[i]})
	}

	fields = append(fields,
		search.Field{Name: documentStr, Value: k.DocumentData},
		search.Field{Name: filePathStr, Value: k.FilePath},
		search.Field{Name: repoURLStr, Value: k.RepositoryURL},
		search.Field{Name: creationTimeStr, Value: k.CreationTime},
	)

	return fields, nil, nil
}

func (k *KustomizationDocument) ParseYAML() error {
k.identifiers = make([]Atom, 0)
func (doc *KustomizationDocument) ParseYAML() error {
doc.Identifiers = make([]string, 0)
doc.Values = make([]string, 0)

var kustomization map[string]interface{}
err := yaml.Unmarshal([]byte(k.DocumentData), &kustomization)
err := yaml.Unmarshal([]byte(doc.DocumentData), &kustomization)
if err != nil {
return fmt.Errorf("unable to parse kustomization file: %s", err)
}

type Map struct {
data map[string]interface{}
prefix Atom
prefix string
}

toVisit := []Map{
Expand All @@ -126,43 +62,53 @@ func (k *KustomizationDocument) ParseYAML() error {
},
}

atomJoin := func(vals ...interface{}) Atom {
strs := make([]string, 0, len(vals))
for _, val := range vals {
strs = append(strs, fmt.Sprint(val))
}
return Atom(strings.Trim(strings.Join(strs, " "), " "))
}

set := make(map[Atom]struct{})

identifierSet := make(map[string]struct{})
valueSet := make(map[string]struct{})
for i := 0; i < len(toVisit); i++ {
visiting := toVisit[i]
for k, v := range visiting.data {
set[atomJoin(visiting.prefix, k)] = struct{}{}
switch value := v.(type) {
case map[string]interface{}:
toVisit = append(toVisit, Map{
data: value,
prefix: atomJoin(visiting.prefix, fmt.Sprint(k)),
})
case []interface{}:
for _, val := range value {
submap, ok := val.(map[string]interface{})
if !ok {
continue
}
identifier := fmt.Sprintf("%s:%s", visiting.prefix,
strings.Replace(k, ":", "%3A", -1))
// noop after the first iteration.
identifier = strings.TrimLeft(identifier, ":")

// Recursive function traverses structure to find
// identifiers and values. These later get formatted
// into doc.Identifiers and doc.Values respectively.
var traverseStructure func(interface{})
traverseStructure = func(arg interface{}) {
switch value := arg.(type) {
case map[string]interface{}:
toVisit = append(toVisit, Map{
data: submap,
prefix: atomJoin(visiting.prefix, fmt.Sprint(k)),
data: value,
prefix: identifier,
})
case []interface{}:
for _, val := range value {
traverseStructure(val)
}
case interface{}:
esc := strings.Replace(fmt.Sprintf("%v",
value), ":", "%3A", -1)

valuePath := fmt.Sprintf("%s=%v",
identifier, esc)
valueSet[valuePath] = struct{}{}
}
}
traverseStructure(v)

identifierSet[identifier] = struct{}{}

}
}

for key := range set {
k.identifiers = append(k.identifiers, key)
for val := range valueSet {
doc.Values = append(doc.Values, val)
}

for key := range identifierSet {
doc.Identifiers = append(doc.Identifiers, key)
}

return nil
Expand Down
Loading

0 comments on commit df779fd

Please sign in to comment.