diff --git a/cmd/app/cmd.go b/cmd/app/cmd.go index 4399480a..512af7e5 100644 --- a/cmd/app/cmd.go +++ b/cmd/app/cmd.go @@ -6,6 +6,7 @@ import ( "io" "os" + "github.com/gardener/docforge/pkg/api" "github.com/gardener/docforge/pkg/hugo" "github.com/spf13/cobra" "k8s.io/klog/v2" @@ -37,13 +38,17 @@ func NewCommand(ctx context.Context, cancel context.CancelFunc) *cobra.Command { cmd := &cobra.Command{ Use: "docforge", Short: "Build documentation bundle", - Run: func(cmd *cobra.Command, args []string) { + RunE: func(cmd *cobra.Command, args []string) error { options := NewOptions(flags) doc := Manifest(flags.documentationManifestPath) - reactor := NewReactor(ctx, options) + if err := api.ValidateManifest(doc); err != nil { + return err + } + reactor := NewReactor(ctx, options, doc.Links) if err := reactor.Run(ctx, doc, flags.dryRun); err != nil { - klog.Errorf(err.Error()) + return err } + return nil }, } diff --git a/cmd/app/factory.go b/cmd/app/factory.go index 522217ef..3faba0ae 100644 --- a/cmd/app/factory.go +++ b/cmd/app/factory.go @@ -5,6 +5,7 @@ import ( "io" "path/filepath" + "github.com/gardener/docforge/pkg/api" "github.com/gardener/docforge/pkg/hugo" "github.com/gardener/docforge/pkg/metrics" "github.com/gardener/docforge/pkg/resourcehandlers" @@ -44,7 +45,7 @@ type Metering struct { } // NewReactor creates a Reactor from Options -func NewReactor(ctx context.Context, options *Options) *reactor.Reactor { +func NewReactor(ctx context.Context, options *Options, globalLinksCfg *api.Links) *reactor.Reactor { dryRunWriters := writers.NewDryRunWritersFactory(options.DryRunWriter) o := &reactor.Options{ MaxWorkersCount: options.MaxWorkersCount, @@ -59,6 +60,7 @@ func NewReactor(ctx context.Context, options *Options) *reactor.Reactor { ResourceHandlers: initResourceHandlers(ctx, options), DryRunWriter: dryRunWriters, Resolve: options.Resolve, + GlobalLinksConfig: globalLinksCfg, } if options.DryRunWriter != nil { o.Writer = dryRunWriters.GetWriter(options.DestinationPath) diff --git a/example/advanced/00.yaml b/example/advanced/00.yaml index abf6761b..6f1d75b9 100644 --- a/example/advanced/00.yaml +++ b/example/advanced/00.yaml @@ -1,69 +1,62 @@ -root: - name: doc +structure: + - name: doc nodes: - name: overview - contentSelectors: - - source: https://github.com/gardener/documentation/wiki/Architecture.md - - name: gardenlet - contentSelectors: - - source: https://github.com/gardener/gardener/blob/master/docs/concepts/gardenlet.md + source: https://github.com/gardener/documentation/wiki/Architecture.md + - source: https://github.com/gardener/gardener/blob/master/docs/concepts/gardenlet.md # linkSubstitutes define changes to links in documents.They apply to links and images # specified with markdown markup. - linksSubstitutes: + links: # The key in the mapping is an absolute form of a document link that will be # subject to transformation - "https://github.com/gardener/gardener/blob/master/docs/usage/shooted_seed.md": - # destination is the link reference URL. If it is empty string, - # the links markup is removed leaving only link text behind. - # For images, the entire markup is removed. - destination: "" - "https://kubernetes.io/docs/concepts/extend-kubernetes/operator/": - destination: "https://kubernetes.io/docs/concepts/extend-kubernetes/operator1111" - # text is a link text element (alt-text for images). Specifying text - # will change it to the new value. 
Empty string is valid only with - # `destination=""` - text: smooth operator - # title is the title element of a link or image. Specifying text - # will change it to the new value. - title: a title - # localityDomains can be specified on node level too. - # Node's localityDomain definitions override and amend global ones. - localityDomain: - github.com/gardener/gardener: - version: v1.11.1 - path: gardener/gardener - # exclude omits resources from path. You can - # also use include with the reverse semantics - exclude: - - example - # downloadSubstitutes is a list of regular expressions matching - # links to resources on documents that will be downloaded, mapped - # to name expressions tha define how the downloaded resources will - # named. - # There is a set of variables that can be used to construct the - # expressions: - # - $name: the original name of the resource - # - $path: the original path of the resource - # - $uuid: a UUID generated for the resource - # - $ext: a original resource extension - # The default expression applying to all resources is: $uuid.$ext - # Besides regular-expression-to-expression mappings it is possible - # to map exact URLs (escaped) to concrete names. - downloadSubstitutes: - "\\.(jpg|gif|png)": "$name-hires-$uuid.$ext" - - name: deploying-gardenlets - contentSelectors: - - source: https://github.com/gardener/gardener/blob/master/docs/deployment/deploy_gardenlet.md - - name: automatic-deployment - contentSelectors: - - source: https://github.com/gardener/gardener/blob/master/docs/deployment/deploy_gardenlet_automatically.md - - name: deploy-gardenlet-manually - contentSelectors: - - source: https://github.com/gardener/gardener/blob/2a33b26458dddd7ad09c4c3b2311d3391db890e7/docs/deployment/deploy_gardenlet_manually.md - - name: shooted-seeds - contentSelectors: - - source: https://github.com/gardener/gardener/blob/master/docs/usage/shooted_seed.md -localityDomain: - github.com/gardener/gardener: - version: v1.10.0 - path: gardener/gardener + rewrites: + github.com/gardener/gardener: + version: v1.11.1 + "https://github.com/gardener/gardener/blob/master/docs/usage/shooted_seed.md": + # destination is the link reference URL. If it is empty string, + # the links markup is removed leaving only link text behind. + # For images, the entire markup is removed. + destination: "" + "https://kubernetes.io/docs/concepts/extend-kubernetes/operator/": + destination: "https://kubernetes.io/docs/concepts/extend-kubernetes/operator1111" + # text is a link text element (alt-text for images). Specifying text + # will change it to the new value. Empty string is valid only with + # `destination=""` + text: smooth operator + # title is the title element of a link or image. Specifying text + # will change it to the new value. + title: a title + downloads: + # localityDomains can be specified on node level too. + # Node's localityDomain definitions override and amend global ones. + scope: + github.com/gardener/gardener: + version: v1.11.1 + # downloadSubstitutes is a list of regular expressions matching + # links to resources on documents that will be downloaded, mapped + # to name expressions tha define how the downloaded resources will + # named. 
+ # There is a set of variables that can be used to construct the + # expressions: + # - $name: the original name of the resource + # - $path: the original path of the resource + # - $uuid: a UUID generated for the resource + # - $ext: a original resource extension + # The default expression applying to all resources is: $uuid.$ext + # Besides regular-expression-to-expression mappings it is possible + # to map exact URLs (escaped) to concrete names. + renames: + "\\.(jpg|gif|png)": "$name-hires-$uuid.$ext" + - source: https://github.com/gardener/gardener/blob/master/docs/deployment/deploy_gardenlet.md + - source: https://github.com/gardener/gardener/blob/master/docs/deployment/deploy_gardenlet_automatically.md + - source: https://github.com/gardener/gardener/blob/2a33b26458dddd7ad09c4c3b2311d3391db890e7/docs/deployment/deploy_gardenlet_manually.md + - source: https://github.com/gardener/gardener/blob/master/docs/usage/shooted_seed.md +links: + # The key in the mapping is an absolute form of a document link that will be + # subject to transformation + rewrites: + github.com/gardener/gardener: + version: v1.10.0 + downloads: + scope: + github.com/gardener/gardener: ~ \ No newline at end of file diff --git a/example/simple/00.yaml b/example/simple/00.yaml index a41bb0ac..23fd92dc 100644 --- a/example/simple/00.yaml +++ b/example/simple/00.yaml @@ -1,11 +1,46 @@ -# The documentation structure root node. Mandatory. -root: - name: doc - # nodeSelector is resolved to a node hierarchy, where nodes are selected - # by criteria (criteria is not implemented yet, i.e no filters). - nodesSelector: - # A node selector path defines the scope that will be used to - # generate a hierarchy. For GitHub paths that is a folder in a GitHub repo - # and the generated nodes hierarchy corresponds ot the file/folder structure - # available in the repository at that path. - path: https://github.com/gardener/gardener/tree/v1.10.0/docs \ No newline at end of file +# Structuring Material +# +# Container and Document Nodes +# +# A documentation structure definition +# A structure consists of a list of node definitions. +# These could be A) document nodes that reference markdown content sources(s), +# or B) container nodes that structure nodes hierarchically and can contain +# other nodes. +# The structure below will be serialized as follows at destination : +# +# |__ overview.md (file) +# |__ concepts (folder) +# |____ apiserver.md (file) +# +structure: + # Example of a document node that will be serialized as top-level file in the + # destination configured in docforge, with name overview.md + # A node name will be used to identify the serialized node content. When + # writing to filesystem, the name translates ot a file name. + # + # A document node that has 'source' property provides several additional + # options for node names: + # 1. Node name can be inferred from its source path. + # 2. Node name can be constructed from an expression using the variables + # $name and $ext, which stand for the name of the resource at node's source + # path, and $ext stands for its extension. + - name: overview.md + # A document node should always have content form source(s) assigned to it. + # There are multiple options to assign content form source to a node. + # The simplest one is to use source as in this example, which assigns the + # content of the resources at the URL provided by source to this node. 
+ # Other options with more sophisticated options to control and reorganize + # content from potentially multiple source are contentSelector and template. + source: https://github.com/gardener/gardener/blob/master/docs/README.md + # Example of a container node with no content source with name concepts. + # Writing this structure to a file system will serialize it as folder `concepts`. + - name: concepts + # The property `nodes` references the direct descendant nodes of this node. It + # is used to organize nodes into a tree. + # The property nodes is specific for container nodes. Document nodes do not + # specify nodes, because they are leafs in the tree nodes structure. + nodes: + # Example of a minimal document node specification. The node name is + # inferred to be 'apiserver.md' from the path in its source property. + - source: https://github.com/gardener/gardener/blob/master/docs/concepts/apiserver.md \ No newline at end of file diff --git a/example/simple/01.yaml b/example/simple/01.yaml index 6b1be9bc..ee78f7fc 100644 --- a/example/simple/01.yaml +++ b/example/simple/01.yaml @@ -1,41 +1,22 @@ -# The documentation structure root node. Mandatory. -root: - name: doc +# Structuring Material +# +# Node selectors +# +# Node selectors provide options for pulling whole remote hierarchies +# into node (sub)structures, and filtering them to a subset. +structure: + - name: concepts # nodeSelector is resolved to a node hierarchy, where nodes are selected - # by criteria (criteria is not implemented yet, i.e no filters). + # by optional filtering criteria, and attached to a container node as its + # descendants hierarchy. + # In this example the specified node selector will be resolved to a node hierarchy + # produced from the file-folder structure at the specified path. Only markdown + # documents are included as nodes. Empty folders are not included. + # The resolved structure is attached at this node, so the contents of the + # `concepts` folder below will become contents of this node (and folder when serialized) nodesSelector: - # A node selector path defines the scope that will be used to + # A node selector path defines the top-level scope that will be used to # generate a hierarchy. For GitHub paths that is a folder in a GitHub repo # and the generated nodes hierarchy corresponds ot the file/folder structure # available in the repository at that path. - path: https://github.com/gardener/gardener/tree/master/docs - # A list of child nodes to this structure node to form document structure hierarchy. - # Note that if a nodeSelector is specified on this node, will be merged with other - # existing nodes in `nodes`. Nodes with the same name will have their other properties - # merged, making it possible to add properties to the generated structure or additional - # nodes. - nodes: - - name: aws_provider - # contentSelectors is a list of source selection specifications. - # Normally, there will be one but it is possible to specify several and - # they will be appended in that order. - contentSelectors: - # Source specifies location of document source. - # The supported sources as of now are GitHub repository documents and wiki pages. - - source: https://github.com/gardener/gardener-extension-provider-aws/blob/v1.13.0/docs/usage-as-end-user.md -# A localityDomain defines the scope of documentation structure "local" -# resources that are downloaded along with structure's documents. -localityDomain: - # A locality domain. 
GFor GitHub it is in the form - # // - github.com/gardener/gardener: - # The version, if specified, is applied to all links inside this domain. - # Document-local resources that will be downloaded (inside `path`), and - # links that will be absolute in this domain (github.com/gardener/gardener) - # will be rewritten with this version in their URLs. - version: v1.11.1 - # Path inside this domain that defines the scope of the "document-local" - # resources, which will be downloaded along with documents. - # If version is specified, the links used to download the resources are - # rewritten to match the version. - path: gardener/gardener/docs \ No newline at end of file + path: https://github.com/gardener/gardener/tree/master/docs/concepts \ No newline at end of file diff --git a/example/simple/02.yaml b/example/simple/02.yaml new file mode 100644 index 00000000..34e6c521 --- /dev/null +++ b/example/simple/02.yaml @@ -0,0 +1,12 @@ +# Structuring Material +# +# Node selectors +# +# Node selectors provide options for pulling whole remote hierarchies +# into node (sub)structures, and filtering them to a subset. +nodesSelector: + # A node selector path defines the top-level scope that will be used to + # generate a hierarchy. For GitHub paths that is a folder in a GitHub repo + # and the generated nodes hierarchy corresponds ot the file/folder structure + # available in the repository at that path. + path: https://github.com/gardener/gardener/tree/master/docs/concepts \ No newline at end of file diff --git a/example/simple/03.yaml b/example/simple/03.yaml new file mode 100644 index 00000000..c67b1bb8 --- /dev/null +++ b/example/simple/03.yaml @@ -0,0 +1,56 @@ +# The documentation structure. +structure: + # Top-level container node with descendent hierarchy defined both with rules + # by its nodeSelector and explicitly with its nodes list property + # name stands for the name of the node. It is mandatory property for container nodes + - name: concepts + # nodeSelector is resolved to a node hierarchy using path and rules to select nodes + # to be part of the hierarchy. not specifying anything but path will end up in + # selecting the whole file/folder structure at path. + nodesSelector: + # A node selector path defines the scope that will be used to + # generate a hierarchy. For GitHub paths that is a folder in a GitHub repo + # and the generated nodes hierarchy corresponds ot the file/folder structure + # available in the repository at that path. + path: https://github.com/gardener/gardener/tree/v1.11.1/docs/concepts + # A list of child nodes to this structure node to explicitly define document structure hierarchy. + # Merging nodeSelector hierarchy with nodes: + # If both a nodeSelector and nodes are specified on this node, when the node selector is resolved, + # its resulting top-level nodes will merge with other existing nodes in `nodes`. Nodes with the same + # name will have their other properties merged, making it possible to add properties to + # the generated structure or additional nodes. + nodes: + # Name of this document node. + # Name is not mandatory for document nodes if source is provided. With source and no name, + # the name will be resolved to the resource name in source. + - name: architecture + # Source specifies location of document source. + # The supported sources as of now are GitHub repository documents and wiki pages. 
+ source: https://github.com/gardener/documentation/wiki/Architecture +# Links define configuration for handling document resource references +# including both hyperlinks and images. +links: + # rewrites defines rewrite rules for document links. + # rules are mapped to regular expressions to match links in documents + rewrites: + # A regex to rule mapping binding all links that features gardener/gardener/blob, + # gardener/gardener/tree or gardener/gardener/raw to this rule + gardener/gardener/(blob|tree|raw): + # The version, if specified, is applied to all links matched by this regex. + # Both document-local resources that will be downloaded, and + # links that will be absolute in this domain (gardener/gardener) + # will be rewritten with this version in their URLs. + version: v1.11.1 + # Mapping a regular expression to nil effectively removes matching links + # from their documents + gardener/gardener/(pulls|pull|issue|issues): ~ + # downloads define the downloads domain and global renaming rules for + # downloaded resources + downloads: + # Scope defines the download scope for documents, mapping regular expressions + # to optional rename rules valid for links matched by them. Resources with URLs + # that match the regex will be downloaded. + # A mapping to nil will apply a default rename pattern ($uuid.$ext) + scope: + # Download all referenced blobs and raw type of files in gardener/gardener's docs folder + gardener/gardener/(blob|raw)/v1.11.1/docs: ~ \ No newline at end of file diff --git a/pkg/api/nodes.go b/pkg/api/nodes.go index da87a61d..bebf05b1 100755 --- a/pkg/api/nodes.go +++ b/pkg/api/nodes.go @@ -142,38 +142,53 @@ func (n *Node) AddStats(s ...*Stat) { } } -// FindNodeByContentSource traverses up and then all around the +// FindNodeBySource traverses up and then all around the // tree paths in the node's documentation structure, looking for -// a node that has contentSource path nodeContentSource -func FindNodeByContentSource(nodeContentSource string, node *Node) *Node { +// a node that has the source string either in source, contentSelector +// or template +func FindNodeBySource(source string, node *Node) *Node { if node == nil { return nil } - - for _, contentSelector := range node.ContentSelectors { - if contentSelector.Source == nodeContentSource { - return node - } + if n := matchAnySource(source, node); n != nil { + return n } root := node.GetRootNode() if root == nil { root = node } - return withMatchinContentSelectorSource(nodeContentSource, root) + return withMatchinContentSelectorSource(source, root) } -func withMatchinContentSelectorSource(nodeContentSource string, node *Node) *Node { - if node == nil { - return nil +func matchAnySource(source string, node *Node) *Node { + if node.Source == source { + return node } for _, contentSelector := range node.ContentSelectors { - if contentSelector.Source == nodeContentSource { + if contentSelector.Source == source { return node } } + if t := node.Template; t != nil { + for _, contentSelector := range t.Sources { + if contentSelector.Source == source { + return node + } + } + } + return nil +} + +func withMatchinContentSelectorSource(source string, node *Node) *Node { + if node == nil { + return nil + } + if n := matchAnySource(source, node); n != nil { + return n + } for i := range node.Nodes { - foundNode := withMatchinContentSelectorSource(nodeContentSource, node.Nodes[i]) + foundNode := withMatchinContentSelectorSource(source, node.Nodes[i]) if foundNode != nil { return foundNode } diff --git 
a/pkg/api/parser_test.go b/pkg/api/parser_test.go index 643538a3..918501fc 100755 --- a/pkg/api/parser_test.go +++ b/pkg/api/parser_test.go @@ -21,12 +21,13 @@ import ( "path/filepath" "testing" + "github.com/gardener/docforge/pkg/util/tests" "github.com/stretchr/testify/assert" ) var b = []byte(` -root: - name: root +structure: +- name: root nodes: - name: node_1 contentSelectors: @@ -36,20 +37,19 @@ root: - source: https://a.com properties: "custom_key": custom_value - localityDomain: - github.com/gardener/gardener: - exclude: - - a + links: + downloads: + scope: + github.com/gardener/gardener: ~ nodes: - name: subnode contentSelectors: - source: path/a -localityDomain: - github.com/gardener/gardener: - version: v1.10.0 - path: gardener/gardener/docs - LinkSubstitutes: - a: b +links: + rewrites: + github.com/gardener/gardener: + version: v1.10.0 + text: b `) func traverse(node *Node) { @@ -73,7 +73,9 @@ func TestParse(t *testing.T) { fmt.Println(err) return } - traverse(got.Root) + for _, n := range got.Structure { + traverse(n) + } // if got != c.want { // t.Errorf("Something(%q) == %q, want %q", c.in, got, c.want) // } @@ -87,23 +89,25 @@ func TestSerialize(t *testing.T) { }{ { &Documentation{ - Root: &Node{ - Title: "A Title", - Nodes: []*Node{ - { - Title: "node 1", - ContentSelectors: []ContentSelector{{Source: "path1/**"}}, - }, - { - Title: "path 2", - ContentSelectors: []ContentSelector{{Source: "https://a.com"}}, - Properties: map[string]interface{}{ - "custom_key": "custom_value", + Structure: []*Node{ + &Node{ + Name: "A Title", + Nodes: []*Node{ + { + Name: "node 1", + ContentSelectors: []ContentSelector{{Source: "path1/**"}}, }, - Nodes: []*Node{ - { - Title: "subnode", - ContentSelectors: []ContentSelector{{Source: "path/a"}}, + { + Name: "path 2", + ContentSelectors: []ContentSelector{{Source: "https://a.com"}}, + Properties: map[string]interface{}{ + "custom_key": "custom_value", + }, + Nodes: []*Node{ + { + Name: "subnode", + ContentSelectors: []ContentSelector{{Source: "path/a"}}, + }, }, }, }, @@ -128,22 +132,24 @@ func TestSerialize(t *testing.T) { func TestMe(t *testing.T) { d := &Documentation{ - Root: &Node{ - Name: "docs", - NodeSelector: &NodeSelector{ - Path: "https://github.com/gardener/gardener/tree/master/docs", - }, - Nodes: []*Node{ - { - Name: "calico", - NodeSelector: &NodeSelector{ - Path: "https://github.com/gardener/gardener-extension-networking-calico/tree/master/docs", - }, + Structure: []*Node{ + &Node{ + Name: "docs", + NodeSelector: &NodeSelector{ + Path: "https://github.com/gardener/gardener/tree/master/docs", }, - { - Name: "aws", - NodeSelector: &NodeSelector{ - Path: "https://github.com/gardener/gardener-extension-provider-aws/tree/master/docs", + Nodes: []*Node{ + { + Name: "calico", + NodeSelector: &NodeSelector{ + Path: "https://github.com/gardener/gardener-extension-networking-calico/tree/master/docs", + }, + }, + { + Name: "aws", + NodeSelector: &NodeSelector{ + Path: "https://github.com/gardener/gardener-extension-provider-aws/tree/master/docs", + }, }, }, }, @@ -164,45 +170,46 @@ func TestFile(t *testing.T) { got *Documentation ) expected := &Documentation{ - Root: &Node{ - Name: "00", - Nodes: []*Node{ - &Node{ - Name: "01", - ContentSelectors: []ContentSelector{ - ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/master/docs/concepts/gardenlet.md", - }, - }, - LocalityDomain: &LocalityDomain{ - LocalityDomainMap: 
LocalityDomainMap{ - "github.com/gardener/gardener": &LocalityDomainValue{ - Version: "v1.11.1", - Path: "gardener/gardener", - LinksMatchers: LinksMatchers{ - Exclude: []string{ - "example", - }, + Structure: []*Node{ + &Node{ + Name: "00", + Nodes: []*Node{ + &Node{ + Name: "01", + Source: "https://github.com/gardener/gardener/blob/master/docs/concepts/gardenlet.md", + Links: &Links{ + Rewrites: map[string]*LinkRewriteRule{ + "github.com/gardener/gardener": &LinkRewriteRule{ + Version: tests.StrPtr("v1.11.1"), + }, + }, + Downloads: &Downloads{ + Scope: map[string]ResourceRenameRules{ + "github.com/gardener/gardener": nil, }, }, }, }, - }, - &Node{ - Name: "02", - ContentSelectors: []ContentSelector{ - ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/master/docs/deployment/deploy_gardenlet.md", + &Node{ + Name: "02", + ContentSelectors: []ContentSelector{ + ContentSelector{ + Source: "https://github.com/gardener/gardener/blob/master/docs/deployment/deploy_gardenlet.md", + }, }, }, }, }, }, - LocalityDomain: &LocalityDomain{ - LocalityDomainMap: LocalityDomainMap{ - "github.com/gardener/gardener": &LocalityDomainValue{ - Version: "v1.10.0", - Path: "gardener/gardener", + Links: &Links{ + Rewrites: map[string]*LinkRewriteRule{ + "github.com/gardener/gardener": &LinkRewriteRule{ + Version: tests.StrPtr("v1.10.0"), + }, + }, + Downloads: &Downloads{ + Scope: map[string]ResourceRenameRules{ + "github.com/gardener/gardener": nil, }, }, }, diff --git a/pkg/api/testdata/parse_test_00.yaml b/pkg/api/testdata/parse_test_00.yaml index 663f33d7..0b4d02f1 100644 --- a/pkg/api/testdata/parse_test_00.yaml +++ b/pkg/api/testdata/parse_test_00.yaml @@ -1,19 +1,22 @@ -root: - name: 00 +structure: +- name: 00 nodes: - name: 01 - contentSelectors: - - source: https://github.com/gardener/gardener/blob/master/docs/concepts/gardenlet.md - localityDomain: - github.com/gardener/gardener: - version: v1.11.1 - path: gardener/gardener - exclude: - - example + source: https://github.com/gardener/gardener/blob/master/docs/concepts/gardenlet.md + links: + rewrites: + "github.com/gardener/gardener": + version: v1.11.1 + downloads: + scope: + "github.com/gardener/gardener": ~ - name: 02 contentSelectors: - - source: https://github.com/gardener/gardener/blob/master/docs/deployment/deploy_gardenlet.md -localityDomain: - github.com/gardener/gardener: - version: v1.10.0 - path: gardener/gardener \ No newline at end of file + - source: https://github.com/gardener/gardener/blob/master/docs/deployment/deploy_gardenlet.md +links: + rewrites: + "github.com/gardener/gardener": + version: v1.10.0 + downloads: + scope: + "github.com/gardener/gardener": ~ \ No newline at end of file diff --git a/pkg/api/types.go b/pkg/api/types.go index 1dca0b84..95e8f7e5 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -15,114 +15,123 @@ package api -// Documentation is a documentation structure that can be serialized and deserialized -// and parsed into a model supporting the tasks around building a concrete documentation -// bundle. +// Documentation models a manifest for building a documentation structure from various +// sources into a coherent bundle. 
 type Documentation struct {
-	// Root is the root node of this documentation structure
-	Root *Node `yaml:"root"`
-	// Variables are a set of key-value entries, where the key is the variable name
-	// and the value is a node template. Nodes defined as variables can be resused
-	// by reference throughout the documentation structure to minimise duplicate
-	// node definitions. A reference to a variable is in the format `$variable-name`,
-	// where `variable-name` is a key in this Variables map structure.
+	// Structure defines a documentation structure hierarchy.
 	//
+	// Optional, alternative to NodeSelector
+	Structure []*Node `yaml:"structure,omitempty"`
+	// NodeSelector is a specification for building a documentation structure hierarchy.
+	// The root of the hierarchy is a node generated for the resource at the nodeSelector's
+	// path. It is attached as a single, direct descendant of this Documentation.
+	// A NodeSelector on this level is useful when the intended structure does not need to
+	// be modelled explicitly, but rather corresponds to an existing hierarchy that can be
+	// resolved to a documentation structure.
 	// Note: WiP - proposed, not implemented yet.
-	Variables map[string]*Node `yaml:"variables,omitempty"`
-	// LocalityDomain defines the scope of the downloadable resources
-	// for this structure
-	LocalityDomain *LocalityDomain `yaml:"localityDomain,omitempty"`
+	//
+	// Optional, alternative to Structure
+	NodeSelector *NodeSelector `yaml:"nodesSelector,omitempty"`
+	// Links defines global rules for processing document links.
+	//
+	// Optional
+	Links *Links `yaml:"links,omitempty"`
+	// Variables are a set of key-value entries that allow manifests to be parameterized.
+	// When the manifest is resolved, variable values are interpolated throughout the text.
+	//
+	// Note: WiP - proposed, not implemented yet.
+	// Optional
+	Variables map[string]interface{} `yaml:"variables,omitempty"`
 }
 
-// Node is a recursive, tree data structure representing documentation model.
+// Node is a recursive, tree data structure representing a documentation structure.
+// A Node's descendants are its `nodes` array elements.
+// A node without any of the options for content assignment (Source, ContentSelectors
+// or Template) is a container node and is serialized as a folder. If it has a content
+// assignment property, it is a document node and is serialized as a file.
+// Document nodes have a nil Nodes property.
 type Node struct {
-	parent *Node
-	// Name is the name of this node. If omitted, the name is the resource name from
-	// Source as reported by an eligible ResourceHandler's Name() method.
-	// Node with multiple Source entries require name.
+	// Name is an identifying name for this node that will also be used for its serialization.
+	// Name cannot be omitted if this is a container node.
+	// Name can be omitted for document nodes only if the Source property is specified. In this
+	// case, the Name value is the resource name from the location specified in Source.
+	// A document node without Source requires Name.
+	// The Name value of a node with Source can also be an expression constructed from several
+	// variables:
+	// - $name: the original name of the resource provided by Source
+	// - $ext: the extension of the resource provided by Source. May be an empty string if the
+	//   resource has no extension.
+	// - $uuid: a UUID identifier generated for and at the disposal of each node.
+ // + // Mandatory if this is a container Node or Source is not specified, optional otherwise Name string `yaml:"name,omitempty"` - // A reference to the parent of this node, unless it is the root. Unexported and - // assigned internally when the node structure is resolved. Not marshalled. - // Title is the title for a node displayed to human users - Title string `yaml:"title,omitempty"` - // Source is a sequence of path specifications to locate the resources - // that represent this document node. There must be at minimum one. When - // they are multiple, the resulting document is an aggregation of the - // material located at each path. - // - // A source path specification entries are in the following format: - // `path[#{semantic-block-selector}]`, where: - // - `path` is a valid resource locator for a document. - // - `semantic-block-selector`is an expression that selects semantic block - // elements from the document similar to CSS selectors (Note: WiP - proposed, - // not implemented yet.). - // - // Examples: - // - A single file - // `source: ["path/a/b/c/file.md"]` - // - // - Two files in order to construct a new document - // `source: ["path1/a/b/c/file1.md", - // "path2/e/f/g/file2.md"]` - // - // - A file and the section under the first heading level 1 from another file - // in that order to construct a new document. - // Note: WiP - proposed, not implemented yet. - // `source: ["path1/a/b/c/file1.md", - // "path2/e/f/g/file2.md#{h1:first-of-type}"]` + // ContentSelectors is a sequence of specifications for selecting cotent for this node. + // The content provided by the list of ContentSelectors is aggregated into a single document. + // + // Mandatory when there is no Name property. Alternative to ContentSelectors and Template. Only + // one must be specified. + Source string `yaml:"source,omitempty"` + // ContentSelectors is a sequence of specifications for selecting cotent for this node. + // The content provided by the list of ContentSelectors is aggregated into a single document. + // Name is a required property when ContentSelectors are used to assign content to a node. + // + // Optional, alternative to ContentSelectors and Template. Only one of them must be specified. ContentSelectors []ContentSelector `yaml:"contentSelectors,omitempty"` - // Nodes is an array of nodes that are subnodes (children) of this node + // Template is a specification for content selection and its application to a template, the + // product of which is this document node's content. + // Name is a required property when Template are used to assign content to a node. + // + // Optional, alternative to ContentSelectors and Source. Only one of them must be specified. + Template *Template `yaml:"template,omitempty"` + // Nodes is a list of nodes that are descendants of this Node. This field is applicable + // only to container nodes and not to document nodes. + // A folder node must always have a Name. // // Note: For a non-strict alternative for specifying child nodes, refer to // `NodesSelector` + // Optional Nodes []*Node `yaml:"nodes,omitempty"` - // NodesSelector is a structure modeling an existing structure of documents at a - // location that can be further filtered by their metadata propertis and set as - // child nodes to this node. This is an alternative to explicitly setting child - // nodes structure resource paths with `Nodes`. + // NodesSelector is a specification for building a documentation structure hierarchy, + // descending from this node. 
The modelled structure is merged into this node's Nodes + // field, masshing it up with potentially explicitly defined descendants there. The merge + // strategy identifies identical nodes by their name and in this case performs a merge + // of their properties. Where there are conflicts, the explicitly defined node wins. + // A NodeSelector can coexist or be an alternative to an explicitly defined structure, + // depending on the goal. + // // Note: WiP - proposed, not implemented yet. + // Optional NodeSelector *NodeSelector `yaml:"nodesSelector,omitempty"` // Properties are a map of arbitrary, key-value pairs to model custom, - // untyped node properties. They could be used to instruct specific ResourceHandlers - // and the serialization of the Node. For example the properties member could be - // used to set the front-matter to markdowns for front-matter aware builders such - // as Hugo. + // untyped node properties. They can be used for various purposes. For example, + // specifying a "fronatmatter" property on a node will result in applying the value as + // front matter in the resulting document content. This si applicable only to document + // nodes. Properties map[string]interface{} `yaml:"properties,omitempty"` + // Links defines the rules for handling links in this node's content. Applicable only + // to document nodes. + Links *Links `yaml:"links,omitempty"` - *LocalityDomain `yaml:"localityDomain,omitempty"` - - // LinksSubstitutes is an optional map of links and their - // substitutions. Use it to override the default handling of those - // links in documents referenced by this node's contentSelector: - // - An empty substitution string ("") removes a link markdown. - // It leaves only its text component in the document for links - // and nothing for images. - // This applies only to markdown for links and images. - // - A fixed string that will replace the whole original link - // destination. - // The keys in the substitution map are matched against documents - // links as exact string matches. The document links are converted to - // their absolute form for the match - // TODO: update this doc - LinksSubstitutes LinkSubstitutes `yaml:"linksSubstitutes,omitempty"` - - stats []*Stat + // private fields + parent *Node + stats []*Stat } -// NodeSelector is an specification for selecting subnodes (children) for a node. +// NodeSelector is a specification for selecting a descending hierarchy for a node. // The order in which the documents are selected is not guaranteed. The interpreters // of NodeSelectors can make use of the resource metadata or other sources to construct -// and populate child Nodes dynamically. +// and populate descendent Nodes dynamically. // // Example: -// - Select all documents located at path/a/b/c that have front-matter property -// `type` with value `faq`: +// - Select recursively all documents located at path /a/b/c that have front-matter +// property `type` with value `faq`: +// ``` +// nodesSelector: { +// path: "path/a/b/c" +// frontMatter: +// "type:faq" +// } // ``` -// nodesSelector: { -// path: "path/a/b/c", -// annotation: "type:faq" -// } -// ``` // will select markdown documents located at path/a/b/c with front-matter: // --- // type: faq @@ -131,98 +140,201 @@ type Node struct { // Note: WiP - proposed, not implemented yet. type NodeSelector struct { // Path is a resource locator to a set of files, i.e. to a resource container. + // A node selector path defines the scope that will be used to + // generate a hierarchy. 
For GitHub paths that is a folder in a GitHub repo + // and the generated nodes hierarchy corresponds ot the file/folder structure + // available in the repository at that path. + // Without any further criteria, all nodes within path are included. + // + // Mandatory Path string `yaml:"path"` - // Depth a maximum depth of the recursion. If omitted or less than 0, the - // constraint is not considered - Depth int64 `yaml:"depth,omitempty"` - // Annotation is an optional expression, filtering documents located at `Path` + // ExcludePath is a set of exclusion rules for node candidates for the hierarchy. + // Each rule is a regular expression to match a node's path that is relative to the + // path element. + // + // Optional + ExcludePaths []string `yaml:"excludePaths,omitempty"` + // ExcludeFrontMatter is an optional expression, filtering documents located at `Path` + // by their metadata properties. Markdown metadata is commonly provisioned as + // `front-matter` block at the head of the document delimited by comment + // tags (`---`). + // Documents with front matter that matches all map entries of this field + // are not selected. + // Note: WiP - proposed, not implemented yet. + // + // Optional + ExcludeFrontMatter map[string]interface{} `yaml:"excludeFrontMatter,omitempty"` + // FrontMatter is an optional expression, filtering documents located at `Path` // by their metadata properties. Markdown metadata is commonly provisioned as // `front-matter` block at the head of the document delimited by comment // tags (`---`). - Annotation string `yaml:"annotation,omitempty"` + // Documents with front matter that matches all map entries of this field + // are selected. + // Note: WiP - proposed, not implemented yet. + // + // Optional + FrontMatter map[string]interface{} `yaml:"frontMatter,omitempty"` + // Depth a maximum depth of the recursion. If omitted or less than 0, the + // constraint is not considered + // + // Optional + Depth int32 `yaml:"depth,omitempty"` } // ContentSelector specifies a document node content target +// A ContentSelector specification +// that constitute this document node's content. There must be at minimum one. When +// they are multiple, the resulting document is an aggregation of the +// material located at each path. +// +// A ContentSelector specification entries are in the following format: +// `path[#{semantic-block-selector}]`, where: +// - `path` is a valid resource locator for a document. +// - `semantic-block-selector`is an expression that selects semantic block +// elements from the document similar to CSS selectors (Note: WiP - proposed, +// not implemented yet.). +// +// Examples: +// - A single file +// `source: ["path/a/b/c/file.md"]` +// +// - Two files in order to construct a new document +// `source: ["path1/a/b/c/file1.md", +// "path2/e/f/g/file2.md"]` +// +// - A file and the section under the first heading level 1 from another file +// in that order to construct a new document. +// Note: WiP - proposed, not implemented yet. +// `source: ["path1/a/b/c/file1.md", +// "path2/e/f/g/file2.md#{h1:first-of-type}"]` type ContentSelector struct { // URI of a document + // + // Mandatory Source string `yaml:"source,omitempty"` // Optional filtering expression that selects content from the document content - // Omiting this file will select the whole document content. + // Omiting this file will select the whole document content at Source. 
+	//
+	// Optional
 	Selector *string `yaml:"selector,omitempty"`
 }
 
-// LinksMatchers defines links exclusion/inclusion patterns
-type LinksMatchers struct {
-	// Include is a list of regular expressions that will be matched to every
-	// link that is candidate for download to determine whether it is
-	// eligible. The links to match are absolute.
-	// Include can be used in conjunction with Exclude when it is easier/
-	// preferable to deny all resources and allow selectively.
-	// Include can be used in conjunction with localityDomain to add
-	// additional resources not in the domain.
-	Include []string `yaml:"include,omitempty"`
-	// Exclude is a list of regular expression that will be matched to every
-	// link that is candidate for download to determine whether it is
-	// not eligible. The links to match are absolute.
-	// Use Exclude to further constrain the set of downloaded resources
-	// that are in a locality domain.
-	Exclude []string `yaml:"exclude,omitempty"`
+// Template specifies rules for selecting content and applying it
+// to a template
+type Template struct {
+	// Path to the template file.
+	// A template file's content is valid Go template content.
+	// See https://golang.org/pkg/text/template.
+	// The template will have at its disposal the variables defined in
+	// this specification's Sources. Their values will be the content
+	// selected by the corresponding specifications.
+	//
+	// Mandatory
+	Path string `yaml:"path"`
+	// Sources maps variable names to ContentSelectors that will be
+	// used as the specification for the content to fetch and assign to
+	// these variables.
+	Sources map[string]*ContentSelector `yaml:"sources,omitempty"`
 }
 
-// LocalityDomain contains the entries defining a
-// locality domain scope. Each entry is a mapping
-// between a domain, such as github.com/gardener/gardener,
-// and a path in it that defines "local" resources.
-// Documents referenced by documentation node structure
-// are always part of the locality domain. Other
-// resources referenced by those documents are checked
-// against the path hierarchy of locality domain
-// entries to determine how they will be processed.
-type LocalityDomain struct {
-	LocalityDomainMap `yaml:",inline"`
-	// DownloadSubstitutes is an optional map of resource names in this
-	// locality domain and their substitutions. Use it to override the
-	// default downloads naming:
-	// - An exact download name mapped to a download resource will be used
-	//   to name that resources when downloaded.
-	// - An expression with substitution variables can be used
-	//   to change the default pattern for generating downloaded resource
-	//   names, which is $uuid.
-	//   The supported variables are:
-	//   - $name: the original name of the resource
-	//   - $path: the original path of the resource in this domain (may be empty)
-	//   - $uuid: the identifier generated f=or the downloaded resource
-	//   - $ext: the extension of the original resource (may be "")
-	// Example expression: $name-$uuid
-	DownloadSubstitutes map[string]string `yaml:"downloadSubstitutes,omitempty"`
+// Links defines how document links are processed.
+type Links struct {
+	// Rewrites maps regular expressions, matched against document links resolved to absolute
+	// form, to link rewriting rules.
+	// A common use is to rewrite the versions of resource links, if they support that, to have
+	// them downloaded at a particular state.
+	// A rewrite mapping an expression to nil rules (~) is interpreted as a request to remove
+	// the links matching the expression.
+	Rewrites map[string]*LinkRewriteRule
+	// Downloads defines document-referenced resources that will be downloaded
+	// into a dedicated destination (__resources by default) and optionally renamed.
+	// Downloads are performed after rewrites.
+	Downloads *Downloads
 }
 
-// LocalityDomainMap maps domains such as github.com/gardener/gardener
-// to LocalityDomainValues
-type LocalityDomainMap map[string]*LocalityDomainValue
-
-// LocalityDomainValue encapsulates the members of a
-// LocalityDomain entry value
-type LocalityDomainValue struct {
-	// Version sets the version of the resources that will
-	// be referenced in this domain. Download targets and
-	// absolute links in documents referenced by the structure
-	// will be rewritten to match this version
-	Version string `yaml:"version"`
-	// Path is the relative path inside a domain that contains
-	// resources considered 'local' that will be downloaded.
-	Path string `yaml:"path"`
-	LinksMatchers `yaml:",inline"`
+// LinkRewriteRule is a rule definition specifying link properties to be rewritten.
+type LinkRewriteRule struct {
+	// Rewrites the version of links matching this pattern, e.g. master -> v1.11.3.
+	// For GitHub links the version will rewrite the sha path segment in the URL
+	// right after organization, repository and resource type.
+	// Note that not every link supports a version. For example, GitHub issues
+	// links have a different pattern and no sha segment.
+	// The version will be applied only where applicable.
+	Version *string `yaml:"version,omitempty"`
+	// Rewrites the destination in a link|image markdown
+	//
+	// Example:
+	// with `destination: "github.tools.sap/kubernetes/gardener"`
+	// [a](github.com/gardener/gardener) -> [a](github.tools.sap/kubernetes/gardener)
+	//
+	// This setting overrides a version setting if both exist, so it makes little sense to use them
+	// together.
+	//
+	// Note that destinations that are matched by a downloads specification will be converted to
+	// relative, using the result of the destination substitution.
+	//
+	// Setting destination to an empty string removes the link, leaving only the text element behind
+	//
+	// Example:
+	// with `destination: ""` [a](github.com/gardener/gardener) -> a
+	//
+	// Note that for images this will remove the image entirely:
+	//
+	// Example:
+	// with `destination: ""` ![alt-text-here](github.com/gardener/gardener/blob/master/images/b.png) ->
+	//
+	Destination *string `yaml:"destination,omitempty"`
+	// Rewrites or sets a matched link markdown's text component (alt-text for images).
+	// If used with value "" in combination with destination: "", this will effectively remove
+	// the link completely, leaving nothing behind in the document.
+	Text *string `yaml:"text,omitempty"`
+	// Rewrites or sets a matched link markdown's title component.
+	// Note that this will have no effect with settings destination: "" and text: "" as the whole
+	// markdown together with its title will be removed.
+	Title *string `yaml:"title,omitempty"`
 }
 
-// LinkSubstitutes is the mapping between absolute links
-// and substitutions for them
-type LinkSubstitutes map[string]*LinkSubstitute
-
-// LinkSubstitute comprises subtitutes for various link details
-// commonly found in markup
-type LinkSubstitute struct {
-	Text *string `yaml:"text,omitempty"`
-	Destination *string `yaml:"destination,omitempty"`
-	Title *string `yaml:"title,omitempty"`
+// Downloads is a definition of the scope of downloadable resources and rules for renaming them.
+type Downloads struct {
+	// Renames is a set of renaming rules that are globally applicable to all downloads
+	// regardless of scope.
+	// Example:
+	// renames:
+	//   "\\.(jpg|gif|png)": "$name-hires-$uuid.$ext"
+	Renames ResourceRenameRules `yaml:"renames,omitempty"`
+	// Scope defines the scope for downloaded resources with a set of mappings between
+	// document links matching regular expressions and (optional) naming patterns.
+	// A scope map entry maps a regular expression, matching document links that will
+	// be downloaded, to an optional rename specification, or ~ for the default.
+	// If no particular rename specification is supplied:
+	// 1. the globally supplied renames are tested to match and applied (if supplied)
+	// 2. a default rename expression `$uuid.$ext` will be applied to all matched targets.
+	//
+	// Example: define a download scope (only) that downloads every matching document.
+	// scope:
+	//   gardener/gardener/(tree|blob|raw)/master/docs: ~
+	//
+	// Example: define a download scope that downloads every matching document and
+	// renames it to a specific pattern if it is a jpg|gif|png image or uses the default
+	// naming pattern otherwise.
+	// scope:
+	//   gardener/gardener/(tree|blob|raw)/master/docs:
+	//     "\\.(jpg|gif|png)": "$name-image-$uuid.$ext"
+	Scope map[string]ResourceRenameRules `yaml:"scope,omitempty"`
 }
+
+// ResourceRenameRules defines a mapping between regular expressions matching
+// resource locators and name pattern expressions or exact names.
+// The name pattern will be used to rename the downloaded resources matching the
+// specified regular expression key.
+// There is a set of variables that can be used to construct the +// naming expressions: +// - $name: the original name of the resource +// - $uuid: a UUID generated for the resource +// - $ext: a original resource extension +// The default expression applying to all resources is: $uuid.$ext +// +// Example: +// "\\.(jpg|gif|png)": "$name-image-$uuid.$ext" +// +type ResourceRenameRules map[string]string diff --git a/pkg/api/validate.go b/pkg/api/validate.go new file mode 100644 index 00000000..55037c07 --- /dev/null +++ b/pkg/api/validate.go @@ -0,0 +1,80 @@ +package api + +import ( + "fmt" + "strings" + + "github.com/hashicorp/go-multierror" +) + +// ValidateManifest performs validation of manifest according to +// the API rules for Documentation +func ValidateManifest(manifest *Documentation) error { + var errs *multierror.Error + if manifest != nil { + if manifest.NodeSelector == nil && manifest.Structure == nil { + errs = multierror.Append(errs, fmt.Errorf("At least nodeSelector or structure must be present as top-level elements in a manifest")) + } + validateNodeSelector(manifest.NodeSelector, errs) + if manifest.NodeSelector != nil { + validateStructure(manifest.Structure, errs) + } + } + return errs.ErrorOrNil() +} + +func validateStructure(structure []*Node, errs *multierror.Error) { + for _, node := range structure { + validateNode(node, errs) + validateStructure(node.Nodes, errs) + } +} + +func validateNode(node *Node, errs *multierror.Error) { + if len(node.Name) == 0 { + if len(node.Nodes) != 0 { + errs = multierror.Append(errs, fmt.Errorf("node property name must not be nil in container nodes")) + } + if len(node.ContentSelectors) > 0 { + errs = multierror.Append(errs, fmt.Errorf("node property name must not be nil in document node with contentSelectors")) + } + if node.Template != nil { + errs = multierror.Append(errs, fmt.Errorf("node property name must not be nil in document node with template")) + } + } + if len(node.Name) > 0 && len(node.Source) > 0 { + if strings.Contains(node.Name, "$name") || strings.Contains(node.Name, "$uuid") || strings.Contains(node.Name, "$ext") { + multierror.Append(errs, fmt.Errorf("node name variables are supported only together with source property: %s", node.Name)) + } + } + if len(node.Source) > 0 && node.ContentSelectors != nil { + multierror.Append(errs, fmt.Errorf("node source and contentSelectors are mutually exclusive properties")) + } + if len(node.Source) > 0 && node.Template != nil { + multierror.Append(errs, fmt.Errorf("node source and template are mutually exclusive properties")) + } + if node.ContentSelectors != nil && node.Template != nil { + multierror.Append(errs, fmt.Errorf("node contentSelectors and template are mutually exclusive properties")) + } + if len(node.Nodes) != 0 && len(node.ContentSelectors) > 0 { + multierror.Append(errs, fmt.Errorf("node nodes and contentSelectors are mutually exclusive properties")) + } + if len(node.Nodes) != 0 && len(node.Source) > 0 { + multierror.Append(errs, fmt.Errorf("node nodes and source are mutually exclusive properties")) + } + if len(node.Nodes) != 0 && node.Template != nil { + multierror.Append(errs, fmt.Errorf("node nodes and template are mutually exclusive properties")) + } + validateNodeSelector(node.NodeSelector, errs) +} + +func validateNodeSelector(ns *NodeSelector, errs *multierror.Error) { + if ns != nil { + if len(ns.Path) == 0 { + multierror.Append(errs, fmt.Errorf("nodeSelector path is mandatory property")) + } + if ns.Depth < 0 { + multierror.Append(errs, 
fmt.Errorf("nodeSelector depth property must be a positive integer")) + } + } +} diff --git a/pkg/reactor/build.go b/pkg/reactor/build.go index 308e18f8..ddaf4306 100644 --- a/pkg/reactor/build.go +++ b/pkg/reactor/build.go @@ -8,21 +8,20 @@ import ( "k8s.io/klog/v2" ) -func tasks(node *api.Node, t *[]interface{}) { - n := node - *t = append(*t, &DocumentWorkTask{ - Node: n, - }) - if node.Nodes != nil { - for _, n := range node.Nodes { - tasks(n, t) +func tasks(nodes []*api.Node, t *[]interface{}) { + for _, node := range nodes { + *t = append(*t, &DocumentWorkTask{ + Node: node, + }) + if node.Nodes != nil { + tasks(node.Nodes, t) } } } // Build starts the build operation for a document structure root // in a locality domain -func (r *Reactor) Build(ctx context.Context, documentationRoot *api.Node, localityDomain *localityDomain) error { +func (r *Reactor) Build(ctx context.Context, documentationStructure []*api.Node) error { var errors *multierror.Error errCh := make(chan error) @@ -45,7 +44,7 @@ func (r *Reactor) Build(ctx context.Context, documentationRoot *api.Node, locali r.DownloadController.Start(ctx, errCh, downloadShutdownCh) }() // start document controller with download scope - r.DocController.SetDownloadScope(localityDomain) + // r.DocController.SetDownloadScope(localityDomain) go func() { klog.V(6).Infoln("Starting document controller") r.DocController.Start(ctx, errCh, documentShutdownCh) @@ -78,7 +77,7 @@ func (r *Reactor) Build(ctx context.Context, documentationRoot *api.Node, locali // to exit when ready go func() { documentPullTasks := make([]interface{}, 0) - tasks(documentationRoot, &documentPullTasks) + tasks(documentationStructure, &documentPullTasks) for _, task := range documentPullTasks { r.DocController.Enqueue(ctx, task) } diff --git a/pkg/reactor/build_test.go b/pkg/reactor/build_test.go index 0035f47e..2f1f353c 100644 --- a/pkg/reactor/build_test.go +++ b/pkg/reactor/build_test.go @@ -12,7 +12,7 @@ func Test_tasks(t *testing.T) { type args struct { node *api.Node tasks []interface{} - lds localityDomain + // lds localityDomain } tests := []struct { name string @@ -22,12 +22,12 @@ func Test_tasks(t *testing.T) { { name: "it creates tasks based on the provided doc", args: args{ - node: newDoc.Root, + node: newDoc.Structure[0], tasks: []interface{}{}, }, expectedTasks: []*DocumentWorkTask{ { - Node: newDoc.Root, + Node: newDoc.Structure[0], }, { Node: archNode, @@ -47,7 +47,7 @@ func Test_tasks(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { rhs := resourcehandlers.NewRegistry(&FakeResourceHandler{}) - tasks(tc.args.node, &tc.args.tasks) + tasks([]*api.Node{tc.args.node}, &tc.args.tasks) if len(tc.args.tasks) != len(tc.expectedTasks) { t.Errorf("expected number of tasks %d != %d", len(tc.expectedTasks), len(tc.args.tasks)) diff --git a/pkg/reactor/content_processor.go b/pkg/reactor/content_processor.go index 361d704e..1f7f8ec3 100644 --- a/pkg/reactor/content_processor.go +++ b/pkg/reactor/content_processor.go @@ -27,44 +27,49 @@ var ( // NodeContentProcessor operates on documents content to reconcile links and // schedule linked resources downloads -type NodeContentProcessor struct { - resourceAbsLinks map[string]string - rwlock sync.RWMutex - localityDomain *localityDomain +type NodeContentProcessor interface { + ReconcileLinks(ctx context.Context, node *api.Node, contentSourcePath string, documentBlob []byte) ([]byte, error) + GetDownloadController() DownloadController +} + +type nodeContentProcessor struct { + resourceAbsLinks 
map[string]string + rwlock sync.RWMutex + globalLinksConfig *api.Links + // localityDomain *localityDomain // ResourcesRoot specifies the root location for downloaded resource. // It is used to rewrite resource links in documents to relative paths. resourcesRoot string - DownloadController DownloadController + downloadController DownloadController failFast bool markdownFmt bool rewriteEmbedded bool - ResourceHandlers resourcehandlers.Registry + resourceHandlers resourcehandlers.Registry } // NewNodeContentProcessor creates NodeContentProcessor objects -func NewNodeContentProcessor(resourcesRoot string, ld *localityDomain, downloadJob DownloadController, failFast bool, markdownFmt bool, rewriteEmbedded bool, resourceHandlers resourcehandlers.Registry) *NodeContentProcessor { - if ld == nil { - ld = &localityDomain{ - mapping: map[string]*localityDomainValue{}, - } - } - c := &NodeContentProcessor{ +func NewNodeContentProcessor(resourcesRoot string, globalLinksConfig *api.Links, downloadJob DownloadController, failFast bool, markdownFmt bool, rewriteEmbedded bool, resourceHandlers resourcehandlers.Registry) NodeContentProcessor { + c := &nodeContentProcessor{ resourceAbsLinks: make(map[string]string), - localityDomain: ld, + globalLinksConfig: globalLinksConfig, resourcesRoot: resourcesRoot, - DownloadController: downloadJob, + downloadController: downloadJob, failFast: failFast, markdownFmt: markdownFmt, rewriteEmbedded: rewriteEmbedded, - ResourceHandlers: resourceHandlers, + resourceHandlers: resourceHandlers, } return c } +func (c *nodeContentProcessor) GetDownloadController() DownloadController { + return c.downloadController +} + //convenience wrapper adding logging -func (c *NodeContentProcessor) schedule(ctx context.Context, download *Download, from string) { +func (c *nodeContentProcessor) schedule(ctx context.Context, download *Download, from string) { klog.V(6).Infof("[%s] Linked resource scheduled for download: %s\n", from, download.url) - c.DownloadController.Schedule(ctx, download.url, download.resourceName) + c.downloadController.Schedule(ctx, download.url, download.resourceName) } // ReconcileLinks analyzes a document referenced by a node's contentSourcePath @@ -73,7 +78,7 @@ func (c *NodeContentProcessor) schedule(ctx context.Context, download *Download, // destinations, or rewriting them to absolute, as well as downloading some of // the linked resources. // The function returns the processed document or error. 
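// Illustrative sketch (assumed patterns, modeled on the test fixtures further
// down in this change): the shape of the global api.Links configuration that
// NewNodeContentProcessor above now receives in place of the removed
// localityDomain. Not part of the patch itself.
package reactor

import "github.com/gardener/docforge/pkg/api"

func strPtr(s string) *string { return &s }

// exampleGlobalLinks pins links into gardener/gardener to v1.10.0 and marks
// resources under its docs folder as eligible for download.
func exampleGlobalLinks() *api.Links {
	return &api.Links{
		Rewrites: map[string]*api.LinkRewriteRule{
			"/gardener/gardener/": {
				Version: strPtr("v1.10.0"),
			},
		},
		Downloads: &api.Downloads{
			Scope: map[string]api.ResourceRenameRules{
				"/gardener/gardener/(blob|raw|wiki)/v1.10.0/docs": nil,
			},
		},
	}
}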
-func (c *NodeContentProcessor) ReconcileLinks(ctx context.Context, node *api.Node, contentSourcePath string, documentBlob []byte) ([]byte, error) { +func (c *nodeContentProcessor) ReconcileLinks(ctx context.Context, node *api.Node, contentSourcePath string, documentBlob []byte) ([]byte, error) { klog.V(6).Infof("[%s] Reconciling links for %s\n", node.Name, contentSourcePath) fm, contentBytes, err := markdown.StripFrontMatter(documentBlob) @@ -96,14 +101,13 @@ func (c *NodeContentProcessor) ReconcileLinks(ctx context.Context, node *api.Nod return documentBytes, err } -func (c *NodeContentProcessor) reconcileMDLinks(ctx context.Context, docNode *api.Node, contentBytes []byte, contentSourcePath string) ([]byte, error) { +func (c *nodeContentProcessor) reconcileMDLinks(ctx context.Context, docNode *api.Node, contentBytes []byte, contentSourcePath string) ([]byte, error) { var errors *multierror.Error contentBytes, _ = markdown.UpdateLinkRefs(contentBytes, func(markdownType markdown.Type, destination, text, title []byte) ([]byte, []byte, []byte, error) { var ( - _destination string - _text, _title *string - download *Download - err error + _destination, _text, _title *string + download *Download + err error ) if _destination, _text, _title, download, err = c.resolveLink(ctx, docNode, string(destination), contentSourcePath); err != nil { errors = multierror.Append(err) @@ -111,33 +115,39 @@ func (c *NodeContentProcessor) reconcileMDLinks(ctx context.Context, docNode *ap return destination, text, title, err } } + if download != nil { + c.schedule(ctx, download, contentSourcePath) + } // rewrite abs links to embedded images to their raw format if necessary, to // ensure they are embedable - if c.rewriteEmbedded && markdownType == markdown.Image { - if _destination, err = c.rawImage(_destination); err != nil { + if c.rewriteEmbedded && markdownType == markdown.Image && _destination != nil { + if err = c.rawImage(_destination); err != nil { return destination, text, title, err } } + // write node processing stats for document nodes if docNode != nil { - if _destination != string(destination) { - recordLinkStats(docNode, "Links", fmt.Sprintf("%s -> %s", string(destination), _destination)) + if _destination != nil && *_destination != string(destination) { + if len(*_destination) == 0 { + *_destination = "*deleted*" + } + recordLinkStats(docNode, "Links", fmt.Sprintf("%s -> %s", string(destination), *_destination)) } else { - recordLinkStats(docNode, "Links", "") + if _text != nil && len(*_text) == 0 { + recordLinkStats(docNode, "Links", fmt.Sprintf("%s -> *deleted*", string(destination))) + } } } - if download != nil { - c.schedule(ctx, download, contentSourcePath) - } if _text != nil { text = []byte(*_text) } if _title != nil { title = []byte(*_title) } - if len(_destination) < 1 { + if _destination == nil { return nil, text, title, nil } - return []byte(_destination), text, title, nil + return []byte(*_destination), text, title, nil }) if c.failFast && errors != nil && errors.Len() > 0 { return nil, errors.ErrorOrNil() @@ -147,7 +157,7 @@ func (c *NodeContentProcessor) reconcileMDLinks(ctx context.Context, docNode *ap } // replace html raw links of any sorts. 
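// Illustrative sketch of the markdown.UpdateLinkRefs callback contract used by
// reconcileMDLinks above, before the HTML pass below. Assumptions: the import
// path of the markdown package, and that a nil destination returned from the
// callback drops the link markup, as the nil-destination handling above
// suggests. The URL is hypothetical.
package reactor

import (
	"bytes"

	"github.com/gardener/docforge/pkg/markdown"
)

// stripExampleLinks removes the markup of links pointing to one particular
// destination and leaves every other link untouched.
func stripExampleLinks(doc []byte) []byte {
	removed := []byte("https://example.com/obsolete.md")
	out, _ := markdown.UpdateLinkRefs(doc, func(t markdown.Type, destination, text, title []byte) ([]byte, []byte, []byte, error) {
		if bytes.Equal(destination, removed) {
			// nil destination: keep only the link text, drop the markup
			return nil, text, title, nil
		}
		return destination, text, title, nil
	})
	return out
}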
-func (c *NodeContentProcessor) reconcileHTMLLinks(ctx context.Context, docNode *api.Node, documentBytes []byte, contentSourcePath string) ([]byte, error) { +func (c *nodeContentProcessor) reconcileHTMLLinks(ctx context.Context, docNode *api.Node, documentBytes []byte, contentSourcePath string) ([]byte, error) { var errors *multierror.Error for _, regex := range htmlLinksRegexList { documentBytes = regex.ReplaceAllFunc(documentBytes, func(match []byte) []byte { @@ -159,10 +169,10 @@ func (c *NodeContentProcessor) reconcileHTMLLinks(ctx context.Context, docNode * url = strings.TrimSuffix(url, "\"") } destination, _, _, download, err := c.resolveLink(ctx, docNode, url, contentSourcePath) - klog.V(6).Infof("[%s] %s -> %s\n", contentSourcePath, url, destination) - if docNode != nil { - if url != destination { - recordLinkStats(docNode, "Links", fmt.Sprintf("%s -> %s", url, destination)) + klog.V(6).Infof("[%s] %s -> %s\n", contentSourcePath, url, *destination) + if docNode != nil && destination != nil { + if url != *destination { + recordLinkStats(docNode, "Links", fmt.Sprintf("%s -> %s", url, *destination)) } else { recordLinkStats(docNode, "Links", "") } @@ -174,7 +184,7 @@ func (c *NodeContentProcessor) reconcileHTMLLinks(ctx context.Context, docNode * errors = multierror.Append(err) return match } - return []byte(fmt.Sprintf("%s=%s", name, destination)) + return []byte(fmt.Sprintf("%s=%s", name, *destination)) }) } return documentBytes, errors.ErrorOrNil() @@ -187,122 +197,135 @@ type Download struct { } // returns destination, text (alt-text for images), title, download(url, downloadName), err -func (c *NodeContentProcessor) resolveLink(ctx context.Context, node *api.Node, destination string, contentSourcePath string) (string, *string, *string, *Download, error) { +func (c *nodeContentProcessor) resolveLink(ctx context.Context, node *api.Node, destination string, contentSourcePath string) (*string, *string, *string, *Download, error) { var ( - text, title, substituteDestination *string - hasSubstition bool - inLD bool - absLink string + substituteDestination, version, text, title *string + downloadResourceName, absLink string + ok bool + globalRewrites map[string]*api.LinkRewriteRule ) if strings.HasPrefix(destination, "#") || strings.HasPrefix(destination, "mailto:") { - return destination, nil, nil, nil, nil + return &destination, nil, nil, nil, nil } // validate destination - u, err := url.Parse(destination) + u, err := urls.Parse(destination) if err != nil { - return "", text, title, nil, err + return nil, text, title, nil, err } // can we handle this destination? - if u.IsAbs() && c.ResourceHandlers.Get(destination) == nil { + if u.IsAbs() && c.resourceHandlers.Get(destination) == nil { // It's a valid absolute link that is not in our scope. Leave it be. - return destination, text, title, nil, err + return &destination, text, title, nil, err } - - handler := c.ResourceHandlers.Get(contentSourcePath) + // force destination to absolute URL + handler := c.resourceHandlers.Get(contentSourcePath) if handler == nil { - return destination, text, title, nil, nil + return &destination, text, title, nil, nil } absLink, err = handler.BuildAbsLink(contentSourcePath, destination) if err != nil { - return "", text, title, nil, err + return nil, text, title, nil, err } - if hasSubstition, substituteDestination, text, title = substitute(absLink, node); hasSubstition && substituteDestination != nil { - if len(*substituteDestination) == 0 { - // quit early. 
substitution is a request to remove this link - return "", text, title, nil, nil - } - absLink = *substituteDestination - } - - //TODO: this is URI-specific (URLs only) - fixme - u, err = url.Parse(absLink) - if err != nil { - return "", text, title, nil, err + // rewrite link if required + if gLinks := c.globalLinksConfig; gLinks != nil { + globalRewrites = gLinks.Rewrites } _a := absLink - - resolvedLD := c.localityDomain if node != nil { - resolvedLD = resolveLocalityDomain(node, c.localityDomain) + if version, substituteDestination, text, title, ok = MatchForLinkRewrite(absLink, node, globalRewrites); ok { + if substituteDestination != nil { + if len(*substituteDestination) == 0 { + // quit early. substitution is a request to remove this link + s := "" + return nil, substituteDestination, &s, nil, nil + } + absLink = *substituteDestination + } + if version != nil { + handler := c.resourceHandlers.Get(absLink) + if handler == nil { + return &absLink, text, title, nil, nil + } + if absLink, err = handler.SetVersion(absLink, *version); err != nil { + klog.Warningf("Failed to set version %s to %s: %s\n", *version, absLink, err.Error()) + return &absLink, text, title, nil, nil + } + } + } } - if resolvedLD != nil { - absLink, inLD = resolvedLD.MatchPathInLocality(absLink, c.ResourceHandlers) + + // validate potentially rewritten links + u, err = urls.Parse(absLink) + if err != nil { + return nil, text, title, nil, err } if _a != absLink { - klog.V(6).Infof("[%s] Link converted %s -> %s\n", contentSourcePath, _a, absLink) + klog.V(6).Infof("[%s] Link rewritten %s -> %s\n", contentSourcePath, _a, absLink) } - // Links to other documents are enforced relative when - // linking documents from the node structure. - // Links to other documents are changed to match the linking - // document version when appropriate or left untouched. - if strings.HasSuffix(u.Path, ".md") { - //TODO: this is URI-specific (URLs only) - fixme - l := strings.TrimSuffix(absLink, "?") - l = strings.TrimSuffix(l, "#") - if existingNode := api.FindNodeByContentSource(l, node); existingNode != nil { - relPathBetweenNodes := node.RelativePath(existingNode) - if destination != relPathBetweenNodes { - klog.V(6).Infof("[%s] %s -> %s\n", contentSourcePath, destination, relPathBetweenNodes) + + if node != nil { + // Links to other documents are enforced relative when + // linking documents from the node structure. 
+ // Check if md extension to reduce the walkthroughs + if u.Extension == "md" { + if existingNode := api.FindNodeBySource(absLink, node); existingNode != nil { + relPathBetweenNodes := node.RelativePath(existingNode) + if destination != relPathBetweenNodes { + klog.V(6).Infof("[%s] %s -> %s\n", contentSourcePath, destination, relPathBetweenNodes) + } + return &relPathBetweenNodes, text, title, nil, nil } - destination = relPathBetweenNodes - return destination, text, title, nil, nil + return &absLink, text, title, nil, nil } - return absLink, text, title, nil, nil - } - // Links to resources are assessed for download eligibility - // and if applicable their destination is updated as relative - // path to predefined location for resources - if absLink != "" && inLD { - resourceName := c.generateResourceName(absLink, resolvedLD) - _d := destination - destination = buildDestination(node, resourceName, c.resourcesRoot) - if _d != destination { - klog.V(6).Infof("[%s] %s -> %s\n", contentSourcePath, _d, destination) + // Links to resources that are not structure document nodes are + // assessed for download eligibility and if applicable their + // destination is updated to relative path to predefined location + // for resources. + var globalDownloadsConfig *api.Downloads + if c.globalLinksConfig != nil { + globalDownloadsConfig = c.globalLinksConfig.Downloads + } + if downloadResourceName, ok = MatchForDownload(u, node, globalDownloadsConfig); ok { + resourceName := c.getDownloadResourceName(u, downloadResourceName) + _d := destination + destination = buildDownloadDestination(node, resourceName, c.resourcesRoot) + if _d != destination { + klog.V(6).Infof("[%s] %s -> %s\n", contentSourcePath, _d, destination) + } + return &destination, text, title, &Download{absLink, resourceName}, nil } - return destination, text, title, &Download{absLink, resourceName}, nil } if destination != absLink { klog.V(6).Infof("[%s] %s -> %s\n", contentSourcePath, destination, absLink) } - return absLink, text, title, nil, nil + return &absLink, text, title, nil, nil } -func (c *NodeContentProcessor) rawImage(src string) (string, error) { +// rewrite abs links to embedded objects to their raw link format if necessary, to +// ensure they are embedable +func (c *nodeContentProcessor) rawImage(link *string) (err error) { var ( - u *url.URL - err error + u *url.URL ) - if u, err = url.Parse(src); err != nil { - return src, err + if u, err = url.Parse(*link); err != nil { + return } if !u.IsAbs() { - return src, nil + return nil } - handler := c.ResourceHandlers.Get(src) + handler := c.resourceHandlers.Get(*link) if handler == nil { - return src, nil + return nil } - // rewrite abs links to embedded objects to their raw format if necessary, to - // ensure they are embedable - if src, err = handler.GetRawFormatLink(src); err != nil { - return src, err + if *link, err = handler.GetRawFormatLink(*link); err != nil { + return } - return src, nil + return nil } // Builds destination path for links from node to resource in root path @@ -311,7 +334,7 @@ func (c *NodeContentProcessor) rawImage(src string) (string, error) { // in root, e.g. "../../__resources/image.png", where root is "__resources". // If root is document root path, destinations are paths from the root, // e.g. "/__resources/image.png", where root is "/__resources". 
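// Illustrative sketch of the destination shapes produced by
// buildDownloadDestination (renamed below from buildDestination), following
// its doc comment above. The node names are assumptions; the relative-root
// result is quoted from the doc comment rather than asserted, since its exact
// depth depends on the node's position in the structure.
package reactor

import "github.com/gardener/docforge/pkg/api"

func exampleDownloadDestinations() (absRooted, nodeRelative string) {
	child := &api.Node{Name: "gardenlet.md"}
	root := &api.Node{Name: "docs", Nodes: []*api.Node{child}}
	root.SetParentsDownwards()

	// Absolute resources root: the destination is simply rooted there,
	// e.g. "/__resources/image.png".
	absRooted = buildDownloadDestination(child, "image.png", "/__resources")

	// Relative resources root: a path from the node up to the root,
	// e.g. "../__resources/image.png" per the doc comment above.
	nodeRelative = buildDownloadDestination(child, "image.png", "__resources")
	return
}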
-func buildDestination(node *api.Node, resourceName, root string) string { +func buildDownloadDestination(node *api.Node, resourceName, root string) string { if strings.HasPrefix(root, "/") { return root + "/" + resourceName } @@ -323,41 +346,15 @@ func buildDestination(node *api.Node, resourceName, root string) string { return resourceRelPath } -func (c *NodeContentProcessor) generateResourceName(absURL string, resolvedLD *localityDomain) string { - var ( - ok bool - resourceName string - ) - u, _ := urls.Parse(absURL) +// Check for cached resource name first and return that if found. Otherwise, +// return the downloadName +func (c *nodeContentProcessor) getDownloadResourceName(u *urls.URL, downloadName string) string { c.rwlock.Lock() defer c.rwlock.Unlock() - if resourceName, ok = c.resourceAbsLinks[u.Path]; !ok { - resourceName = u.ResourceName - if len(u.Extension) > 0 { - resourceName = fmt.Sprintf("%s.%s", u.ResourceName, u.Extension) - } - resourceName = resolvedLD.GetDownloadedResourceName(u) - c.resourceAbsLinks[absURL] = resourceName - } - return resourceName -} - -// returns substitution found, destination, text, title -func substitute(absLink string, node *api.Node) (ok bool, destination *string, text *string, title *string) { - if node == nil { - return false, nil, nil, nil - } - if substitutes := node.LinksSubstitutes; substitutes != nil { - for substituteK, substituteV := range substitutes { - // remove trailing slashes to avoid inequality only due to that - l := strings.TrimSuffix(absLink, "/") - s := strings.TrimSuffix(substituteK, "/") - if s == l { - return true, substituteV.Destination, substituteV.Text, substituteV.Title - } - } + if cachedDownloadName, ok := c.resourceAbsLinks[u.Path]; ok { + return cachedDownloadName } - return false, nil, nil, nil + return downloadName } // recordLinkStats records link stats for a node diff --git a/pkg/reactor/content_processor_test.go b/pkg/reactor/content_processor_test.go index 4957aec2..38558273 100644 --- a/pkg/reactor/content_processor_test.go +++ b/pkg/reactor/content_processor_test.go @@ -8,164 +8,165 @@ import ( "github.com/gardener/docforge/pkg/api" "github.com/gardener/docforge/pkg/resourcehandlers" "github.com/gardener/docforge/pkg/resourcehandlers/github" + "github.com/gardener/docforge/pkg/util/tests" "github.com/stretchr/testify/assert" ) func Test_processLink(t *testing.T) { nodeA := &api.Node{ - Name: "node_A.md", - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", - }, - }, + Name: "node_A.md", + Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", } nodeB := &api.Node{ - Name: "node_B.md", - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/extensions/overview.md", - }, - }, + Name: "node_B.md", + Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/extensions/overview.md", } nodeA.Nodes = []*api.Node{nodeB} nodeA.SetParentsDownwards() - tests := []struct { + testCases := []struct { name string node *api.Node destination string contentSourcePath string - wantDestination string - wantDownloadURL string - wantResourceName string + wantDestination *string + wantText *string + wantTitle *string + wantDownload *Download wantErr error - mutate func(c *NodeContentProcessor) + mutate func(c *nodeContentProcessor) }{ // skipped links { name: 
"Internal document links are not processed", destination: "#internal-link", contentSourcePath: "", - wantDestination: "#internal-link", - wantDownloadURL: "", - wantResourceName: "", + wantDestination: tests.StrPtr("#internal-link"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, }, { name: "mailto protocol is not processed", destination: "mailto:a@b.com", contentSourcePath: "", - wantDestination: "mailto:a@b.com", - wantDownloadURL: "", - wantResourceName: "", + wantDestination: tests.StrPtr("mailto:a@b.com"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, }, { name: "Absolute links to releases not processed", destination: "https://github.com/gardener/gardener/releases/tag/v1.4.0", - contentSourcePath: nodeA.ContentSelectors[0].Source, - wantDestination: "https://github.com/gardener/gardener/releases/tag/v1.4.0", - wantDownloadURL: "", - wantResourceName: "", + contentSourcePath: nodeA.Source, + wantDestination: tests.StrPtr("https://github.com/gardener/gardener/releases/tag/v1.4.0"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, }, { name: "Relative links to releases not processed", destination: "../../../releases/tag/v1.4.0", - contentSourcePath: nodeA.ContentSelectors[0].Source, - wantDestination: "https://github.com/gardener/gardener/releases/tag/v1.4.0", - wantDownloadURL: "", - wantResourceName: "", + contentSourcePath: nodeA.Source, + wantDestination: tests.StrPtr("https://github.com/gardener/gardener/releases/tag/v1.4.0"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, }, // links to resources { - name: "Relative link to resource NOT in locality domain", + name: "Relative link to resource NOT in download scope", destination: "./image.png", contentSourcePath: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", - wantDestination: "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png", - wantDownloadURL: "", - wantResourceName: "", + wantDestination: tests.StrPtr("https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, }, { - name: "Relative link to resource in locality domain", + name: "Relative link to resource in download scope", node: &api.Node{ - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", - }, - }, + Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", }, destination: "./image.png", contentSourcePath: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", - wantDestination: "/__resources", - wantDownloadURL: "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png", - wantResourceName: "", - wantErr: nil, - mutate: func(c *NodeContentProcessor) { - c.localityDomain = &localityDomain{ - mapping: map[string]*localityDomainValue{ - "github.com/gardener/gardener": &localityDomainValue{ - "v1.10.0", - "gardener/gardener/docs", - nil, - nil, + wantDestination: tests.StrPtr("/__resources"), + wantText: nil, + wantTitle: nil, + wantDownload: &Download{ + "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png", + "", + }, + wantErr: nil, + mutate: func(c *nodeContentProcessor) { + c.globalLinksConfig = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "/gardener/gardener/": &api.LinkRewriteRule{ + Version: 
tests.StrPtr("v1.10.0"), + }, + }, + Downloads: &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "/gardener/gardener/(blob|raw|wiki)/v1.10.0/docs": nil, }, }, } }, }, { - name: "Relative link to resource NOT in locality domain", + name: "Relative link to resource NOT in download scope", node: &api.Node{ - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", - }, - }, + Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", }, destination: "../image.png", contentSourcePath: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", - wantDestination: "https://github.com/gardener/gardener/blob/v1.10.0/image.png", - wantDownloadURL: "", - wantResourceName: "", + wantDestination: tests.StrPtr("https://github.com/gardener/gardener/blob/v1.10.0/image.png"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, }, { - name: "Absolute link to resource NOT in locality domain", + name: "Absolute link to resource NOT in download scope", node: nodeA, destination: "https://github.com/owner/repo/blob/master/docs/image.png", - contentSourcePath: nodeA.ContentSelectors[0].Source, - wantDestination: "https://github.com/owner/repo/blob/master/docs/image.png", - wantDownloadURL: "", - wantResourceName: "", + contentSourcePath: nodeA.Source, + wantDestination: tests.StrPtr("https://github.com/owner/repo/blob/master/docs/image.png"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, }, { - name: "Absolute link to resource in locality domain", + name: "Absolute link to resource in download scope", node: &api.Node{ - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", - }, - }, + Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", }, destination: "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png", contentSourcePath: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", - wantDestination: "/__resources", - wantDownloadURL: "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png", - wantResourceName: "", - wantErr: nil, - mutate: func(c *NodeContentProcessor) { - c.localityDomain = &localityDomain{ - mapping: map[string]*localityDomainValue{ - "github.com/gardener/gardener": &localityDomainValue{ - "v1.10.0", - "gardener/gardener/docs", - nil, - nil, + wantDestination: tests.StrPtr("/__resources"), + wantText: nil, + wantTitle: nil, + wantDownload: &Download{ + "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png", + "", + }, + wantErr: nil, + mutate: func(c *nodeContentProcessor) { + c.globalLinksConfig = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "/gardener/gardener/": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.0"), + }, + }, + Downloads: &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "/gardener/gardener/(blob|raw|wiki)/v1.10.0/docs": nil, }, }, } @@ -173,64 +174,72 @@ func Test_processLink(t *testing.T) { }, // links to documents { - name: "Absolute link to document NOT in locality domain and NOT from structure", + name: "Absolute link to document NOT in download scope and NOT from structure", node: nodeA, destination: "https://github.com/owner/repo/blob/master/docs/doc.md", - contentSourcePath: 
nodeA.ContentSelectors[0].Source, - wantDestination: "https://github.com/owner/repo/blob/master/docs/doc.md", - wantDownloadURL: "", - wantResourceName: "", + contentSourcePath: nodeA.Source, + wantDestination: tests.StrPtr("https://github.com/owner/repo/blob/master/docs/doc.md"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, }, { - name: "Absolute link to document in locality domain and from structure", + name: "Absolute link to document in download scope and from structure", node: nodeA, - destination: nodeB.ContentSelectors[0].Source, - contentSourcePath: nodeA.ContentSelectors[0].Source, - wantDestination: "./node_B.md", - wantDownloadURL: "", - wantResourceName: "", + destination: nodeB.Source, + contentSourcePath: nodeA.Source, + wantDestination: tests.StrPtr("./node_B.md"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, - mutate: func(c *NodeContentProcessor) { - c.localityDomain = &localityDomain{ - mapping: map[string]*localityDomainValue{ - "github.com/gardener/gardener": &localityDomainValue{ - "v1.10.0", - "gardener/gardener/docs", - nil, - nil, + mutate: func(c *nodeContentProcessor) { + c.globalLinksConfig = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "/gardener/gardener/": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.0"), + }, + }, + Downloads: &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "/gardener/gardener/(blob|raw|wiki)/v1.10.0/docs": nil, }, }, } }, }, { - name: "Relative link to document NOT in locality domain and NOT from structure", + name: "Relative link to document NOT in download scope and NOT from structure", node: nodeA, destination: "https://github.com/owner/repo/blob/master/docs/doc.md", - contentSourcePath: nodeA.ContentSelectors[0].Source, - wantDestination: "https://github.com/owner/repo/blob/master/docs/doc.md", - wantDownloadURL: "", - wantResourceName: "", + contentSourcePath: nodeA.Source, + wantDestination: tests.StrPtr("https://github.com/owner/repo/blob/master/docs/doc.md"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, }, { - name: "Relative link to document in locality domain and NOT from structure", + name: "Relative link to document in download scope and NOT from structure", node: nodeA, destination: "./another.md", - contentSourcePath: nodeA.ContentSelectors[0].Source, - wantDestination: "https://github.com/gardener/gardener/blob/v1.10.0/docs/another.md", - wantDownloadURL: "", - wantResourceName: "", + contentSourcePath: nodeA.Source, + wantDestination: tests.StrPtr("https://github.com/gardener/gardener/blob/v1.10.0/docs/another.md"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, - mutate: func(c *NodeContentProcessor) { - c.localityDomain = &localityDomain{ - mapping: map[string]*localityDomainValue{ - "github.com/gardener/gardener": &localityDomainValue{ - "v1.10.0", - "gardener/gardener/docs", - nil, - nil, + mutate: func(c *nodeContentProcessor) { + c.globalLinksConfig = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "/gardener/gardener/": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.0"), + }, + }, + Downloads: &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "/gardener/gardener/(blob|raw|wiki)/v1.10.0/docs": nil, }, }, } @@ -238,22 +247,20 @@ func Test_processLink(t *testing.T) { }, // Version rewrite { - name: "Absolute link to document not in locality domain version rewrite", + name: "Absolute link 
to document not in download scope version rewrite", node: nodeA, destination: "https://github.com/gardener/gardener/blob/master/sample.md", - contentSourcePath: nodeA.ContentSelectors[0].Source, - wantDestination: "https://github.com/gardener/gardener/blob/v1.10.0/sample.md", - wantDownloadURL: "", - wantResourceName: "", + contentSourcePath: nodeA.Source, + wantDestination: tests.StrPtr("https://github.com/gardener/gardener/blob/v1.10.0/sample.md"), + wantText: nil, + wantTitle: nil, + wantDownload: nil, wantErr: nil, - mutate: func(c *NodeContentProcessor) { - c.localityDomain = &localityDomain{ - mapping: map[string]*localityDomainValue{ - "github.com/gardener/gardener": &localityDomainValue{ - "v1.10.0", - "gardener/gardener/docs", - nil, - nil, + mutate: func(c *nodeContentProcessor) { + c.globalLinksConfig = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "/gardener/gardener/(blob)": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.0"), }, }, } @@ -263,134 +270,119 @@ func Test_processLink(t *testing.T) { name: "Absolute link to resource version rewrite", node: nodeA, destination: "https://github.com/gardener/gardener/blob/master/docs/image.png", - contentSourcePath: nodeA.ContentSelectors[0].Source, - wantDestination: "/__resources", - wantDownloadURL: "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png", - wantResourceName: "", - wantErr: nil, - mutate: func(c *NodeContentProcessor) { - c.localityDomain = &localityDomain{ - mapping: map[string]*localityDomainValue{ - "github.com/gardener/gardener": &localityDomainValue{ - "v1.10.0", - "gardener/gardener/docs", - nil, - nil, + contentSourcePath: nodeA.Source, + wantDestination: tests.StrPtr("/__resources"), + wantText: nil, + wantTitle: nil, + wantDownload: &Download{ + "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png", + "", + }, + wantErr: nil, + mutate: func(c *nodeContentProcessor) { + c.globalLinksConfig = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "/gardener/gardener/": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.0"), + }, + }, + Downloads: &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "/gardener/gardener/(blob|raw|wiki)/v1.10.0/docs": nil, + }, + }, + } + }, + }, + { + name: "Relative link to resource in download scope with rewrites", + node: &api.Node{ + Source: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", + Links: &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png": &api.LinkRewriteRule{ + Text: tests.StrPtr("Test text"), + Title: tests.StrPtr("Test title"), + }, + }, + }, + }, + destination: "./image.png", + contentSourcePath: "https://github.com/gardener/gardener/blob/v1.10.0/docs/README.md", + wantDestination: tests.StrPtr("/__resources"), + wantText: tests.StrPtr("Test text"), + wantTitle: tests.StrPtr("Test title"), + wantDownload: &Download{ + "https://github.com/gardener/gardener/blob/v1.10.0/docs/image.png", + "", + }, + wantErr: nil, + mutate: func(c *nodeContentProcessor) { + c.globalLinksConfig = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "/gardener/gardener/": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.0"), + }, + }, + Downloads: &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "/gardener/gardener/(blob|raw|wiki)/v1.10.0/docs": nil, }, }, } }, }, } - for _, tt := range tests { - t.Run(tt.name, 
func(t *testing.T) { - c := &NodeContentProcessor{ + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + c := &nodeContentProcessor{ resourceAbsLinks: make(map[string]string), - localityDomain: &localityDomain{ - mapping: map[string]*localityDomainValue{}, - }, resourcesRoot: "/__resources", - ResourceHandlers: resourcehandlers.NewRegistry(github.NewResourceHandler(nil, []string{"github.com"})), + resourceHandlers: resourcehandlers.NewRegistry(github.NewResourceHandler(nil, []string{"github.com"})), rewriteEmbedded: true, } - if tt.mutate != nil { - tt.mutate(c) + if tc.mutate != nil { + tc.mutate(c) } ctx, cancel := context.WithCancel(context.Background()) defer cancel() - gotDestination, _, _, gotDownload, gotErr := c.resolveLink(ctx, tt.node, tt.destination, tt.contentSourcePath) + gotDestination, gotText, gotTitle, gotDownload, gotErr := c.resolveLink(ctx, tc.node, tc.destination, tc.contentSourcePath) - if gotErr != tt.wantErr { - t.Errorf("expected err %s != %s", gotErr, tt.wantErr) - } - if len(tt.wantDownloadURL) > 0 { - if len(tt.wantDestination) == 0 && gotDestination != tt.wantDestination { - t.Errorf("expected destination %s != %s", tt.wantDestination, gotDestination) - } else if !strings.HasPrefix(gotDestination, tt.wantDestination) { - t.Errorf("expected destination starting with %s, was %s", tt.wantDestination, gotDestination) - } - if gotDownload.url != tt.wantDownloadURL { - t.Errorf("expected downloadURL %s != %s", tt.wantDownloadURL, gotDownload.url) - } - if len(gotDownload.resourceName) == 0 { - t.Error("expected resource name != \"\"\n", gotDownload.resourceName) - } - } else { - if gotDestination != tt.wantDestination { - t.Errorf("expected destination %s != %s", tt.wantDestination, gotDestination) - } - if gotDownload != nil { - t.Error("expected download == nil") - return + assert.Equal(t, tc.wantErr, gotErr) + if gotDownload != nil { + if tc.wantDownload != nil { + tc.wantDownload.resourceName = gotDownload.resourceName } } - }) - } -} - -func Test_Substitute(t *testing.T) { - cda := "cda" - testCases := []struct { - link string - substitutes map[string]*api.LinkSubstitute - wantDestination string - wantOK bool - wantText *string - wantTitle *string - }{ - { - "abc", - map[string]*api.LinkSubstitute{ - "abc": &api.LinkSubstitute{ - Destination: &cda, - }, - }, - "cda", - true, - &cda, - &cda, - }, - { - "abc", - map[string]*api.LinkSubstitute{}, - "abc", - false, - &cda, - &cda, - }, - { - "", - map[string]*api.LinkSubstitute{ - "abc": &api.LinkSubstitute{ - Destination: &cda, - }, - }, - "", - false, - nil, - nil, - }, - } - for _, tc := range testCases { - t.Run("", func(t *testing.T) { - n := &api.Node{ - LinksSubstitutes: tc.substitutes, - } - var ( - gotOK bool - gotDestination, gotText, gotTitle *string - ) - gotOK, gotDestination, gotText, gotTitle = substitute(tc.link, n) - assert.Equal(t, tc.wantOK, gotOK) + assert.Equal(t, tc.wantDownload, gotDownload) + var destination, text, title string if gotDestination != nil { - assert.Equal(t, tc.wantDestination, *gotDestination) + destination = *gotDestination + } + if tc.wantDestination != nil { + if !strings.HasPrefix(destination, *tc.wantDestination) { + t.Errorf("expected destination starting with %s, was %s", *tc.wantDestination, destination) + return + } + } else { + assert.Equal(t, tc.wantDestination, gotDestination) } if gotText != nil { - assert.Equal(t, tc.wantText, *gotText) + text = *gotText + } + if tc.wantText != nil { + assert.Equal(t, *tc.wantText, text) + } else { + 
assert.Equal(t, tc.wantText, gotText) } if gotTitle != nil { - assert.Equal(t, tc.wantTitle, *gotTitle) + title = *gotTitle + } + if tc.wantText != nil { + assert.Equal(t, *tc.wantTitle, title) + } else { + assert.Equal(t, tc.wantTitle, gotTitle) } }) } diff --git a/pkg/reactor/document_controller.go b/pkg/reactor/document_controller.go index c65323e0..a45c02ca 100644 --- a/pkg/reactor/document_controller.go +++ b/pkg/reactor/document_controller.go @@ -11,7 +11,7 @@ type DocumentController interface { jobs.Controller // SetDownloadScope sets the scope for resources considered "local" // and therefore downloaded and relatively linked - SetDownloadScope(scope *localityDomain) + // SetDownloadScope(scope *localityDomain) // GetDownloadController is accessor for the DownloadController // working with this DocumentController GetDownloadController() DownloadController @@ -41,11 +41,12 @@ func NewDocumentController(worker *DocumentWorker, workersCount int, failfast bo func (d *docController) Shutdown() { d.Controller.Shutdown() // propagate the shutdown to the related download controller - d.Worker.(*DocumentWorker).NodeContentProcessor.DownloadController.Shutdown() -} -func (d *docController) SetDownloadScope(scope *localityDomain) { - d.Worker.(*DocumentWorker).NodeContentProcessor.localityDomain = scope + d.Worker.(*DocumentWorker).NodeContentProcessor.GetDownloadController().Shutdown() } + +// func (d *docController) SetDownloadScope(scope *localityDomain) { +// d.Worker.(*DocumentWorker).NodeContentProcessor.localityDomain = scope +// } func (d *docController) GetDownloadController() DownloadController { - return d.Worker.(*DocumentWorker).NodeContentProcessor.DownloadController + return d.Worker.(*DocumentWorker).NodeContentProcessor.GetDownloadController() } diff --git a/pkg/reactor/document_worker.go b/pkg/reactor/document_worker.go index d35b5153..b521d016 100644 --- a/pkg/reactor/document_worker.go +++ b/pkg/reactor/document_worker.go @@ -12,6 +12,7 @@ import ( "github.com/gardener/docforge/pkg/resourcehandlers" utilnode "github.com/gardener/docforge/pkg/util/node" "github.com/gardener/docforge/pkg/writers" + "k8s.io/klog/v2" ) // Reader reads the bytes data from a given source URI @@ -24,8 +25,7 @@ type DocumentWorker struct { writers.Writer Reader processors.Processor - NodeContentProcessor *NodeContentProcessor - localityDomain localityDomain + NodeContentProcessor NodeContentProcessor } // DocumentWorkTask implements jobs#Task @@ -53,29 +53,45 @@ func (w *DocumentWorker) Work(ctx context.Context, task interface{}, wq jobs.Wor if task, ok := task.(*DocumentWorkTask); ok { var ( - b bytes.Buffer - document []byte - err error + b bytes.Buffer + sourceBlob, document []byte + err error ) - if len(task.Node.ContentSelectors) > 0 { - for _, content := range task.Node.ContentSelectors { - var sourceBlob []byte - if sourceBlob, err = w.Reader.Read(ctx, content.Source); err != nil { + if len(task.Node.Nodes) == 0 { + if len(task.Node.ContentSelectors) > 0 { + for _, content := range task.Node.ContentSelectors { + if sourceBlob, err = w.Reader.Read(ctx, content.Source); err != nil { + return jobs.NewWorkerError(err, 0) + } + if len(sourceBlob) == 0 { + continue + } + if sourceBlob, err = w.NodeContentProcessor.ReconcileLinks(ctx, task.Node, content.Source, sourceBlob); err != nil { + return jobs.NewWorkerError(err, 0) + } + b.Write(sourceBlob) + } + } + // TODO: implement read by template + if len(task.Node.Source) > 0 { + if sourceBlob, err = w.Reader.Read(ctx, 
task.Node.Source); err != nil { return jobs.NewWorkerError(err, 0) } if len(sourceBlob) == 0 { - continue + klog.Warningf("No content read from node %s source %s:", task.Node.Name, task.Node.Source) + return nil } - if sourceBlob, err = w.NodeContentProcessor.ReconcileLinks(ctx, task.Node, content.Source, sourceBlob); err != nil { + if sourceBlob, err = w.NodeContentProcessor.ReconcileLinks(ctx, task.Node, task.Node.Source, sourceBlob); err != nil { return jobs.NewWorkerError(err, 0) } b.Write(sourceBlob) } - if b.Len() == 0 { + klog.Warningf("Document node processing halted: No content assigned to document node %s", task.Node.Name) return nil } + if document, err = ioutil.ReadAll(&b); err != nil { return jobs.NewWorkerError(err, 0) } diff --git a/pkg/reactor/document_worker_test.go b/pkg/reactor/document_worker_test.go index 25d83b17..ba64ea40 100644 --- a/pkg/reactor/document_worker_test.go +++ b/pkg/reactor/document_worker_test.go @@ -55,20 +55,20 @@ func TestDocumentWorkerWork(t *testing.T) { return documentBlob, nil }, }, - &NodeContentProcessor{ - DownloadController: NewDownloadController(&TestReader{ + &nodeContentProcessor{ + downloadController: NewDownloadController(&TestReader{ make(map[string][]byte), }, &TestWriter{ make(map[string][]byte), }, 1, false, rhRegistry), - localityDomain: &localityDomain{ - mapping: map[string]*localityDomainValue{}, - }, - ResourceHandlers: rhRegistry, - }, - localityDomain{ - mapping: map[string]*localityDomainValue{}, + // localityDomain: &localityDomain{ + // mapping: map[string]*localityDomainValue{}, + // }, + resourceHandlers: rhRegistry, }, + // localityDomain{ + // mapping: map[string]*localityDomainValue{}, + // }, } testCases := []struct { diff --git a/pkg/reactor/integration_test.go b/pkg/reactor/integration_test.go index 04cd60b9..80eaeb2c 100644 --- a/pkg/reactor/integration_test.go +++ b/pkg/reactor/integration_test.go @@ -34,39 +34,45 @@ func _TestReactorWithGitHub(t *testing.T) { defer cancel() docs := &api.Documentation{ - Root: &api.Node{ - Name: "docs", - NodeSelector: &api.NodeSelector{ - Path: "https://github.com/gardener/gardener/tree/v1.10.0/docs", - }, - Nodes: []*api.Node{ - { - Name: "calico", - NodeSelector: &api.NodeSelector{ - Path: "https://github.com/gardener/gardener-extension-networking-calico/tree/master/docs", - }, + Structure: []*api.Node{ + &api.Node{ + Name: "docs", + NodeSelector: &api.NodeSelector{ + Path: "https://github.com/gardener/gardener/tree/v1.10.0/docs", }, - { - Name: "aws", - NodeSelector: &api.NodeSelector{ - Path: "https://github.com/gardener/gardener-extension-provider-aws/tree/master/docs", + Nodes: []*api.Node{ + { + Name: "calico", + NodeSelector: &api.NodeSelector{ + Path: "https://github.com/gardener/gardener-extension-networking-calico/tree/master/docs", + }, + }, + { + Name: "aws", + NodeSelector: &api.NodeSelector{ + Path: "https://github.com/gardener/gardener-extension-provider-aws/tree/master/docs", + }, }, }, }, }, - LocalityDomain: &api.LocalityDomain{ - LocalityDomainMap: map[string]*api.LocalityDomainValue{ - "github.com/gardener/gardener": &api.LocalityDomainValue{ - Version: "v1.10.0", - Path: "gardener/gardener/docs", + Links: &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "gardener/gardener/(blob|tree|raw)": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.0"), }, - "github.com/gardener/gardener-extension-provider-aws": &api.LocalityDomainValue{ - Version: "master", - Path: 
"gardener/gardener-extension-provider-aws/docs", + "gardener/gardener-extension-provider-aws/(blob|tree|raw)": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.15.3"), }, - "github.com/gardener/gardener-extension-networking-calico": &api.LocalityDomainValue{ - Version: "master", - Path: "gardener/gardener-extension-networking-calico/docs", + "gardener/gardener-extension-networking-calico/(blob|tree|raw)": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.0"), + }, + }, + Downloads: &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "gardener/gardener/(blob|tree|raw)/v1.10.0/docs": nil, + "gardener/gardener-extension-provider-aws/(blob|tree|raw)/v1.15.3/docs": nil, + "gardener/gardener-extension-networking-calico/(blob|tree|raw)/v1.10.0/docs": nil, }, }, }, diff --git a/pkg/reactor/links.go b/pkg/reactor/links.go new file mode 100644 index 00000000..0c2915a4 --- /dev/null +++ b/pkg/reactor/links.go @@ -0,0 +1,280 @@ +package reactor + +import ( + "fmt" + "regexp" + "strings" + + "github.com/gardener/docforge/pkg/api" + "github.com/gardener/docforge/pkg/util/urls" + "github.com/google/uuid" + "k8s.io/klog/v2" +) + +// MatchForLinkRewrite tries recursively from this node +// up to the hierarchy root link rewrite rules attached +// to nodes and finally defined globally that match this +// URL to apply them and rewrite the link or return it +// untouched. +func MatchForLinkRewrite(absLink string, node *api.Node, globalRenameRules map[string]*api.LinkRewriteRule) (version *string, destination *string, text *string, title *string, isMatched bool) { + // first try the global rules. node rules, if any will overwrite + version, destination, text, title, isMatched = matchForLinkRewrite(absLink, version, destination, text, title, globalRenameRules) + nodes := node.Parents() + nodes = append(nodes, node) + for _, node := range nodes { + if l := node.Links; l != nil { + if version, destination, text, title, isMatched = matchForLinkRewrite(absLink, version, destination, text, title, node.Links.Rewrites); isMatched && destination != nil && len(*destination) == 0 { + // we got a destroy link rule. quit right here + return + } + if destination != nil && version != nil && text != nil && title == nil { + return + } + } + } + return +} + +func matchForLinkRewrite(absLink string, _version, _destination, _text, _title *string, rules map[string]*api.LinkRewriteRule) (version *string, destination *string, text *string, title *string, isMatched bool) { + var ( + regex *regexp.Regexp + err error + ) + for expr, rule := range rules { + if regex, err = regexp.Compile(expr); err != nil { + klog.Warningf("invalid link rewrite expression: %s, %s", expr, err.Error()) + continue + } + if regex.Match([]byte(absLink)) { + isMatched = true + if rule == nil { + empty := "" + return nil, &empty, &empty, nil, true + } + version = _version + if rule.Version != nil { + version = rule.Version + } + destination = _destination + if rule.Destination != nil { + destination = rule.Destination + } + text = _text + if rule.Text != nil { + text = rule.Text + } + title = _title + if rule.Title != nil { + title = rule.Title + } + } + } + return +} + +// MatchForDownload returns true if the provided URL is in the defined download scope +// for the node, and the resource name to use when serializing it. 
+func MatchForDownload(url *urls.URL, node *api.Node, globalDownloadRules *api.Downloads) (downloadResourceName string, isMatched bool) { + downloads := []*api.Downloads{} + if globalDownloadRules != nil { + downloads = append(downloads, globalDownloadRules) + } + nodes := node.Parents() + nodes = append(nodes, node) + for _, p := range nodes { + if l := p.Links; l != nil { + if l.Downloads != nil { + downloads = append(downloads, l.Downloads) + } + } + } + for i := len(downloads) - 1; i >= 0; i-- { + d := downloads[i] + if downloadResourceName, isMatched = matchForDownload(url, d); isMatched { + return + } + } + return "", false +} + +func matchForDownload(url *urls.URL, downloadRules *api.Downloads) (string, bool) { + var ( + regex *regexp.Regexp + downloadResourceName string + err error + ) + if downloadRules == nil { + return "", false + } + link := url.String() + for linkMatchExpr, linkRenameRules := range downloadRules.Scope { + if regex, err = regexp.Compile(linkMatchExpr); err != nil { + klog.Warningf("invalid link rewrite expression: %s, %s", linkMatchExpr, err.Error()) + continue + } + if regex.Match([]byte(link)) { + // check for match scope-specific rules for renaming downloads first + if renameRule := matchDownloadRenameRule(link, linkRenameRules); len(renameRule) > 0 { + downloadResourceName = expandVariables(url, renameRule) + return downloadResourceName, true + } + // check for match scope-agnostic, global rules for renaming downloads + if renameRule := matchDownloadRenameRule(link, downloadRules.Renames); len(renameRule) > 0 { + downloadResourceName = expandVariables(url, renameRule) + return downloadResourceName, true + } + // default download resource name + downloadResourceName := expandVariables(url, "$uuid$ext") + return downloadResourceName, true + } + } + // check for match scope-agnostic, global rules for renaming downloads + if renameRule := matchDownloadRenameRule(link, downloadRules.Renames); len(renameRule) > 0 { + downloadResourceName = expandVariables(url, renameRule) + return downloadResourceName, true + } + return "", false +} + +func matchDownloadRenameRule(link string, rules map[string]string) string { + var ( + renameRegex *regexp.Regexp + err error + ) + for linkRenameMatchExpr, renameRule := range rules { + if renameRegex, err = regexp.Compile(linkRenameMatchExpr); err != nil { + klog.Warningf("invalid link rewrite expression: %s, %s", linkRenameMatchExpr, err.Error()) + continue + } + if renameRegex.Match([]byte(link)) { + return renameRule + } + } + return "" +} + +func expandVariables(url *urls.URL, renameExpr string) string { + id := uuid.New().String() + s := renameExpr + s = strings.ReplaceAll(s, "$name", url.ResourceName) + s = strings.ReplaceAll(s, "$uuid", id) + s = strings.ReplaceAll(s, "$ext", fmt.Sprintf(".%s", url.Extension)) + return s +} + +func resolveNodeLinks(node *api.Node, globaLinks *api.Links) (links []*api.Links) { + nodes := node.Parents() + nodes = append(nodes, node) + links = []*api.Links{} + for i := len(nodes) - 1; i >= 0; i-- { + node = nodes[i] + if l := node.Links; l != nil { + links = append(links, l) + } + } + if globaLinks != nil { + links = append(links, globaLinks) + } + return links +} + +// TODO: code below is alternative experiment to handle +// matching link rewrite rules for a link based on merging +// all rules on each node, instead of calculating them dynamically +// up the parents chain. +// Problem unsolved is the priority of rules - they are merged +// and not ordered. 
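// Illustrative sketch of the rule precedence that the parent-chain walk in
// MatchForLinkRewrite provides today and that the merge-based alternative
// below does not yet guarantee: node-level rewrites override global ones.
// Patterns mirror a case from links_test.go in this change.
package reactor

import (
	"fmt"

	"github.com/gardener/docforge/pkg/api"
)

func exampleRewritePrecedence() {
	strPtr := func(s string) *string { return &s }
	global := map[string]*api.LinkRewriteRule{
		"abc": {Destination: strPtr("abc")},
	}
	node := &api.Node{
		Links: &api.Links{
			Rewrites: map[string]*api.LinkRewriteRule{
				"abc": {Destination: strPtr("cda")},
			},
		},
	}
	_, destination, _, _, ok := MatchForLinkRewrite("abc", node, global)
	fmt.Println(*destination, ok) // "cda" true - the node-level rule wins
}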
+func resolveLinks(links *api.Links, nodes []*api.Node) { + for _, n := range nodes { + n.Links = mergeLinks(links, n.Links) + resolveLinks(n.Links, n.Nodes) + continue + } +} + +func mergeLinks(a, b *api.Links) *api.Links { + if b == nil { + return a + } + if a == nil { + return b + } + a.Rewrites = mergeRewrites(a.Rewrites, b.Rewrites) + a.Downloads = mergeDownloads(a.Downloads, b.Downloads) + return a +} + +func mergeRewrites(a, b map[string]*api.LinkRewriteRule) map[string]*api.LinkRewriteRule { + if len(b) == 0 { + return a + } + if len(a) == 0 { + return b + } + for k, v := range b { + if rule, ok := a[k]; ok { + a[k] = mergeLinkRewriteRule(rule, v) + continue + } + a[k] = v + } + return a +} + +func mergeLinkRewriteRule(a, b *api.LinkRewriteRule) *api.LinkRewriteRule { + if b.Version != nil { + a.Version = b.Version + } + if b.Destination != nil { + a.Destination = b.Destination + } + if b.Text != nil { + a.Text = b.Text + } + if b.Title != nil { + a.Title = b.Title + } + return a +} + +func mergeDownloads(a, b *api.Downloads) *api.Downloads { + if b == nil { + return a + } + if a == nil { + return b + } + a.Renames = mergeResourceRenameRule(a.Renames, b.Renames) + a.Scope = mergeDownloadScope(a.Scope, b.Scope) + return a +} + +func mergeResourceRenameRule(a, b api.ResourceRenameRules) api.ResourceRenameRules { + if len(b) == 0 { + return a + } + if len(a) == 0 { + return b + } + for k, v := range b { + a[k] = v + } + return a +} + +func mergeDownloadScope(a, b map[string]api.ResourceRenameRules) map[string]api.ResourceRenameRules { + if len(b) == 0 { + return a + } + if len(a) == 0 { + return b + } + for k, v := range b { + if rule, ok := a[k]; ok { + a[k] = mergeResourceRenameRule(rule, v) + continue + } + a[k] = v + } + return a +} diff --git a/pkg/reactor/links_test.go b/pkg/reactor/links_test.go new file mode 100644 index 00000000..c85f523a --- /dev/null +++ b/pkg/reactor/links_test.go @@ -0,0 +1,340 @@ +package reactor + +import ( + "testing" + + "github.com/gardener/docforge/pkg/api" + "github.com/gardener/docforge/pkg/util/tests" + "github.com/gardener/docforge/pkg/util/urls" + "github.com/stretchr/testify/assert" +) + +func Test_MatchForLinkRewrite(t *testing.T) { + testCases := []struct { + link string + globalRules map[string]*api.LinkRewriteRule + wantVersion *string + wantDestination *string + wantText *string + wantTitle *string + wantOK bool + mutateNode func(n *api.Node) + }{ + { + "abc", + map[string]*api.LinkRewriteRule{ + "abc": &api.LinkRewriteRule{ + Destination: tests.StrPtr("cda"), + }, + }, + nil, + tests.StrPtr("cda"), + nil, + nil, + true, + nil, + }, + { + "abc", + map[string]*api.LinkRewriteRule{ + "abc": nil, + }, + nil, + tests.StrPtr(""), + tests.StrPtr(""), + nil, + true, + nil, + }, + { + "abc", + map[string]*api.LinkRewriteRule{}, + nil, + nil, + nil, + nil, + false, + nil, + }, + { + "abc", + map[string]*api.LinkRewriteRule{ + "abc": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.1"), + Destination: tests.StrPtr("cda"), + Text: tests.StrPtr("Test"), + Title: tests.StrPtr("Test Title"), + }, + }, + tests.StrPtr("v1.10.1"), + tests.StrPtr("cda"), + tests.StrPtr("Test"), + tests.StrPtr("Test Title"), + true, + nil, + }, + { + "abc", + map[string]*api.LinkRewriteRule{ + "abc": &api.LinkRewriteRule{ + Destination: tests.StrPtr("abc"), + }, + }, + nil, + tests.StrPtr("cda"), + nil, + nil, + true, + func(n *api.Node) { + n.Links = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "abc": &api.LinkRewriteRule{ 
+ Destination: tests.StrPtr("cda"), + }, + }, + } + }, + }, + { + "abc", + map[string]*api.LinkRewriteRule{ + "abc": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.1"), + Destination: tests.StrPtr("abc"), + Text: tests.StrPtr("Test"), + Title: tests.StrPtr("Test Title"), + }, + }, + tests.StrPtr("v1.10.1"), + tests.StrPtr("cda"), + tests.StrPtr("Test"), + tests.StrPtr("Test Title"), + true, + func(n *api.Node) { + n.Links = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "abc": &api.LinkRewriteRule{ + Destination: tests.StrPtr("cda"), + }, + }, + } + }, + }, + { + "abc", + map[string]*api.LinkRewriteRule{ + "abc": &api.LinkRewriteRule{ + Version: tests.StrPtr("v1.10.1"), + }, + }, + tests.StrPtr("v1.10.1"), + tests.StrPtr("cda"), + tests.StrPtr("Test"), + tests.StrPtr("Test Title"), + true, + func(n *api.Node) { + n.Links = &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "abc": &api.LinkRewriteRule{ + Text: tests.StrPtr("Test"), + Destination: tests.StrPtr("cda"), + }, + }, + } + n1 := &api.Node{ + Nodes: []*api.Node{n}, + } + n.SetParent(n1) + n2 := &api.Node{ + Links: &api.Links{ + Rewrites: map[string]*api.LinkRewriteRule{ + "abc": &api.LinkRewriteRule{ + Title: tests.StrPtr("Test Title"), + }, + }, + }, + Nodes: []*api.Node{n1}, + } + n1.SetParent(n2) + }, + }, + } + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + n := &api.Node{} + if tc.mutateNode != nil { + tc.mutateNode(n) + } + gotVersion, gotDestination, gotText, gotTitle, gotOK := MatchForLinkRewrite(tc.link, n, tc.globalRules) + assert.Equal(t, tc.wantOK, gotOK) + if gotVersion != nil && tc.wantVersion == nil { + t.Errorf("expected version to be nil but it was %s", *gotVersion) + } else if gotVersion == nil && tc.wantVersion != nil { + t.Errorf("expected version to be %s but it was nil", *tc.wantVersion) + } else if gotVersion != nil && tc.wantVersion != nil { + assert.Equal(t, *tc.wantVersion, *gotVersion) + } else { + assert.Nil(t, gotVersion) + } + if gotDestination != nil && tc.wantDestination == nil { + t.Errorf("expected destination to be nil but it was %s", *gotDestination) + } else if gotDestination == nil && tc.wantDestination != nil { + t.Errorf("expected destination to be %s but it was nil", *tc.wantDestination) + } else if gotDestination != nil && tc.wantDestination != nil { + assert.Equal(t, *tc.wantDestination, *gotDestination) + } else { + assert.Nil(t, gotDestination) + } + if gotText != nil && tc.wantText == nil { + t.Errorf("expected text to be nil but it was %s", *gotText) + } else if gotDestination == nil && tc.wantText != nil { + t.Errorf("expected text to be %s but it was nil", *tc.wantText) + } else if gotText != nil && tc.wantText != nil { + assert.Equal(t, *tc.wantText, *gotText) + } else { + assert.Nil(t, gotText) + } + if gotTitle != nil && tc.wantTitle == nil { + t.Errorf("expected title to be nil but it was %s", *gotTitle) + } else if gotDestination == nil && tc.wantTitle != nil { + t.Errorf("expected title to be %s but it was nil", *tc.wantTitle) + } else if gotTitle != nil && tc.wantTitle != nil { + assert.Equal(t, *tc.wantTitle, *gotTitle) + } else { + assert.Nil(t, gotTitle) + } + }) + } +} + +func Test_MatchForDownload(t *testing.T) { + testCases := []struct { + link string + globalRules *api.Downloads + wantDownloadName string + wantOK bool + mutateNode func(n *api.Node) + }{ + { + "abc", + &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "abc": api.ResourceRenameRules{ + "abc": "cda", + }, + }, + }, + "cda", + true, + nil, + }, + { + 
"abc", + &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "ABC": api.ResourceRenameRules{ + "abc": "cda", + }, + }, + }, + "", + false, + nil, + }, + { + "abc", + &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "abc": api.ResourceRenameRules{ + "abc": "cda", + }, + }, + }, + "def", + true, + func(n *api.Node) { + n.Links = &api.Links{ + Downloads: &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "abc": api.ResourceRenameRules{ + "abc": "def", + }, + }, + }, + } + }, + }, + { + "abc/a.md", + &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "abc": api.ResourceRenameRules{ + "abc": "$name-test", + }, + }, + }, + "a-test", + true, + nil, + }, + { + "abc/a.md", + &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "abc": api.ResourceRenameRules{ + "abc": "$name-test$ext", + }, + }, + }, + "a-test.md", + true, + nil, + }, + { + "abc/a.md", + &api.Downloads{ + Scope: map[string]api.ResourceRenameRules{ + "abc": api.ResourceRenameRules{ + "abc": "$name-0$ext", + }, + }, + Renames: map[string]string{ + "abc": "$name-1$ext", + }, + }, + "a-0.md", + true, + nil, + }, + { + "abc/a.md", + &api.Downloads{ + Renames: map[string]string{ + "\\.(md)": "$name-1$ext", + }, + }, + "a-1.md", + true, + nil, + }, + } + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + var ( + link *urls.URL + err error + ) + n := &api.Node{} + if tc.mutateNode != nil { + tc.mutateNode(n) + } + if link, err = urls.Parse(tc.link); err != nil { + t.Fatalf("%v", err) + return + } + gotName, gotOK := MatchForDownload(link, n, tc.globalRules) + assert.Equal(t, tc.wantOK, gotOK) + assert.Equal(t, tc.wantDownloadName, gotName) + }) + } +} diff --git a/pkg/reactor/localitydomain.go b/pkg/reactor/localitydomain.go deleted file mode 100644 index 239c86cb..00000000 --- a/pkg/reactor/localitydomain.go +++ /dev/null @@ -1,343 +0,0 @@ -package reactor - -import ( - "fmt" - "reflect" - "regexp" - "strings" - - "github.com/gardener/docforge/pkg/api" - "github.com/gardener/docforge/pkg/resourcehandlers" - "github.com/gardener/docforge/pkg/util/urls" - "github.com/google/uuid" - "k8s.io/klog/v2" -) - -// localityDomain contains the entries defining a -// locality domain scope. Each entry is a mapping -// between a domain, such as github.com/gardener/gardener, -// and a path in it that defines "local" resources. -// Documents referenced by documentation node structure -// are always part of the locality domain. Other -// resources referenced by those documents are checked -// against the path hierarchy of locality domain -// entries to determine hwo they will be processed. 
-type localityDomain struct { - mapping - downloadSubstitutes map[string]string -} -type mapping map[string]*localityDomainValue - -// LocalityDomainValue encapsulates the members of a -// localityDomain entry value -type localityDomainValue struct { - // Version is the version of the resources in this - // locality domain - Version string - // Path defines the scope of this locality domain - // and is relative to it - Path string - Include []string - Exclude []string -} - -func copyMap(s map[string]string) map[string]string { - _s := make(map[string]string) - for k, v := range s { - _s[k] = v - } - return _s -} - -// fromAPI creates new localityDomain copy object from -// api.LocalityDomain -func copyLocalityDomain(ld *api.LocalityDomain) *localityDomain { - localityDomain := &localityDomain{ - mapping: map[string]*localityDomainValue{}, - } - for k, v := range ld.LocalityDomainMap { - localityDomain.mapping[k] = &localityDomainValue{ - v.Version, - v.Path, - v.Include, - v.Exclude, - } - } - localityDomain.downloadSubstitutes = copyMap(ld.DownloadSubstitutes) - return localityDomain -} - -// Set creates or updates a locality domain entry -// with key and path. An update is performed when -// the path is ancestor оф the existing path for -// that key. -func (ld localityDomain) Set(key, path, version string) { - var ( - existingLD *localityDomainValue - ok bool - ) - if existingLD, ok = ld.mapping[key]; !ok { - ld.mapping[key] = &localityDomainValue{ - version, - path, - nil, - nil, - } - return - } - - localityDomain := strings.Split(existingLD.Path, "/") - localityDomainCandidate := strings.Split(path, "/") - for i := range localityDomain { - if len(localityDomainCandidate) <= i || localityDomain[i] != localityDomainCandidate[i] { - ld.mapping[key].Path = strings.Join(localityDomain[:i], "/") - return - } - } -} - -// MatchPathInLocality determines if a given link is in the locality domain scope -// and returns the link with version matching the one of the matched locality -// domain. 
-func (ld localityDomain) MatchPathInLocality(link string, rhs resourcehandlers.Registry) (string, bool) { - if rh := rhs.Get(link); rh != nil { - var ( - key, path string - err error - ) - if key, path, _, err = rh.GetLocalityDomainCandidate(link); err != nil { - return link, false - } - localityDomain, ok := ld.mapping[key] - if !ok { - return link, false - } - - var exclude, include bool - // check if the link is not in locality scope by explicit exclude - if len(localityDomain.Exclude) > 0 { - for _, rx := range localityDomain.Exclude { - if exclude, err = regexp.MatchString(rx, link); err != nil { - klog.Warningf("exclude pattern match %s failed for %s\n", localityDomain.Exclude, link) - } - if exclude { - break - } - } - } - // check if the link is in locality scope by explicit include - if len(localityDomain.Include) > 0 { - for _, rx := range localityDomain.Include { - if include, err = regexp.MatchString(rx, link); err != nil { - klog.Warningf("include pattern match %s failed for %s\n", localityDomain.Include, link) - } - if include { - exclude = false - break - } - } - } - if exclude { - if link, err = rh.SetVersion(link, localityDomain.Version); err != nil { - klog.Errorf("%v\n", err) - return link, false - } - return link, false - } - - prefix := localityDomain.Path - // FIXME: this is tmp valid only for github urls - if strings.HasPrefix(path, prefix) { - if link, err = rh.SetVersion(link, localityDomain.Version); err != nil { - klog.Errorf("%v\n", err) - return link, false - } - return link, true - } - // check if in the same repo and then enforce versions rewrite - _s := strings.Split(prefix, "/") - _s = _s[:len(_s)-1] - repoPrefix := strings.Join(_s, "/") - if strings.HasPrefix(path, repoPrefix) { - if link, err = rh.SetVersion(link, localityDomain.Version); err != nil { - klog.Errorf("%v\n", err) - return link, false - } - } - } - return link, false -} - -// PathInLocality determines if a given link is in the locality domain scope -func (ld localityDomain) PathInLocality(link string, rhs resourcehandlers.Registry) bool { - if rh := rhs.Get(link); rh != nil { - var ( - key, path, version string - err error - ) - if key, path, version, err = rh.GetLocalityDomainCandidate(link); err != nil { - return false - } - localityDomain, ok := ld.mapping[key] - if !ok { - return false - } - klog.V(6).Infof("Path %s in locality domain %s: %v\n", path, localityDomain, strings.HasPrefix(path, localityDomain.Path)) - // TODO: locality domain to be constructed from key for comparison - return reflect.DeepEqual(localityDomain, &localityDomainValue{ - version, - path, - localityDomain.Include, - localityDomain.Exclude, - }) - } - return false -} - -func (ld localityDomain) GetDownloadedResourceName(u *urls.URL) string { - k := strings.TrimPrefix(u.Path, "/") - id := uuid.New().String() - if len(ld.downloadSubstitutes) > 0 { - for substituteMatcher, s := range ld.downloadSubstitutes { - var ( - matched bool - err error - ) - if matched, err = regexp.MatchString(substituteMatcher, k); err != nil { - klog.Warningf("download substitution pattern match %s failed for %s\n", substituteMatcher, k) - break - } - if matched { - s = strings.ReplaceAll(s, "$name", u.ResourceName) - s = strings.ReplaceAll(s, "$uuid", id) - s = strings.ReplaceAll(s, "$path", u.ResourcePath) - s = strings.ReplaceAll(s, "$ext", u.Extension) - return s - } - } - } - if len(u.Extension) > 0 { - s := fmt.Sprintf("%s.%s", id, u.Extension) - return s - } - return id -} - -// setLocalityDomainForNode visits all content selectors 
in the node and its -// descendants to build a localityDomain -func localityDomainFromNode(node *api.Node, rhs resourcehandlers.Registry) (*localityDomain, error) { - var localityDomains = &localityDomain{ - mapping: map[string]*localityDomainValue{}, - } - if err := csHandle(node.ContentSelectors, localityDomains, rhs); err != nil { - return nil, err - } - if node.Nodes != nil { - if err := fromNodes(node.Nodes, localityDomains, rhs); err != nil { - return nil, err - } - } - return localityDomains, nil -} - -func csHandle(contentSelectors []api.ContentSelector, localityDomains *localityDomain, rhs resourcehandlers.Registry) error { - for _, cs := range contentSelectors { - if rh := rhs.Get(cs.Source); rh != nil { - key, path, version, err := rh.GetLocalityDomainCandidate(cs.Source) - if err != nil { - return err - } - localityDomains.Set(key, path, version) - } - } - return nil -} - -func fromNodes(nodes []*api.Node, localityDomains *localityDomain, rhs resourcehandlers.Registry) error { - for _, node := range nodes { - csHandle(node.ContentSelectors, localityDomains, rhs) - if err := fromNodes(node.Nodes, localityDomains, rhs); err != nil { - return err - } - } - return nil -} - -// ResolveLocalityDomain resolves the actual locality domain for a node, -// considering the global one (if any) and locally defined one. -// If no localityDomain is defined on the node the function returns nil -func resolveLocalityDomain(node *api.Node, globalLD *localityDomain) *localityDomain { - if nodeLD := node.LocalityDomain; nodeLD != nil { - nodeLD := copyLocalityDomain(nodeLD) - if globalLD == nil { - return copyLocalityDomain(node.LocalityDomain) - } - ld := &localityDomain{ - mapping: map[string]*localityDomainValue{}, - } - for k, v := range globalLD.mapping { - ld.mapping[k] = &localityDomainValue{ - v.Version, - v.Path, - v.Exclude, - v.Include, - } - } - mergeLocalityDomain(ld, nodeLD) - return ld - } - return globalLD -} - -func mergeLocalityDomain(a, b *localityDomain) *localityDomain { - if a == nil || b == nil { - panic("cannot merge nil localityDomain arguments") - } - a.downloadSubstitutes = mergeDownloadSubstitutes(a.downloadSubstitutes, b.downloadSubstitutes) - for k, v := range b.mapping { - v := mergeLocalityDomainValue(a.mapping[k], v) - a.mapping[k] = v - } - return a -} - -// replaces Version and Path from b in a if any -// merges Exclude and Include from b in a if any -// merges DownloadSubstitutes from b in a if any, -// replacing duplicate entries in a with entries from b. -func mergeLocalityDomainValue(a, b *localityDomainValue) *localityDomainValue { - if len(b.Version) > 0 { - a.Version = b.Version - } - if len(b.Path) > 0 { - a.Path = b.Path - } - if len(b.Exclude) > 0 { - _e := []string{} - if len(a.Exclude) > 0 { - _e = append(_e, a.Exclude...) - } - a.Exclude = append(_e, b.Exclude...) - } - if len(b.Include) > 0 { - _e := []string{} - if len(a.Include) > 0 { - _e = append(_e, a.Include...) - } - a.Include = append(_e, b.Include...) 
- } - return a -} - -func mergeDownloadSubstitutes(a, b map[string]string) map[string]string { - if len(a) > 0 && len(b) < 1 { - return a - } - if len(a) < 1 && len(b) > 0 { - return b - } - for k, v := range b { - a[k] = v - } - return a -} diff --git a/pkg/reactor/localitydomain_test.go b/pkg/reactor/localitydomain_test.go deleted file mode 100644 index 3977a73a..00000000 --- a/pkg/reactor/localitydomain_test.go +++ /dev/null @@ -1,223 +0,0 @@ -package reactor - -import ( - "reflect" - "testing" - - "github.com/gardener/docforge/pkg/resourcehandlers" - "github.com/gardener/docforge/pkg/resourcehandlers/github" - - "github.com/gardener/docforge/pkg/api" -) - -func TestGitHubLocalityDomain_Set(t *testing.T) { - - tests := []struct { - name string - localityDomain *localityDomain - key string - urls []string - expected *localityDomainValue - }{ - { - name: "Should return the same and already existing locality domain", - localityDomain: &localityDomain{ - mapping: map[string]*localityDomainValue{ - "https://github.com/gardener/gardener": &localityDomainValue{ - "master", - "/gardener/gardener/master/docs", - nil, - nil, - }, - }, - }, - key: "https://github.com/gardener/gardener", - urls: []string{"/gardener/gardener/master/docs"}, - expected: &localityDomainValue{ - "master", - "/gardener/gardener/master/docs", - nil, - nil, - }, - }, - { - name: "Should return the candidate locality domain as it is higher in the hierarchy", - localityDomain: &localityDomain{ - mapping: map[string]*localityDomainValue{ - "https://github.com/gardener/gardener": &localityDomainValue{ - "master", - "/gardener/gardener/master/docs", - nil, - nil, - }, - }, - }, - key: "github.com/gardener/gardener", - urls: []string{"/gardener/gardener/master", "/gardener/gardener/master/docs/concepts", "/gardener/gardener/master/docs/concepts/apiserver.md"}, - expected: &localityDomainValue{ - "master", - "/gardener/gardener/master", - nil, - nil, - }, - }, - { - name: "Should return one level higher because both are on the same level in the hierarchy", - localityDomain: &localityDomain{ - mapping: map[string]*localityDomainValue{}, - }, - key: "github.com/gardener/gardener", - urls: []string{"/gardener/gardener/master/examples", "/gardener/gardener/master"}, - expected: &localityDomainValue{ - "master", - "/gardener/gardener/master", - nil, - nil, - }, - }, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - ld := tc.localityDomain - for _, url := range tc.urls { - ld.Set(tc.key, url, "master") - } - - if !reflect.DeepEqual(ld.mapping[tc.key], tc.expected) { - t.Errorf("test failed %s != %s", ld.mapping[tc.key], tc.expected) - } - }) - } -} - -func Test_SetLocalityDomainForNode(t *testing.T) { - tests := []struct { - name string - want *localityDomain - wantErr bool - mutate func(newDoc *api.Documentation) - }{ - { - name: "Should return the expected locality domain", - want: &localityDomain{ - mapping: map[string]*localityDomainValue{ - "github.com/org/repo": &localityDomainValue{ - "master", - "org/repo/docs", - nil, - nil, - }, - }, - }, - wantErr: false, - mutate: func(newDoc *api.Documentation) { - newDoc.Root.ContentSelectors = []api.ContentSelector{ - {Source: "https://github.com/org/repo/tree/master/docs/concepts"}, - {Source: "https://github.com/org/repo/tree/master/docs/architecture"}, - } - }, - }, - { - name: "Should return the expected locality domain", - want: &localityDomain{ - mapping: 
map[string]*localityDomainValue{ - "github.com/org/repo": &localityDomainValue{ - "master", - "org/repo/docs", - nil, - nil, - }, - }, - }, - wantErr: false, - mutate: func(newDoc *api.Documentation) { - newDoc.Root.ContentSelectors = []api.ContentSelector{ - {Source: "https://github.com/org/repo/tree/master/docs"}, - {Source: "https://github.com/org/repo/tree/master/docs/architecture"}, - } - }, - }, - { - name: "Should return the expected locality domain", - want: &localityDomain{ - mapping: map[string]*localityDomainValue{ - "github.com/org/repo": &localityDomainValue{ - "master", - "org/repo", - nil, - nil, - }, - }, - }, - wantErr: false, - mutate: func(newDoc *api.Documentation) { - newDoc.Root.ContentSelectors = []api.ContentSelector{ - {Source: "https://github.com/org/repo/tree/master/docs"}, - {Source: "https://github.com/org/repo/tree/master/example"}, - } - }, - }, - { - name: "Should return the expected locality domain", - want: &localityDomain{ - mapping: map[string]*localityDomainValue{ - "github.com/org/repo": &localityDomainValue{ - "master", - "org/repo", - nil, - nil, - }, - "github.com/org/repo2": &localityDomainValue{ - "master", - "org/repo2/example", - nil, - nil, - }, - }, - }, - wantErr: false, - mutate: func(newDoc *api.Documentation) { - newDoc.Root.ContentSelectors = []api.ContentSelector{ - {Source: "https://github.com/org/repo/tree/master/docs"}, - {Source: "https://github.com/org/repo/tree/master/example"}, - } - newDoc.Root.Nodes = []*api.Node{ - { - Name: "anotherrepo", - ContentSelectors: []api.ContentSelector{{Source: "https://github.com/org/repo2/tree/master/example"}}, - }, - } - }, - }, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - newDoc := createNewDocumentation() - gh := github.NewResourceHandler(nil, []string{"github.com"}) - rhs := resourcehandlers.NewRegistry(gh) - tc.mutate(newDoc) - got, err := localityDomainFromNode(newDoc.Root, rhs) - if (err != nil) != tc.wantErr { - t.Errorf("SetLocalityDomainForNode() error = %v, wantErr %v", err, tc.wantErr) - return - } - for k, v := range tc.want.mapping { - var ( - _v *localityDomainValue - ok bool - ) - if _v, ok = got.mapping[k]; !ok { - t.Errorf("want %s:%v, got %s:%v", k, v, k, _v) - } else { - if _v.Path != v.Path { - t.Errorf("want path %s, got %s", v.Path, _v.Path) - } - if _v.Version != v.Version { - t.Errorf("want version %s, got %s", v.Version, _v.Version) - } - } - } - rhs.Remove() - }) - } -} diff --git a/pkg/reactor/reactor.go b/pkg/reactor/reactor.go index 12de2c26..6046fa33 100644 --- a/pkg/reactor/reactor.go +++ b/pkg/reactor/reactor.go @@ -3,9 +3,12 @@ package reactor import ( "context" "fmt" + "io" "os" + "strings" "github.com/gardener/docforge/pkg/processors" + "github.com/google/uuid" "k8s.io/klog/v2" "github.com/gardener/docforge/pkg/api" @@ -30,6 +33,7 @@ type Options struct { ResourceHandlers []resourcehandlers.ResourceHandler DryRunWriter writers.DryRunWriter Resolve bool + GlobalLinksConfig *api.Links } // NewReactor creates a Reactor from Options @@ -39,7 +43,7 @@ func NewReactor(o *Options) *Reactor { worker := &DocumentWorker{ Writer: o.Writer, Reader: &GenericReader{rhRegistry}, - NodeContentProcessor: NewNodeContentProcessor(o.ResourcesPath, nil, downloadController, o.FailFast, o.MarkdownFmt, o.RewriteEmbedded, rhRegistry), + NodeContentProcessor: NewNodeContentProcessor(o.ResourcesPath, o.GlobalLinksConfig, downloadController, 
o.FailFast, o.MarkdownFmt, o.RewriteEmbedded, rhRegistry), Processor: o.Processor, } docController := NewDocumentController(worker, o.MaxWorkersCount, o.FailFast) @@ -56,84 +60,148 @@ func NewReactor(o *Options) *Reactor { // Reactor orchestrates the documentation build workflow type Reactor struct { - FailFast bool - ResourceHandlers resourcehandlers.Registry - localityDomain *localityDomain + FailFast bool + ResourceHandlers resourcehandlers.Registry + // localityDomain *localityDomain DocController DocumentController DownloadController DownloadController DryRunWriter writers.DryRunWriter Resolve bool } -// Run starts build operation on docStruct -func (r *Reactor) Run(ctx context.Context, docStruct *api.Documentation, dryRun bool) error { - var ( - err error - ld *localityDomain - ) +// Run starts build operation on documentation +func (r *Reactor) Run(ctx context.Context, manifest *api.Documentation, dryRun bool) error { ctx, cancel := context.WithCancel(ctx) defer func() { + if r.Resolve { + if err := printResolved(ctx, manifest, os.Stdout); err != nil { + klog.Errorf("failed to print resolved manifest: %s", err.Error()) + } + } cancel() if dryRun { r.DryRunWriter.Flush() } }() - if err := r.ResolveStructure(ctx, docStruct.Root); err != nil { + if err := r.ResolveManifest(ctx, manifest); err != nil { return err } - if docStruct.LocalityDomain != nil { - ld = copyLocalityDomain(docStruct.LocalityDomain) - if ld == nil || len(ld.mapping) == 0 { - if ld, err = localityDomainFromNode(docStruct.Root, r.ResourceHandlers); err != nil { - return err - } - r.localityDomain = ld - } + klog.V(4).Info("Building documentation structure\n\n") + if err := r.Build(ctx, manifest.Structure); err != nil { + return err } - if r.Resolve { - s, err := api.Serialize(docStruct) - if err != nil { + return nil +} + +// ResolveManifest resolves the manifests into buildable model +func (r *Reactor) ResolveManifest(ctx context.Context, manifest *api.Documentation) error { + var ( + structure []*api.Node + err error + ) + if manifest.NodeSelector != nil { + if structure, err = r.resolveManifestNodeSelector(ctx, manifest.NodeSelector); err != nil { return err } - os.Stdout.Write([]byte(s)) - os.Stdout.Write([]byte("\n\n")) + } + if structure == nil { + structure = manifest.Structure + } else { + // TODO: this should be rather merge than append + structure = append(manifest.Structure, structure...) } - klog.V(4).Info("Building documentation structure\n\n") - if err = r.Build(ctx, docStruct.Root, ld); err != nil { + if structure == nil { + return fmt.Errorf("document structure resolved to nil") + } + + if err = r.resolveStructure(ctx, structure, manifest.Links); err != nil { return err } + manifest.Structure = structure return nil } -// ResolveStructure builds the subnodes hierarchy of a node based on the natural nodes -// hierarchy and on rules such as those in NodeSelector. 
-// The node hierarchy is resolved by an appropriate handler selected based
-// on the NodeSelector path URI
+func printResolved(ctx context.Context, manifest *api.Documentation, writer io.Writer) error {
+ // for _, node := range manifest.Structure {
+ // if links := resolveNodeLinks(node, manifest.Links); len(links) > 0 {
+ // for _, l := range links {
+ // l := mergeLinks(node.ResolvedLinks, l)
+ // node.ResolvedLinks = l
+ // }
+ // }
+ // // remove resolved links for container nodes
+ // if node.Nodes != nil {
+ // node.ResolvedLinks = nil
+ // }
+ // }
+ s, err := api.Serialize(manifest)
+ if err != nil {
+ return err
+ }
+ writer.Write([]byte(s))
+ writer.Write([]byte("\n\n"))
+ return nil
+}
+
+// resolveStructure resolves the following in a structure model:
+// - Node name variables
+// - NodeSelectors
// The resulting model is the actual flight plan for replicating resources.
-func (r *Reactor) ResolveStructure(ctx context.Context, node *api.Node) error {
- node.SetParentsDownwards()
- if node.NodeSelector != nil {
- var handler resourcehandlers.ResourceHandler
- if handler = r.ResourceHandlers.Get(node.NodeSelector.Path); handler == nil {
- return fmt.Errorf("No suitable handler registered for path %s", node.NodeSelector.Path)
+func (r *Reactor) resolveStructure(ctx context.Context, nodes []*api.Node, globalLinksConfig *api.Links) error {
+ var handler resourcehandlers.ResourceHandler
+ for _, node := range nodes {
+ node.SetParentsDownwards()
+ if len(node.Source) > 0 {
+ if handler = r.ResourceHandlers.Get(node.Source); handler == nil {
+ return fmt.Errorf("No suitable handler registered for URL %s", node.Source)
+ }
+ if len(node.Name) == 0 {
+ node.Name = "$name"
+ }
+ name, ext := handler.ResourceName(node.Source)
+ id := uuid.New().String()
+ node.Name = strings.ReplaceAll(node.Name, "$name", name)
+ node.Name = strings.ReplaceAll(node.Name, "$uuid", id)
+ node.Name = strings.ReplaceAll(node.Name, "$ext", fmt.Sprintf(".%s", ext))
}
- if err := handler.ResolveNodeSelector(ctx, node); err != nil {
- return err
+ if node.NodeSelector != nil {
+ if handler = r.ResourceHandlers.Get(node.NodeSelector.Path); handler == nil {
+ return fmt.Errorf("No suitable handler registered for path %s", node.NodeSelector.Path)
+ }
+ if err := handler.ResolveNodeSelector(ctx, node, node.NodeSelector.ExcludePaths, node.NodeSelector.FrontMatter, node.NodeSelector.ExcludeFrontMatter, node.NodeSelector.Depth); err != nil {
+ return err
+ }
+ // remove node selectors after resolution
+ node.NodeSelector = nil
}
- // remove node selectors after resolution
- node.NodeSelector = nil
- }
- if len(node.Nodes) > 0 {
- for _, n := range node.Nodes {
- if err := r.ResolveStructure(ctx, n); err != nil {
+ if len(node.Nodes) > 0 {
+ if err := r.resolveStructure(ctx, node.Nodes, globalLinksConfig); err != nil {
return err
}
}
}
return nil
}
+
+// resolveManifestNodeSelector resolves a manifest-level nodeSelector into its nodes structure
+func (r *Reactor) resolveManifestNodeSelector(ctx context.Context, nodeSelector *api.NodeSelector) ([]*api.Node, error) {
+ var handler resourcehandlers.ResourceHandler
+ if nodeSelector != nil {
+ node := &api.Node{
+ NodeSelector: nodeSelector,
+ }
+ if handler = r.ResourceHandlers.Get(nodeSelector.Path); handler == nil {
+ return nil, fmt.Errorf("No suitable handler registered for path %s", nodeSelector.Path)
+ }
+ if err := handler.ResolveNodeSelector(ctx, node, nodeSelector.ExcludePaths, nodeSelector.FrontMatter, nodeSelector.ExcludeFrontMatter, nodeSelector.Depth); err != nil {
+ return nil, err
+ } + return node.Nodes, nil + } + return nil, nil +} diff --git a/pkg/reactor/reactor_test.go b/pkg/reactor/reactor_test.go index 1637331f..fbddd34f 100644 --- a/pkg/reactor/reactor_test.go +++ b/pkg/reactor/reactor_test.go @@ -15,14 +15,12 @@ func init() { var ( apiRefNode = &api.Node{ Name: "apiRef", - Title: "API Reference", ContentSelectors: []api.ContentSelector{{Source: "https://github.com/org/repo/tree/master/docs/architecture/apireference.md"}}, } archNode = &api.Node{ Name: "arch", ContentSelectors: []api.ContentSelector{{Source: "https://github.com/org/repo/tree/master/docs/architecture"}}, - Title: "Architecture", Nodes: []*api.Node{ apiRefNode, }, @@ -31,26 +29,25 @@ var ( blogNode = &api.Node{ Name: "blog", ContentSelectors: []api.ContentSelector{{Source: "https://github.com/org/repo/tree/master/docs/blog/blog-part1.md"}}, - Title: "Blog", } tasksNode = &api.Node{ Name: "tasks", - Title: "Tasks", ContentSelectors: []api.ContentSelector{{Source: "https://github.com/org/repo/tree/master/docs/tasks"}}, } ) func createNewDocumentation() *api.Documentation { return &api.Documentation{ - Root: &api.Node{ - Name: "rootNode", - Title: "Root node!", - ContentSelectors: []api.ContentSelector{{Source: "https://github.com/org/repo/tree/master/docs"}}, - Nodes: []*api.Node{ - archNode, - blogNode, - tasksNode, + Structure: []*api.Node{ + &api.Node{ + Name: "rootNode", + ContentSelectors: []api.ContentSelector{{Source: "https://github.com/org/repo/tree/master/docs"}}, + Nodes: []*api.Node{ + archNode, + blogNode, + tasksNode, + }, }, }, } @@ -62,7 +59,7 @@ func (f *FakeResourceHandler) Accept(uri string) bool { return true } -func (f *FakeResourceHandler) ResolveNodeSelector(ctx context.Context, node *api.Node) error { +func (f *FakeResourceHandler) ResolveNodeSelector(ctx context.Context, node *api.Node, excludePaths []string, frontMatter map[string]interface{}, excludeFrontMatter map[string]interface{}, depth int32) error { return nil } @@ -74,6 +71,10 @@ func (f *FakeResourceHandler) Name(uri string) string { return uri } +func (f *FakeResourceHandler) ResourceName(uri string) (string, string) { + return "", "" +} + func (f *FakeResourceHandler) BuildAbsLink(source, relLink string) (string, error) { return relLink, nil } diff --git a/pkg/resourcehandlers/github/github_resource_handler.go b/pkg/resourcehandlers/github/github_resource_handler.go index b6b76278..99d336e5 100644 --- a/pkg/resourcehandlers/github/github_resource_handler.go +++ b/pkg/resourcehandlers/github/github_resource_handler.go @@ -15,6 +15,7 @@ import ( "github.com/gardener/docforge/pkg/api" "github.com/gardener/docforge/pkg/resourcehandlers" + "github.com/gardener/docforge/pkg/util/urls" "github.com/google/go-github/v32/github" ) @@ -69,15 +70,16 @@ func TreeEntryToGitHubLocator(treeEntry *github.TreeEntry, shaAlias string) *Res } // Recursively adds or merges nodes built from flat ResourceLocators list to node.Nodes -func buildNodes(node *api.Node, childResourceLocators []*ResourceLocator, cache *Cache) { +func buildNodes(node *api.Node, excludePaths []string, frontMatter map[string]interface{}, excludeFrontMatter map[string]interface{}, depth int32, childResourceLocators []*ResourceLocator, cache *Cache, currentDepth int32) (err error) { var ( nodePath string nodeResourceLocator *ResourceLocator + regex *regexp.Regexp ) if node.NodeSelector != nil { nodePath = node.NodeSelector.Path - } else if len(node.ContentSelectors) > 0 { - nodePath 
= node.ContentSelectors[0].Source + } else if len(node.Source) > 0 { + nodePath = node.Source } if nodeResourceLocator = cache.Get(nodePath); nodeResourceLocator == nil { panic(fmt.Sprintf("Node is not available as ResourceLocator %v", nodePath)) @@ -87,33 +89,54 @@ func buildNodes(node *api.Node, childResourceLocators []*ResourceLocator, cache if !strings.HasPrefix(childResourceLocator.Path, nodeResourceLocator.Path) { continue } - childPathSegmentsCount := len(strings.Split(childResourceLocator.Path, "/")) - childName := childResourceLocator.GetName() - // 1 sublevel only - if (childPathSegmentsCount - nodePathSegmentsCount) == 1 { - // folders and .md files only - if childResourceLocator.Type == Blob && !strings.HasSuffix(strings.ToLower(childName), ".md") { - continue - } - childName := strings.TrimSuffix(childName, ".md") - n := &api.Node{ - ContentSelectors: []api.ContentSelector{{Source: childResourceLocator.String()}}, - Name: childName, + // check if this resource path has to be excluded + exclude := false + for _, excludePath := range excludePaths { + if regex, err = regexp.Compile(excludePath); err != nil { + return fmt.Errorf("invalid path exclude expression %s: %w", excludePath, err) } - n.SetParent(node) - if node.Nodes == nil { - node.Nodes = make([]*api.Node, 0) + urlString := childResourceLocator.String() + if regex.Match([]byte(urlString)) { + exclude = true + break } + } + if !exclude { + childPathSegmentsCount := len(strings.Split(childResourceLocator.Path, "/")) + childName := childResourceLocator.GetName() + // 1 sublevel only + if (childPathSegmentsCount - nodePathSegmentsCount) == 1 { + // folders and .md files only + if childResourceLocator.Type == Blob && !strings.HasSuffix(strings.ToLower(childName), ".md") { + continue + } + n := &api.Node{ + Name: childName, + Source: childResourceLocator.String(), + } + n.SetParent(node) + if node.Nodes == nil { + node.Nodes = make([]*api.Node, 0) + } - node.Nodes = append(node.Nodes, n) + node.Nodes = append(node.Nodes, n) - // recursively build subnodes if entry is sub-tree - if childResourceLocator.Type == Tree { - childResourceLocators = cache.GetSubset(childResourceLocator.String()) - buildNodes(n, childResourceLocators, cache) + // recursively build subnodes if entry is sub-tree + if childResourceLocator.Type == Tree { + if depth > 0 && depth == currentDepth { + continue + } + currentDepth++ + childResourceLocators = cache.GetSubset(childResourceLocator.String()) + if err = buildNodes(n, excludePaths, frontMatter, excludeFrontMatter, depth, childResourceLocators, cache, currentDepth); err != nil { + return err + } + currentDepth-- + } } } } + return } // - remove contentSources that reference tree objects. 
They are used @@ -123,13 +146,17 @@ func buildNodes(node *api.Node, childResourceLocators []*ResourceLocator, cache // containing for example images only adn thus irrelevant to the // documentation structure func cleanupNodeTree(node *api.Node) { - if len(node.ContentSelectors) > 0 { - source := node.ContentSelectors[0].Source + if len(node.Source) > 0 { + source := node.Source if rl, _ := parse(source); rl.Type == Tree { - node.ContentSelectors = nil + node.Source = "" } } for _, n := range node.Nodes { + // skip nested unresolved nodeSelector nodes from cleanup + if n.NodeSelector != nil && len(n.Nodes) == 0 { + continue + } cleanupNodeTree(n) } childrenCopy := make([]*api.Node, len(node.Nodes)) @@ -137,10 +164,15 @@ func cleanupNodeTree(node *api.Node) { copy(childrenCopy, node.Nodes) } for i, n := range node.Nodes { - if n.ContentSelectors == nil && len(n.Nodes) == 0 { - childrenCopy = removeNode(childrenCopy, i) + if len(n.Nodes) == 0 { + if n.NodeSelector != nil { + continue + } + if len(n.Source) == 0 && len(n.Nodes) == 0 { + childrenCopy = removeNode(childrenCopy, i) + } + node.Nodes = childrenCopy } - node.Nodes = childrenCopy } } @@ -242,7 +274,6 @@ func (gh *GitHub) URLToGitHubLocator(ctx context.Context, urlString string, reso err error ) // try cache first - //TODO: we probably need lock before getting from the map if ghRL = gh.cache.Get(urlString); ghRL == nil { if ghRL, err = parse(urlString); err != nil { return nil, err @@ -292,7 +323,7 @@ func (gh *GitHub) Accept(uri string) bool { // ResolveNodeSelector recursively adds nodes built from tree entries to node // ResolveNodeSelector implements resourcehandlers/ResourceHandler#ResolveNodeSelector -func (gh *GitHub) ResolveNodeSelector(ctx context.Context, node *api.Node) error { +func (gh *GitHub) ResolveNodeSelector(ctx context.Context, node *api.Node, excludePaths []string, frontMatter map[string]interface{}, excludeFrontMatter map[string]interface{}, depth int32) error { var ( rl *ResourceLocator err error @@ -303,7 +334,9 @@ func (gh *GitHub) ResolveNodeSelector(ctx context.Context, node *api.Node) error if rl != nil { // build node subnodes hierarchy from cache (URLToGitHubLocator populates the cache) childResourceLocators := gh.cache.GetSubset(rl.String()) - buildNodes(node, childResourceLocators, gh.cache) + if err = buildNodes(node, excludePaths, frontMatter, excludeFrontMatter, depth, childResourceLocators, gh.cache, 0); err != nil { + return err + } // finally cleanup folder entries from contentSelectors cleanupNodeTree(node) } @@ -331,11 +364,25 @@ func (gh *GitHub) Read(ctx context.Context, uri string) ([]byte, error) { } case Wiki: { - resp, err := gh.rawusercontentClient.Get(rl.String()) + wikiPage := rl.String() + if !strings.HasSuffix(wikiPage, ".md") { + wikiPage = fmt.Sprintf("%s.%s", wikiPage, "md") + } + resp, err := gh.rawusercontentClient.Get(wikiPage) if err != nil { return nil, err } defer resp.Body.Close() + var hasContentTypeRaw bool + for _, ct := range resp.Header["Content-Type"] { + if strings.Contains(ct, "text/plain") { + hasContentTypeRaw = true + break + } + } + if !hasContentTypeRaw { + return nil, fmt.Errorf("Request for resource content to %s returned unexpected content type for wiki raw content: %s", rl.String(), resp.Header["Content-Type"]) + } return ioutil.ReadAll(resp.Body) } case Tree: @@ -347,19 +394,21 @@ func (gh *GitHub) Read(ctx context.Context, uri string) ([]byte, error) { return blob, err } -// Name implements resourcehandlers/ResourceHandler#Name -func (gh *GitHub) Name(uri 
string) string { +// ResourceName implements resourcehandlers/ResourceHandler#ResourceName +func (gh *GitHub) ResourceName(uri string) (string, string) { var ( rl *ResourceLocator err error ) - if rl, err = gh.URLToGitHubLocator(nil, uri, true); err != nil { + if rl, err = gh.URLToGitHubLocator(nil, uri, false); err != nil { panic(err) } if gh != nil { - return rl.GetName() + if u, err := urls.Parse(rl.String()); err == nil { + return u.ResourceName, u.Extension + } } - return "" + return "", "" } // BuildAbsLink builds the abs link from the source and the relative path @@ -384,23 +433,6 @@ func (gh *GitHub) BuildAbsLink(source, relPath string) (string, error) { return u.String(), err } -// GetLocalityDomainCandidate returns the provided source as locality domain candidate -// parameters suitable for quering reactor/LocalityDomain#PathInLocality -// Implements resourcehandlers/ResourceHandler#GetLocalityDomainCandidate -func (gh *GitHub) GetLocalityDomainCandidate(source string) (key, path, version string, err error) { - var rl *ResourceLocator - if rl, err = parse(source); rl != nil { - version = rl.SHAAlias - if len(rl.Host) > 0 && len(rl.Owner) > 0 && len(rl.Repo) > 0 { - key = fmt.Sprintf("%s/%s/%s", rl.Host, rl.Owner, rl.Repo) - } - if len(rl.Owner) > 0 && len(rl.Repo) > 0 && len(rl.SHAAlias) > 0 { - path = fmt.Sprintf("%s/%s/%s", rl.Owner, rl.Repo, rl.Path) - } - } - return -} - // SetVersion replaces the version segment in the path of GitHub URLs if // applicable or returns the original URL unchanged if not. // Implements resourcehandlers/ResourceHandler#SetVersion diff --git a/pkg/resourcehandlers/github/github_resource_handler_test.go b/pkg/resourcehandlers/github/github_resource_handler_test.go index 105323b3..8070c1a7 100644 --- a/pkg/resourcehandlers/github/github_resource_handler_test.go +++ b/pkg/resourcehandlers/github/github_resource_handler_test.go @@ -165,15 +165,23 @@ func TestResolveNodeSelector(t *testing.T) { }, } cases := []struct { - description string - inNode *api.Node - mux func(mux *http.ServeMux) - want *api.Node - wantError error + description string + inNode *api.Node + excludePaths []string + frontMatter map[string]interface{} + excludeFrontMatter map[string]interface{} + depth int32 + mux func(mux *http.ServeMux) + want *api.Node + wantError error }{ { "resolve node selector", n1, + nil, + nil, + nil, + 0, func(mux *http.ServeMux) { mux.HandleFunc("/repos/gardener/gardener/git/trees/master", func(w http.ResponseWriter, r *http.Request) { w.Write([]byte(fmt.Sprintf(` @@ -221,15 +229,15 @@ func TestResolveNodeSelector(t *testing.T) { }, Nodes: []*api.Node{ { - Name: "README", - ContentSelectors: []api.ContentSelector{{Source: "https://github.com/gardener/gardener/blob/master/docs/README.md"}}, + Name: "README.md", + Source: "https://github.com/gardener/gardener/blob/master/docs/README.md", }, { Name: "concepts", Nodes: []*api.Node{ { - Name: "apiserver", - ContentSelectors: []api.ContentSelector{{Source: "https://github.com/gardener/gardener/blob/master/docs/concepts/apiserver.md"}}, + Name: "apiserver.md", + Source: "https://github.com/gardener/gardener/blob/master/docs/concepts/apiserver.md", }, }, }, @@ -239,7 +247,6 @@ func TestResolveNodeSelector(t *testing.T) { }, } for _, c := range cases { - fmt.Println(c.description) ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) defer cancel() gh := &GitHub{ @@ -253,25 +260,14 @@ func TestResolveNodeSelector(t *testing.T) { c.mux(mux) } gh.Client = 
client - gotError := gh.ResolveNodeSelector(ctx, c.inNode) + gotError := gh.ResolveNodeSelector(ctx, c.inNode, c.excludePaths, c.frontMatter, c.excludeFrontMatter, c.depth) if gotError != nil { t.Errorf("error == %q, want %q", gotError, c.wantError) } c.want.SetParentsDownwards() api.SortNodesByName(c.inNode) api.SortNodesByName(c.want) - if !reflect.DeepEqual(c.inNode, c.want) { - s, _ := api.Serialize(&api.Documentation{ - Root: c.inNode, - }) - fmt.Printf(s) - fmt.Printf("\n\n") - s, _ = api.Serialize(&api.Documentation{ - Root: c.want, - }) - fmt.Printf(s) - t.Errorf("ResolveNodeSelector == %++v, want %++v", c.inNode, c.want) - } + assert.Equal(t, c.want, c.inNode) } } @@ -296,11 +292,12 @@ func TestName(t *testing.T) { "docs", "", } - cases := []struct { + testCases := []struct { description string inURL string cache *Cache - want string + wantName string + wantExt string }{ { "return file name for url", @@ -310,7 +307,8 @@ func TestName(t *testing.T) { "https://github.com/gardener/gardener/blob/master/docs/README.md": ghrl1, }, }, - "README.md", + "README", + "md", }, { "return folder name for url", @@ -321,17 +319,16 @@ func TestName(t *testing.T) { }, }, "docs", + "", }, } - for _, c := range cases { - fmt.Println(c.description) + for _, tc := range testCases { gh := &GitHub{ - cache: c.cache, - } - got := gh.Name(c.inURL) - if !reflect.DeepEqual(got, c.want) { - t.Errorf("Name(%q) == %q, want %q", c.inURL, got, c.want) + cache: tc.cache, } + gotName, gotExt := gh.ResourceName(tc.inURL) + assert.Equal(t, tc.wantName, gotName) + assert.Equal(t, tc.wantExt, gotExt) } } @@ -437,44 +434,6 @@ func TestGitHub_ResolveRelLink(t *testing.T) { } } -func TestGetLocalityDomainCandidate(t *testing.T) { - tests := []struct { - name string - link string - wantKey string - wantPath string - wantVersion string - wantErr error - }{ - { - name: "", - link: "https://github.com/gardener/gardener/tree/master/readme.md", - wantKey: "github.com/gardener/gardener", - wantPath: "gardener/gardener/readme.md", - wantVersion: "master", - wantErr: nil, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - gh := &GitHub{} - gotKey, gotPath, gotVersion, gotErr := gh.GetLocalityDomainCandidate(tt.link) - if gotErr != tt.wantErr { - t.Errorf("err %v!=%v", gotErr, tt.wantErr) - } - if gotKey != tt.wantKey { - t.Errorf("key %v!=%v", gotKey, tt.wantKey) - } - if gotVersion != tt.wantVersion { - t.Errorf("version %v!=%v", gotVersion, tt.wantVersion) - } - if gotPath != tt.wantPath { - t.Errorf("path %v!=%v", gotPath, tt.wantPath) - } - }) - } -} - func TestCleanupNodeTree(t *testing.T) { tests := []struct { name string @@ -484,47 +443,27 @@ func TestCleanupNodeTree(t *testing.T) { { name: "", node: &api.Node{ - Name: "00", - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/tree/master/docs/00", - }, - }, + Name: "00", + Source: "https://github.com/gardener/gardener/tree/master/docs/00", Nodes: []*api.Node{ &api.Node{ - Name: "01", - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/master/docs/01.md", - }, - }, + Name: "01.md", + Source: "https://github.com/gardener/gardener/blob/master/docs/01.md", }, &api.Node{ - Name: "02", - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/tree/master/docs/02", - }, - }, + Name: "02", + Source: 
"https://github.com/gardener/gardener/tree/master/docs/02", Nodes: []*api.Node{ &api.Node{ - Name: "021", - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/master/docs/021.md", - }, - }, + Name: "021.md", + Source: "https://github.com/gardener/gardener/blob/master/docs/021.md", }, }, }, &api.Node{ - Name: "03", - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/tree/master/docs/03", - }, - }, - Nodes: []*api.Node{}, + Name: "03", + Source: "https://github.com/gardener/gardener/tree/master/docs/03", + Nodes: []*api.Node{}, }, }, }, @@ -532,23 +471,15 @@ func TestCleanupNodeTree(t *testing.T) { Name: "00", Nodes: []*api.Node{ &api.Node{ - Name: "01", - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/master/docs/01.md", - }, - }, + Name: "01.md", + Source: "https://github.com/gardener/gardener/blob/master/docs/01.md", }, &api.Node{ Name: "02", Nodes: []*api.Node{ &api.Node{ - Name: "021", - ContentSelectors: []api.ContentSelector{ - api.ContentSelector{ - Source: "https://github.com/gardener/gardener/blob/master/docs/021.md", - }, - }, + Name: "021.md", + Source: "https://github.com/gardener/gardener/blob/master/docs/021.md", }, }, }, diff --git a/pkg/resourcehandlers/github/integration_test.go b/pkg/resourcehandlers/github/integration_test.go index cca197a3..4f41fee8 100644 --- a/pkg/resourcehandlers/github/integration_test.go +++ b/pkg/resourcehandlers/github/integration_test.go @@ -41,7 +41,7 @@ func TestResolveNodeSelectorLive(t *testing.T) { Path: "https://github.com/gardener/gardener/tree/master/docs", }, } - if err := gh.ResolveNodeSelector(ctx, node); err != nil { + if err := gh.ResolveNodeSelector(ctx, node, nil, nil, nil, 0); err != nil { fmt.Printf("%v", err) } b, _ := yaml.Marshal(node) diff --git a/pkg/resourcehandlers/resource_handlers.go b/pkg/resourcehandlers/resource_handlers.go index 705227da..1230df6a 100644 --- a/pkg/resourcehandlers/resource_handlers.go +++ b/pkg/resourcehandlers/resource_handlers.go @@ -15,12 +15,12 @@ type ResourceHandler interface { Accept(uri string) bool // ResolveNodeSelector resolves the NodeSelector rules of a Node into subnodes // hierarchy (Node.Nodes) - ResolveNodeSelector(ctx context.Context, node *api.Node) error + ResolveNodeSelector(ctx context.Context, node *api.Node, excludePaths []string, frontMatter map[string]interface{}, excludeFrontMatter map[string]interface{}, depth int32) error // Read a resource content at uri into a byte array Read(ctx context.Context, uri string) ([]byte, error) - // Name resolves the name of the resource from a URI - // Example: https://github.com/owner/repo/tree/master/a/b/c.md -> c.md - Name(uri string) string + // ResourceName returns a breakdown of a resource name in the link, consisting + // of name and potentially and extention without the dot. + ResourceName(link string) (string, string) // BuildAbsLink should return an absolute path of a relative link in regards of the provided // source BuildAbsLink(source, link string) (string, error) @@ -28,8 +28,6 @@ type ResourceHandler interface { // If the provided link is not referencing an embedable object, the function // returns absLink without changes. GetRawFormatLink(absLink string) (string, error) - // GetLocalityDomainCandidate ... 
- GetLocalityDomainCandidate(source string) (key, path, version string, err error) // SetVersion sets version to absLink according to the API scheme. For GitHub // for example this would replace e.g. the 'master' segment in the path with version SetVersion(absLink, version string) (string, error) diff --git a/pkg/resourcehandlers/resource_handlers_test.go b/pkg/resourcehandlers/resource_handlers_test.go index bf5bb4fa..2ca5a505 100644 --- a/pkg/resourcehandlers/resource_handlers_test.go +++ b/pkg/resourcehandlers/resource_handlers_test.go @@ -20,16 +20,18 @@ type TestResourceHandler struct { func (rh *TestResourceHandler) Accept(uri string) bool { return rh.accept } -func (rh *TestResourceHandler) ResolveNodeSelector(ctx context.Context, node *api.Node) error { +func (rh *TestResourceHandler) ResolveNodeSelector(ctx context.Context, node *api.Node, excludePaths []string, frontMatter map[string]interface{}, excludeFrontMatter map[string]interface{}, depth int32) error { return nil } func (rh *TestResourceHandler) Read(ctx context.Context, uri string) ([]byte, error) { return nil, nil } func (rh *TestResourceHandler) Name(uri string) string { - return string("") + return "" +} +func (rh *TestResourceHandler) ResourceName(uri string) (string, string) { + return "", "" } - func (rh *TestResourceHandler) BuildAbsLink(source, relLink string) (string, error) { return relLink, nil } diff --git a/pkg/util/tests/tests.go b/pkg/util/tests/tests.go index 22ed5428..9a1ffaa4 100644 --- a/pkg/util/tests/tests.go +++ b/pkg/util/tests/tests.go @@ -30,6 +30,10 @@ func SetKlogV(level int) { } } +// StrPtr is a convenience one-liner for producing pointers +// to string values +func StrPtr(s string) *string { return &s } + // ReadBodyAndClose properly handles the reading of body and closing the reader func ReadBodyAndClose(bodyReader io.ReadCloser) ([]byte, error) { defer bodyReader.Close() diff --git a/pkg/writers/dryRunWriter.go b/pkg/writers/dryRunWriter.go index 265bc58b..238352f0 100644 --- a/pkg/writers/dryRunWriter.go +++ b/pkg/writers/dryRunWriter.go @@ -131,6 +131,7 @@ func format(files []*file, b *bytes.Buffer) { b.WriteString(fmt.Sprintf("%s\n", s)) if i < len(dd)-1 { b.Write(bytes.Repeat([]byte(" "), i)) + continue } for _, st := range f.stats { b.Write([]byte(" ")) diff --git a/pkg/writers/dryRunWriter_test.go b/pkg/writers/dryRunWriter_test.go index 52fa6399..e2e86836 100644 --- a/pkg/writers/dryRunWriter_test.go +++ b/pkg/writers/dryRunWriter_test.go @@ -17,6 +17,7 @@ func TestFormat(t *testing.T) { in := []string{ "dev/__resources/015ec383-3c1b-487b-acff-4d7f4f8a1b14.png", "dev/__resources/173a7246-e1d5-40d5-b981-8cff293e177a.png", + "dev/doc/README.md", "dev/doc/aws_provider.md", "dev/doc/gardener", "dev/doc/gardener/_index.md", @@ -41,6 +42,7 @@ func TestFormat(t *testing.T) { 015ec383-3c1b-487b-acff-4d7f4f8a1b14.png 173a7246-e1d5-40d5-b981-8cff293e177a.png doc + README.md aws_provider.md gardener _index.md