From 1fd0629adb63bb7b3681f66d8dbd53d744937ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20D=C3=A4hne?= Date: Mon, 2 Oct 2023 15:08:50 +0200 Subject: [PATCH] FEATURE: code analysis supports multiple classes per source file --- analysis/codeAnalyzer.go | 63 +++++++++++++++++--------------- analysis/codeAnalyzer_test.go | 2 +- dataStructures/directedGraph.go | 14 +++++-- dataStructures/stringSet.go | 9 ++++- parsing/codeParser.go | 4 +- parsing/goParser.go | 6 +-- parsing/goParser_test.go | 2 +- parsing/groovyParser.go | 8 ++-- parsing/groovyParser_test.go | 2 +- parsing/javaParser.go | 8 ++-- parsing/javaParser_test.go | 3 +- parsing/javaScriptParser.go | 8 ++-- parsing/javaScriptParser_test.go | 10 ++--- parsing/phpParser.go | 8 ++-- parsing/phpParser_test.go | 2 +- 15 files changed, 84 insertions(+), 65 deletions(-) diff --git a/analysis/codeAnalyzer.go b/analysis/codeAnalyzer.go index 97338e0..2c8431a 100644 --- a/analysis/codeAnalyzer.go +++ b/analysis/codeAnalyzer.go @@ -1,38 +1,41 @@ package analysis import ( - "github.com/sandstorm/dependency-analysis/dataStructures" - "github.com/sandstorm/dependency-analysis/parsing" "os" "path/filepath" "regexp" + + "github.com/sandstorm/dependency-analysis/dataStructures" + "github.com/sandstorm/dependency-analysis/parsing" ) // mapping from file path to source-unit -type sourceUnitByFile = map[string][]string +type sourceUnitByFileType = map[string][][]string // mapping from source-unit to all its imports type dependenciesBySourceUnit = map[string]*dataStructures.StringSet func BuildDependencyGraph(settings *AnalyzerSettings) (*dataStructures.DirectedStringGraph, error) { - sourceUnits := make(sourceUnitByFile) + sourceUnitsByFile := make(sourceUnitByFileType) if err := filepath.Walk(settings.SourcePath, initializeParsers(settings.IncludePattern)); err != nil { return nil, err } - if err := filepath.Walk(settings.SourcePath, findSourceUnits(settings.IncludePattern, sourceUnits)); err != nil { + if err := filepath.Walk(settings.SourcePath, findSourceUnits(settings.IncludePattern, sourceUnitsByFile)); err != nil { return nil, err } var rootPackage []string = nil - for _, sourceUnit := range sourceUnits { - if rootPackage == nil { - rootPackage = sourceUnit - } else { - commonPrefixLength := getCommonPrefixLength(rootPackage, sourceUnit) - rootPackage = rootPackage[:commonPrefixLength] + for _, sourceUnits := range sourceUnitsByFile { + for _, sourceUnit := range sourceUnits { + if rootPackage == nil { + rootPackage = sourceUnit + } else { + commonPrefixLength := getCommonPrefixLength(rootPackage, sourceUnit) + rootPackage = rootPackage[:commonPrefixLength] + } } } - return findDependencies(rootPackage, sourceUnits, settings.Depth) + return findDependencies(rootPackage, sourceUnitsByFile, settings.Depth) } func initializeParsers(includePattern *regexp.Regexp) filepath.WalkFunc { @@ -47,7 +50,7 @@ func initializeParsers(includePattern *regexp.Regexp) filepath.WalkFunc { } } -func findSourceUnits(includePattern *regexp.Regexp, result sourceUnitByFile) filepath.WalkFunc { +func findSourceUnits(includePattern *regexp.Regexp, result sourceUnitByFileType) filepath.WalkFunc { return func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -58,9 +61,9 @@ func findSourceUnits(includePattern *regexp.Regexp, result sourceUnitByFile) fil return err } defer fileReader.Close() - sourceUnit := parsing.ParseSourceUnit(path, fileReader) - if len(sourceUnit) > 0 { - result[path] = sourceUnit + sourceUnits := parsing.ParseSourceUnit(path, fileReader) + if len(sourceUnits) > 0 { + result[path] = sourceUnits } } return nil @@ -77,11 +80,11 @@ func getCommonPrefixLength(left []string, right []string) int { return limit } -func findDependencies(rootPackage []string, sourceUnits sourceUnitByFile, depth int) (*dataStructures.DirectedStringGraph, error) { +func findDependencies(rootPackage []string, sourceUnitsByFile sourceUnitByFileType, depth int) (*dataStructures.DirectedStringGraph, error) { dependencyGraph := dataStructures.NewDirectedStringGraph() prefixLength := len(rootPackage) segmentLimit := len(rootPackage) + depth - for path, sourceUnit := range sourceUnits { + for path, sourceUnits := range sourceUnitsByFile { fileReader, err := os.Open(path) if err != nil { return nil, err @@ -91,17 +94,19 @@ func findDependencies(rootPackage []string, sourceUnits sourceUnitByFile, depth if err != nil { return nil, err } - sourceUnitString := parsing.JoinPathSegments( - path, - sourceUnit[prefixLength:min(segmentLimit, len(sourceUnit))]) - dependencyGraph.AddNode(sourceUnitString) - for _, dependency := range allDependencies { - if arrayStartsWith(dependency, rootPackage) { - dependencyString := parsing.JoinPathSegments( - path, - dependency[prefixLength:min(segmentLimit, len(dependency))]) - if sourceUnitString != dependencyString { - dependencyGraph.AddEdge(sourceUnitString, dependencyString) + for _, sourceUnit := range sourceUnits { + sourceUnitString := parsing.JoinPathSegments( + path, + sourceUnit[prefixLength:min(segmentLimit, len(sourceUnit))]) + dependencyGraph.AddNode(sourceUnitString) + for _, dependency := range allDependencies { + if arrayStartsWith(dependency, rootPackage) { + dependencyString := parsing.JoinPathSegments( + path, + dependency[prefixLength:min(segmentLimit, len(dependency))]) + if sourceUnitString != dependencyString { + dependencyGraph.AddEdge(sourceUnitString, dependencyString) + } } } } diff --git a/analysis/codeAnalyzer_test.go b/analysis/codeAnalyzer_test.go index 7e9800b..e8d150b 100644 --- a/analysis/codeAnalyzer_test.go +++ b/analysis/codeAnalyzer_test.go @@ -26,7 +26,7 @@ func TestBuildDependencyGraph(t *testing.T) { AddEdge("analysis", "parsing"). AddEdge("analysis", "dataStructures"). AddEdge("parsing", "dataStructures"). - AddEdge("rendering", "dataStructuress") + AddEdge("rendering", "dataStructures") AssertEquals(t, "incorrect graph", expected, actual) }) } diff --git a/dataStructures/directedGraph.go b/dataStructures/directedGraph.go index 419f34d..9b7c117 100644 --- a/dataStructures/directedGraph.go +++ b/dataStructures/directedGraph.go @@ -1,6 +1,9 @@ package dataStructures -import "fmt" +import ( + "fmt" + "sort" +) // directed graph with nodes of type string type DirectedStringGraph struct { @@ -55,8 +58,13 @@ func (this *DirectedStringGraph) GetEdges() map[string][]string { func (this *DirectedStringGraph) String() string { result := "{ " - for source, targets := range this.Edges { - result += fmt.Sprintf("%v -> %v ", source, targets) + sources := make([]string, 0, len(this.Edges)) + for source := range this.Edges { + sources = append(sources, source) + } + sort.Strings(sources) + for _, source := range sources { + result += fmt.Sprintf("%v -> %v ", source, this.Edges[source]) } return result + "}" } diff --git a/dataStructures/stringSet.go b/dataStructures/stringSet.go index b86b23c..799978f 100644 --- a/dataStructures/stringSet.go +++ b/dataStructures/stringSet.go @@ -1,7 +1,7 @@ package dataStructures import ( - "fmt" + "sort" ) type StringSet struct { @@ -44,8 +44,13 @@ func (this *StringSet) ToArray() []string { func (this *StringSet) String() string { result := "[ " + keys := make([]string, 0, len(this.content)) for key := range this.content { - result += fmt.Sprintf("%v ", key) + keys = append(keys, key) + } + sort.Strings(keys) + for _, key := range keys { + result += key + " " } return result + "]" } diff --git a/parsing/codeParser.go b/parsing/codeParser.go index 19a5fb6..bd71e6e 100644 --- a/parsing/codeParser.go +++ b/parsing/codeParser.go @@ -24,7 +24,7 @@ func InitializeParsers(filePath string) error { // The package path is already split by the language's delimiter, // e.g. in Java de.sandstorm.test.helpers.ListHelpers results in // [de sandstorm test helpers ListHelpers] -func ParseSourceUnit(sourcePath string, fileReader io.Reader) []string { +func ParseSourceUnit(sourcePath string, fileReader io.Reader) [][]string { switch { case strings.HasSuffix(sourcePath, ".go"): filePathSplit := strings.Split(sourcePath, "/") @@ -47,7 +47,7 @@ func ParseSourceUnit(sourcePath string, fileReader io.Reader) []string { case strings.HasSuffix(sourcePath, ".jsx"): return ParseJavaScriptSourceUnit(sourcePath) } - return []string{} + return [][]string{} } // 3rd step during code analysis, called for each source unit (see 2nc step) diff --git a/parsing/goParser.go b/parsing/goParser.go index 30d66b3..bc2f406 100644 --- a/parsing/goParser.go +++ b/parsing/goParser.go @@ -31,13 +31,13 @@ func ParseGoMod(filePath string) error { return nil } -func ParseGoSourceUnit(fileName string, fileReader io.Reader) []string { +func ParseGoSourceUnit(fileName string, fileReader io.Reader) [][]string { packageString := getFirstLineMatchInReader(fileReader, golangParser.packageRegex) if packageString != "" { packagePath := strings.Split(packageString, "/") - return append(golangParser.modulePath, append(packagePath, fileName)...) + return [][]string{append(golangParser.modulePath, append(packagePath, fileName)...)} } else { - return []string{} + return [][]string{} } } diff --git a/parsing/goParser_test.go b/parsing/goParser_test.go index 0f9cac2..171a6d4 100644 --- a/parsing/goParser_test.go +++ b/parsing/goParser_test.go @@ -55,7 +55,7 @@ func TestParseGoSourceUnit(t *testing.T) { t.Run(testCase.name, func(t *testing.T) { file := bytes.NewBufferString(testCase.fileContent) AssertEquals(t, - testCase.expected, + [][]string{testCase.expected}, ParseGoSourceUnit(testCase.fileName, file), ) }) diff --git a/parsing/groovyParser.go b/parsing/groovyParser.go index 3a99eac..1a1a0c1 100644 --- a/parsing/groovyParser.go +++ b/parsing/groovyParser.go @@ -17,18 +17,18 @@ var groovyParser = struct { importRegex: regexp.MustCompile(`import\s+(?:static\s+)?([^; \n]+)\s*;?`), } -func ParseGroovySourceUnit(fileReader io.Reader) []string { +func ParseGroovySourceUnit(fileReader io.Reader) [][]string { scanner := bufio.NewScanner(fileReader) scanner.Split(bufio.ScanLines) packageString := getFirstLineMatchInScanner(scanner, groovyParser.packageRegex) className := getFirstLineMatchInScanner(scanner, groovyParser.classRegex) if packageString != "" && className != "" { - return append(strings.Split(packageString, "."), className) + return [][]string{append(strings.Split(packageString, "."), className)} } if className != "" { - return []string{className} + return [][]string{[]string{className}} } - return []string{} + return [][]string{} } func ParseGroovyImports(fileReader io.Reader) ([][]string, error) { diff --git a/parsing/groovyParser_test.go b/parsing/groovyParser_test.go index c1051fe..4454687 100644 --- a/parsing/groovyParser_test.go +++ b/parsing/groovyParser_test.go @@ -55,7 +55,7 @@ func TestParseGroovySourceUnit(t *testing.T) { t.Run(testCase.name, func(t *testing.T) { file := bytes.NewBufferString(testCase.fileContent) AssertEquals(t, - testCase.expected, + [][]string{testCase.expected}, ParseGroovySourceUnit(file), ) }) diff --git a/parsing/javaParser.go b/parsing/javaParser.go index 3f7ef9d..c06b7e5 100644 --- a/parsing/javaParser.go +++ b/parsing/javaParser.go @@ -17,18 +17,18 @@ var javaParser = struct { importRegex: regexp.MustCompile(`import\s+(?:static\s+)?([^; ]+)\s*;`), } -func ParseJavaSourceUnit(fileReader io.Reader) []string { +func ParseJavaSourceUnit(fileReader io.Reader) [][]string { scanner := bufio.NewScanner(fileReader) scanner.Split(bufio.ScanLines) packageString := getFirstLineMatchInScanner(scanner, javaParser.packageRegex) className := getFirstLineMatchInScanner(scanner, javaParser.classRegex) if packageString != "" && className != "" { - return append(strings.Split(packageString, "."), className) + return [][]string{append(strings.Split(packageString, "."), className)} } if className != "" { - return []string{className} + return [][]string{[]string{className}} } - return []string{} + return [][]string{} } func ParseJavaImports(fileReader io.Reader) ([][]string, error) { diff --git a/parsing/javaParser_test.go b/parsing/javaParser_test.go index e9a063e..b0a0755 100644 --- a/parsing/javaParser_test.go +++ b/parsing/javaParser_test.go @@ -41,12 +41,13 @@ func TestParseJavaSourceUnit(t *testing.T) { expected: []string{"de", "sandstorm", "test", "Main"}, }, // TODO: test private static final class + // TODO: interfaces and enums (also other languages) } for _, testCase := range testCases { t.Run(testCase.name, func(t *testing.T) { file := bytes.NewBufferString(testCase.fileContent) AssertEquals(t, - testCase.expected, + [][]string{testCase.expected}, ParseJavaSourceUnit(file), ) }) diff --git a/parsing/javaScriptParser.go b/parsing/javaScriptParser.go index 6475f07..783b677 100644 --- a/parsing/javaScriptParser.go +++ b/parsing/javaScriptParser.go @@ -13,18 +13,18 @@ var javaScriptParser = struct { importRegex: regexp.MustCompile(`(?:import\s+.*)?from\s+["']([^'"]+)["'];?`), } -func ParseJavaScriptSourceUnit(sourcePath string) []string { +func ParseJavaScriptSourceUnit(sourcePath string) [][]string { if strings.Contains(sourcePath, "node_modules") { - return []string{} + return [][]string{} } else { parent := filepath.Dir(sourcePath) parentSegments := strings.Split(parent, "/") fileName := filepath.Base(sourcePath) fileBasename := strings.TrimSuffix(fileName, filepath.Ext(fileName)) if fileBasename == "index" { - return parentSegments + return [][]string{parentSegments} } else { - return append(parentSegments, fileBasename) + return [][]string{append(parentSegments, fileBasename)} } } } diff --git a/parsing/javaScriptParser_test.go b/parsing/javaScriptParser_test.go index dded561..e0acedc 100644 --- a/parsing/javaScriptParser_test.go +++ b/parsing/javaScriptParser_test.go @@ -9,27 +9,27 @@ func TestParseJavaScriptSourceUnit(t *testing.T) { testCases := []struct { name string sourcePath string - expected []string + expected [][]string }{ { name: "file path without dots", sourcePath: "src/Components/Button/button.js", - expected: []string{"src", "Components", "Button", "button"}, + expected: [][]string{[]string{"src", "Components", "Button", "button"}}, }, { name: "file path with dots", sourcePath: "a/.././src/a/../b/c/../.././Components/Button/button.js", - expected: []string{"src", "Components", "Button", "button"}, + expected: [][]string{[]string{"src", "Components", "Button", "button"}}, }, { name: "file path with index.js", sourcePath: "a/.././src/a/../b/c/../.././Components/Button/index.js", - expected: []string{"src", "Components", "Button"}, + expected: [][]string{[]string{"src", "Components", "Button"}}, }, { name: "ignore node_modules", sourcePath: "node_modules/some/lib.js", - expected: []string{}, + expected: [][]string{}, }, } for _, testCase := range testCases { diff --git a/parsing/phpParser.go b/parsing/phpParser.go index d9f9d29..33475d1 100644 --- a/parsing/phpParser.go +++ b/parsing/phpParser.go @@ -17,18 +17,18 @@ var phpParser = struct { useRegex: regexp.MustCompile(`use\s+([^; ]+)\s*;`), } -func ParsePhpSourceUnit(fileReader io.Reader) []string { +func ParsePhpSourceUnit(fileReader io.Reader) [][]string { scanner := bufio.NewScanner(fileReader) scanner.Split(bufio.ScanLines) namespace := getFirstLineMatchInScanner(scanner, phpParser.namespaceRegex) className := getFirstLineMatchInScanner(scanner, phpParser.classRegex) if namespace != "" && className != "" { - return append(strings.Split(namespace, "\\"), className) + return [][]string{append(strings.Split(namespace, "\\"), className)} } if className != "" { - return []string{className} + return [][]string{[]string{className}} } - return []string{} + return [][]string{} } func ParsePhpImports(fileReader io.Reader) ([][]string, error) { diff --git a/parsing/phpParser_test.go b/parsing/phpParser_test.go index e88b25b..8659b3c 100644 --- a/parsing/phpParser_test.go +++ b/parsing/phpParser_test.go @@ -47,7 +47,7 @@ func TestParsePhpSourceUnit(t *testing.T) { t.Run(testCase.name, func(t *testing.T) { file := bytes.NewBufferString(testCase.fileContent) AssertEquals(t, - testCase.expected, + [][]string{testCase.expected}, ParsePhpSourceUnit(file), ) })