From e2e11a31a10ba0c2d2f105760b0a2e3580414038 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Wed, 28 May 2025 12:28:33 +0530 Subject: [PATCH 01/12] feat: reduce dependency of yaml-runtime issue interface on the pkg/analysis module --- pkg/analysis/analyze.go | 137 +++++++++++++++++------------------ pkg/analysis/analyze_test.go | 3 +- pkg/analysis/pattern_rule.go | 14 ++-- pkg/cli/cli.go | 20 ++--- pkg/cli/test_runner.go | 2 +- 5 files changed, 88 insertions(+), 88 deletions(-) diff --git a/pkg/analysis/analyze.go b/pkg/analysis/analyze.go index 53891bcf..d08924ca 100644 --- a/pkg/analysis/analyze.go +++ b/pkg/analysis/analyze.go @@ -1,77 +1,76 @@ package analysis import ( - "encoding/json" "fmt" "path/filepath" "regexp" "strings" sitter "github.com/smacker/go-tree-sitter" - "globstar.dev/pkg/config" + ana "globstar.dev/analysis" ) -type Issue struct { - // The category of the issue - Category config.Category - // The severity of the issue - Severity config.Severity - // The message to display to the user - Message string - // The file path of the file that the issue was found in - Filepath string - // The range of the issue in the source code - Range sitter.Range - // (optional) The AST node that caused the issue - Node *sitter.Node - // Id is a unique ID for the issue. - // Issue that have 'Id's can be explained using the `globstar desc` command. - Id *string -} - -func (i *Issue) AsJson() ([]byte, error) { - type location struct { - Row int `json:"row"` - Column int `json:"column"` - } - - type position struct { - Filename string `json:"filename"` - Start location `json:"start"` - End location `json:"end"` - } - - type issueJson struct { - Category config.Category `json:"category"` - Severity config.Severity `json:"severity"` - Message string `json:"message"` - Range position `json:"range"` - Id string `json:"id"` - } - issue := issueJson{ - Category: i.Category, - Severity: i.Severity, - Message: i.Message, - Range: position{ - Filename: i.Filepath, - Start: location{ - Row: int(i.Range.StartPoint.Row), - Column: int(i.Range.StartPoint.Column), - }, - End: location{ - Row: int(i.Range.EndPoint.Row), - Column: int(i.Range.EndPoint.Column), - }, - }, - Id: *i.Id, - } - - return json.Marshal(issue) -} - -func (i *Issue) AsText() ([]byte, error) { - return []byte(fmt.Sprintf("%s:%d:%d:%s", i.Filepath, i.Range.StartPoint.Row, i.Range.StartPoint.Column, i.Message)), nil -} +// type Issue struct { +// // The category of the issue +// Category config.Category +// // The severity of the issue +// Severity config.Severity +// // The message to display to the user +// Message string +// // The file path of the file that the issue was found in +// Filepath string +// // The range of the issue in the source code +// Range sitter.Range +// // (optional) The AST node that caused the issue +// Node *sitter.Node +// // Id is a unique ID for the issue. +// // Issue that have 'Id's can be explained using the `globstar desc` command. +// Id *string +// } + +// func (i *Issue) AsJson() ([]byte, error) { +// type location struct { +// Row int `json:"row"` +// Column int `json:"column"` +// } + +// type position struct { +// Filename string `json:"filename"` +// Start location `json:"start"` +// End location `json:"end"` +// } + +// type issueJson struct { +// Category config.Category `json:"category"` +// Severity config.Severity `json:"severity"` +// Message string `json:"message"` +// Range position `json:"range"` +// Id string `json:"id"` +// } +// issue := issueJson{ +// Category: i.Category, +// Severity: i.Severity, +// Message: i.Message, +// Range: position{ +// Filename: i.Filepath, +// Start: location{ +// Row: int(i.Range.StartPoint.Row), +// Column: int(i.Range.StartPoint.Column), +// }, +// End: location{ +// Row: int(i.Range.EndPoint.Row), +// Column: int(i.Range.EndPoint.Column), +// }, +// }, +// Id: *i.Id, +// } + +// return json.Marshal(issue) +// } + +// func (i *Issue) AsText() ([]byte, error) { +// return []byte(fmt.Sprintf("%s:%d:%d:%s", i.Filepath, i.Range.StartPoint.Row, i.Range.StartPoint.Column, i.Message)), nil +// } type Analyzer struct { Language Language @@ -92,7 +91,7 @@ type Analyzer struct { // exitCheckers maps node types to the checkers that should be applied // when leaving that node. exitCheckersForNode map[string][]Checker - issuesRaised []*Issue + issuesRaised []*ana.Issue } type SkipComment struct { @@ -147,7 +146,7 @@ func NewAnalyzer(file *ParseResult, checkers []Checker) *Analyzer { return ana } -func (ana *Analyzer) Analyze() []*Issue { +func (ana *Analyzer) Analyze() []*ana.Issue { WalkTree(ana.ParseResult.Ast, ana) ana.runPatternCheckers() return ana.issuesRaised @@ -329,14 +328,14 @@ func (ana *Analyzer) runPatternCheckers() { } } -func (ana *Analyzer) Report(issue *Issue) { +func (ana *Analyzer) Report(issue *ana.Issue) { ana.issuesRaised = append(ana.issuesRaised, issue) } -func RunYamlCheckers(path string, analyzers []*Analyzer) ([]*Issue, error) { +func RunYamlCheckers(path string, analyzers []*Analyzer) ([]*ana.Issue, error) { InitializeSkipComments(analyzers) - issues := []*Issue{} + issues := []*ana.Issue{} for _, analyzer := range analyzers { issues = append(issues, analyzer.Analyze()...) } @@ -405,7 +404,7 @@ func GatherSkipInfo(fileContext *ParseResult) []*SkipComment { return skipLines } -func (ana *Analyzer) ContainsSkipcq(skipLines []*SkipComment, issue *Issue) bool { +func (ana *Analyzer) ContainsSkipcq(skipLines []*SkipComment, issue *ana.Issue) bool { if len(skipLines) == 0 { return false } diff --git a/pkg/analysis/analyze_test.go b/pkg/analysis/analyze_test.go index 696a8fa5..31efdf01 100644 --- a/pkg/analysis/analyze_test.go +++ b/pkg/analysis/analyze_test.go @@ -6,6 +6,7 @@ import ( sitter "github.com/smacker/go-tree-sitter" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "globstar.dev/analysis" ) func parseTestFile(t *testing.T, filename string, source string, language Language) *ParseResult { @@ -150,7 +151,7 @@ func TestSkipCq(t *testing.T) { require.NotNil(t, assertNode, "failed to capture assert node") - issue := &Issue{ + issue := &analysis.Issue{ Filepath: "no-assert.test.py", Node: assertNode, Id: &tt.checkerId, diff --git a/pkg/analysis/pattern_rule.go b/pkg/analysis/pattern_rule.go index 03b19be6..3af08d34 100644 --- a/pkg/analysis/pattern_rule.go +++ b/pkg/analysis/pattern_rule.go @@ -7,6 +7,7 @@ import ( "github.com/gobwas/glob" sitter "github.com/smacker/go-tree-sitter" + analysis "globstar.dev/analysis" "globstar.dev/pkg/config" "gopkg.in/yaml.v3" ) @@ -97,14 +98,13 @@ func (r *patternCheckerImpl) OnMatch( ) } } - raisedIssue := &Issue{ - Range: matchedNode.Range(), - Node: matchedNode, - Message: message, + raisedIssue := &analysis.Issue{ + Message: message, Filepath: ana.ParseResult.FilePath, - Category: r.Category(), - Severity: r.Severity(), - Id: &r.issueId, + Category: analysis.Category(r.Category()), + Severity: analysis.Severity(r.Severity()), + Id: &r.issueId, + Node: matchedNode, } filepath := ana.ParseResult.FilePath diff --git a/pkg/cli/cli.go b/pkg/cli/cli.go index f6183aca..d66f0fd2 100644 --- a/pkg/cli/cli.go +++ b/pkg/cli/cli.go @@ -298,7 +298,7 @@ func (c *Cli) CheckFile( checkersMap map[analysis.Language][]analysis.Checker, patternCheckers map[analysis.Language][]analysis.YamlChecker, path string, -) ([]*analysis.Issue, error) { +) ([]*goAnalysis.Issue, error) { lang := analysis.LanguageFromFilePath(path) checkers := checkersMap[lang] if checkers == nil && patternCheckers == nil { @@ -320,20 +320,20 @@ func (c *Cli) CheckFile( } type checkResult struct { - issues []*analysis.Issue + issues []*goAnalysis.Issue numFilesChecked int } func (lr *checkResult) GetExitStatus(conf *config.Config) int { for _, issue := range lr.issues { for _, failCategory := range conf.FailWhen.CategoryIn { - if issue.Category == failCategory { + if issue.Category == goAnalysis.Category(failCategory) { return conf.FailWhen.ExitCode } } for _, failSeverity := range conf.FailWhen.SeverityIn { - if issue.Severity == failSeverity { + if issue.Severity == goAnalysis.Severity(failSeverity) { return conf.FailWhen.ExitCode } } @@ -494,11 +494,11 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { txt, _ := issue.AsText() log.Error().Msg(string(txt)) - result.issues = append(result.issues, &analysis.Issue{ + result.issues = append(result.issues, &goAnalysis.Issue{ Filepath: issue.Filepath, Message: issue.Message, - Severity: config.Severity(issue.Severity), - Category: config.Category(issue.Category), + Severity: goAnalysis.Severity(issue.Severity), + Category: goAnalysis.Category(issue.Category), Node: issue.Node, Id: issue.Id, }) @@ -516,11 +516,11 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { } for _, issue := range customGoIssues { - result.issues = append(result.issues, &analysis.Issue{ + result.issues = append(result.issues, &goAnalysis.Issue{ Filepath: issue.Filepath, Message: issue.Message, - Severity: config.Severity(issue.Severity), - Category: config.Category(issue.Category), + Severity: goAnalysis.Severity(issue.Severity), + Category: goAnalysis.Category(issue.Category), Node: issue.Node, Id: issue.Id, }) diff --git a/pkg/cli/test_runner.go b/pkg/cli/test_runner.go index e213ad3e..a951daf4 100644 --- a/pkg/cli/test_runner.go +++ b/pkg/cli/test_runner.go @@ -112,7 +112,7 @@ func runTestCases(dir string) (passed bool, err error) { var got []int for _, issue := range issues { - got = append(got, int(issue.Range.StartPoint.Row)+1) // 0-indexed to 1-indexed + got = append(got, int(issue.Node.Range().StartPoint.Row)+1) // 0-indexed to 1-indexed } slices.Sort(got) From 2337a1d1b518be30d0878fd08a047247fc707d09 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Fri, 30 May 2025 01:31:38 +0530 Subject: [PATCH 02/12] feat: port the major parsing logic for yaml analyzers to the updated analysis engine --- analysis/language.go | 61 +++ analysis/testrunner.go | 50 +++ analysis/yaml.go | 290 ++++++++++++ checkers/checker.go | 15 +- cmd/globstar/main.go | 2 +- pkg/analysis/analyze.go | 846 +++++++++++++++++------------------ pkg/analysis/analyze_test.go | 304 ++++++------- pkg/analysis/pattern_rule.go | 717 +++++++++++++++-------------- pkg/analysis/rule.go | 2 +- pkg/cli/cli.go | 76 ++-- pkg/cli/test_runner.go | 356 +++++++-------- 11 files changed, 1567 insertions(+), 1152 deletions(-) create mode 100644 analysis/yaml.go diff --git a/analysis/language.go b/analysis/language.go index 3061d108..621afc0f 100644 --- a/analysis/language.go +++ b/analysis/language.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "strings" sitter "github.com/smacker/go-tree-sitter" @@ -83,6 +84,66 @@ const ( LangSwift ) +func DecodeLanguage(language string) Language { + language = strings.ToLower(language) + switch language { + case "javascript", "js": + return LangJs + case "typescript", "ts": + return LangTs + case "jsx", "tsx": + return LangTsx + case "python", "py": + return LangPy + case "ocaml", "ml": + return LangOCaml + case "docker", "dockerfile": + return LangDockerfile + case "java": + return LangJava + case "kotlin", "kt": + return LangKotlin + case "rust", "rs": + return LangRust + case "ruby", "rb": + return LangRuby + case "lua": + return LangLua + case "yaml", "yml": + return LangYaml + case "sql": + return LangSql + case "css", "css3": + return LangCss + case "markdown", "md": + return LangMarkdown + case "sh", "bash": + return LangBash + case "csharp", "cs": + return LangCsharp + case "elixir", "ex": + return LangElixir + case "elm": + return LangElm + case "go": + return LangGo + case "groovy": + return LangGroovy + case "hcl", "tf": + return LangHcl + case "html": + return LangHtml + case "php": + return LangPhp + case "scala": + return LangScala + case "swift": + return LangSwift + default: + return LangUnknown + } +} + // tsGrammarForLang returns the tree-sitter grammar for the given language. // May return `nil` when `lang` is `LangUnkown`. func (lang Language) Grammar() *sitter.Language { diff --git a/analysis/testrunner.go b/analysis/testrunner.go index e0535ae4..1558468b 100644 --- a/analysis/testrunner.go +++ b/analysis/testrunner.go @@ -3,6 +3,7 @@ package analysis import ( "fmt" "io/fs" + "os" "path/filepath" "regexp" "sort" @@ -144,6 +145,48 @@ func getExpectedIssuesInDir(testDir string, fileFilter func(string) bool) (map[s return expectedIssues, nil } +func discoverYamlAnalyzers(testDir string) ([]*Analyzer, error) { + var yamlAnalyzers []*Analyzer + + err := filepath.Walk(testDir, func(path string, info fs.FileInfo, err error) error { + if err != nil { + return nil + } + + if info.IsDir() { + return nil + } + + fileExt := filepath.Ext(path) + isYamlFile := fileExt == ".yaml" || fileExt == ".yml" + if !isYamlFile { + return nil + } + + // Check if there's a corresponding test file + baseName := strings.TrimSuffix(path, fileExt) + + // Try to read the YAML checker + analyzer, err := ReadFromFile(path) + if err != nil { + // Skip files that aren't valid checkers + return nil + } + + // Check if corresponding test file exists + testFile := baseName + ".test" + GetExtFromLanguage(analyzer.Language) + if _, err := os.Stat(testFile); os.IsNotExist(err) { + // Skip if no test file exists + return nil + } + + yamlAnalyzers = append(yamlAnalyzers, &analyzer) + return nil + }) + + return yamlAnalyzers, err +} + func getExpectedIssuesInFile(file *ParseResult, query *sitter.Query) map[int][]string { commentIdentifier := GetEscapedCommentIdentifierFromPath(file.FilePath) @@ -210,6 +253,13 @@ func RunAnalyzerTests(testDir string, analyzers []*Analyzer) (string, string, bo // if there's a test file in the testDir for which there's no analyzer, // it's most likely a YAML checker test, so skip it + + yamlAnalyzers, err := discoverYamlAnalyzers(testDir) + if err != nil { + return "", "", false, err + } + analyzers = append(analyzers, yamlAnalyzers...) + likelyTestFiles := []string{} for _, analyzer := range analyzers { likelyTestFiles = append(likelyTestFiles, fmt.Sprintf("%s.test%s", analyzer.Name, GetExtFromLanguage(analyzer.Language))) diff --git a/analysis/yaml.go b/analysis/yaml.go new file mode 100644 index 00000000..8e4e8888 --- /dev/null +++ b/analysis/yaml.go @@ -0,0 +1,290 @@ +package analysis + +import ( + "fmt" + "os" + "strings" + + "github.com/gobwas/glob" + sitter "github.com/smacker/go-tree-sitter" + "gopkg.in/yaml.v3" +) + +// To get a node back from a tree-sitter query, it *must* have a capture name. +// So: (call_expression) will match nothing, but (call_expression) @some_key +// will match all call expressions. +// For filtering patterns with clauses in the yaml file, like: +// filters: +// - pattern-inside: (call_expression) +// - pattern-not-inside: (catch_block) +// +// We need a to append a key name at the end of the pattern written by the user. +// This is the key that we will use. +const filterPatternKey = "__filter__key__" + +type filterYaml struct { + PatternInside string `yaml:"pattern-inside,omitempty"` + PatternNotInside string `yaml:"pattern-not-inside,omitempty"` +} + +type pathFilterYaml struct { + Exclude []string `yaml:"exclude,omitempty"` + Include []string `yaml:"include,omitempty"` +} + +// NodeFilter is a filter that can be applied to a PatternChecker to restrict +// the the nodes that the checker is applied to. +// The checker is only applied to nodes that have a parent matching (or not matching) the query. +type NodeFilter struct { + query *sitter.Query + shouldMatch bool +} + +// PathFilter is a glob that can be applied to a PatternChecker to restrict +// the files that the checker is applied to. +type PathFilter struct { + ExcludeGlobs []glob.Glob + IncludeGlobs []glob.Glob +} + +type Yaml struct { + Language string `yaml:"language"` + Code string `yaml:"name"` + Message string `yaml:"message"` + Category Category `yaml:"category"` + Severity Severity `yaml:"severity"` + Pattern string `yaml:"pattern"` + Patterns []string `yaml:"patterns"` + Description string `yaml:"description"` + Exclude []string `yaml:"exclude,omitempty"` + Include []string `yaml:"include,omitempty"` + Filters []filterYaml `yaml:"filters,omitempty"` + PathFilter *pathFilterYaml `yaml:"path_filter,omitempty"` +} + +type YamlAnalyzer struct { + Analyzer Analyzer + Patterns []*sitter.Query + NodeFilter []NodeFilter + PathFilter *PathFilter + Message string +} + +// ReadFromFile reads a pattern checker definition from a YAML config file. +func ReadFromFile(filePath string) (Analyzer, error) { + fileContent, err := os.ReadFile(filePath) + if err != nil { + return Analyzer{}, err + } + + return ReadFromBytes(fileContent) +} + +// ReadFromBytes reads a pattern checker definition from bytes array +func ReadFromBytes(fileContent []byte) (Analyzer, error) { + var checker Yaml + if err := yaml.Unmarshal(fileContent, &checker); err != nil { + return Analyzer{}, err + } + + lang := DecodeLanguage(checker.Language) + if lang == LangUnknown { + return Analyzer{}, fmt.Errorf("unknown language code: '%s'", checker.Language) + } + + if checker.Code == "" { + return Analyzer{}, fmt.Errorf("no name provided in checker definition") + } + + if checker.Message == "" { + return Analyzer{}, fmt.Errorf("no message provided in checker '%s'", checker.Code) + } + + var patterns []*sitter.Query + if checker.Pattern != "" { + pattern, err := sitter.NewQuery([]byte(checker.Pattern), lang.Grammar()) + if err != nil { + return Analyzer{}, err + } + patterns = append(patterns, pattern) + } else if len(checker.Patterns) > 0 { + for _, patternStr := range checker.Patterns { + pattern, err := sitter.NewQuery([]byte(patternStr), lang.Grammar()) + if err != nil { + return Analyzer{}, err + } + patterns = append(patterns, pattern) + } + } else { + return Analyzer{}, fmt.Errorf("no pattern provided in checker '%s'", checker.Code) + } + + if checker.Pattern != "" && len(checker.Patterns) > 0 { + return Analyzer{}, fmt.Errorf("only one of 'pattern' or 'patterns' can be provided in a checker definition") + } + + // include and exclude patterns + var pathFilter *PathFilter + if checker.Exclude != nil || checker.Include != nil { + pathFilter = &PathFilter{ + ExcludeGlobs: make([]glob.Glob, 0, len(checker.Exclude)), + IncludeGlobs: make([]glob.Glob, 0, len(checker.Include)), + } + + for _, exclude := range checker.Exclude { + g, err := glob.Compile(exclude) + if err != nil { + return Analyzer{}, err + } + pathFilter.ExcludeGlobs = append(pathFilter.ExcludeGlobs, g) + } + + for _, include := range checker.Include { + g, err := glob.Compile(include) + if err != nil { + return Analyzer{}, err + } + pathFilter.IncludeGlobs = append(pathFilter.IncludeGlobs, g) + } + } + + // node filters + var filters []NodeFilter + if checker.Filters != nil { + for _, filter := range checker.Filters { + if filter.PatternInside != "" { + queryStr := filter.PatternInside + " @" + filterPatternKey + query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) + if err != nil { + return Analyzer{}, err + } + + filters = append(filters, NodeFilter{ + query: query, + shouldMatch: true, + }) + } + + if filter.PatternNotInside != "" { + queryStr := filter.PatternNotInside + " @" + filterPatternKey + query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) + if err != nil { + return Analyzer{}, err + } + + filters = append(filters, NodeFilter{ + query: query, + shouldMatch: false, + }) + } + } + } + + patternChecker := &Analyzer{ + Language: lang, + Description: checker.Description, + Category: checker.Category, + Severity: checker.Severity, + } + + yamlAnalyzer := &YamlAnalyzer{ + Analyzer: Analyzer{ + Language: lang, + Description: checker.Description, + Category: checker.Category, + Severity: checker.Severity, + }, + Patterns: patterns, + NodeFilter: filters, + PathFilter: pathFilter, + Message: checker.Message, + } + + patternChecker.Run = RunYamlAnalyzer(yamlAnalyzer) + return *patternChecker, nil +} + +func RunYamlAnalyzer(YamlAnalyzer *YamlAnalyzer) func(pass *Pass) (any, error) { + return func(pass *Pass) (any, error) { + queries := YamlAnalyzer.Patterns + for _, query := range queries { + qc := sitter.NewQueryCursor() + defer qc.Close() + qc.Exec(query, pass.FileContext.Ast) + for { + m, ok := qc.NextMatch() + if !ok { + break + } + m = qc.FilterPredicates(m, pass.FileContext.Source) + for _, capture := range m.Captures { + captureName := query.CaptureNameForId(capture.Index) + if captureName == pass.Analyzer.Name && YamlAnalyzer.runParentFilters(pass.FileContext.Source, capture.Node) { + message := YamlAnalyzer.Message + for _, capture := range m.Captures { + captureName := query.CaptureNameForId(capture.Index) + message = strings.ReplaceAll(message, "@"+captureName, capture.Node.Content(pass.FileContext.Source)) + } + } + pass.Report(pass, capture.Node, YamlAnalyzer.Message) + } + + } + } + return nil, nil + } + +} + +func (ana *YamlAnalyzer) runParentFilters(source []byte, capture *sitter.Node) bool { + filters := ana.NodeFilter + if len(filters) == 0 { + return true + } + + for _, filter := range filters { + shouldMatch := filter.shouldMatch + nodeMatched := false + + for parent := capture.Parent(); parent != nil; parent = parent.Parent() { + if ana.filterMatchesParent(&filter, parent, source) { + nodeMatched = true + if !shouldMatch { + return false + } else { + break + } + } + } + + if !nodeMatched && shouldMatch { + return false + } + } + + return true +} + +func (ana *YamlAnalyzer) filterMatchesParent(filter *NodeFilter, parent *sitter.Node, source []byte) bool { + qc := sitter.NewQueryCursor() + defer qc.Close() + + qc.Exec(filter.query, parent) + + for { + m, ok := qc.NextMatch() + if !ok { + break + } + + m = qc.FilterPredicates(m, source) + for _, capture := range m.Captures { + captureName := filter.query.CaptureNameForId(capture.Index) + if captureName == filterPatternKey && capture.Node == parent { + return true + } + } + } + + return false +} diff --git a/checkers/checker.go b/checkers/checker.go index 9ed39107..88c40286 100644 --- a/checkers/checker.go +++ b/checkers/checker.go @@ -8,13 +8,12 @@ import ( "path/filepath" goAnalysis "globstar.dev/analysis" - "globstar.dev/pkg/analysis" ) //go:embed **/*.y*ml var builtinCheckers embed.FS -func findYamlCheckers(checkersMap map[analysis.Language][]analysis.YamlChecker) func(path string, d fs.DirEntry, err error) error { +func findYamlCheckers(checkersMap map[goAnalysis.Language][]goAnalysis.Analyzer) func(path string, d fs.DirEntry, err error) error { return func(path string, d fs.DirEntry, err error) error { if err != nil { return nil @@ -35,25 +34,25 @@ func findYamlCheckers(checkersMap map[analysis.Language][]analysis.YamlChecker) return nil } - patternChecker, err := analysis.ReadFromBytes(fileContent) + patternChecker, err := goAnalysis.ReadFromBytes(fileContent) if err != nil { return fmt.Errorf("invalid checker '%s': %s", d.Name(), err.Error()) } - lang := patternChecker.Language() + lang := patternChecker.Language checkersMap[lang] = append(checkersMap[lang], patternChecker) return nil } } -func LoadBuiltinYamlCheckers() (map[analysis.Language][]analysis.YamlChecker, error) { - checkersMap := make(map[analysis.Language][]analysis.YamlChecker) +func LoadBuiltinYamlCheckers() (map[goAnalysis.Language][]goAnalysis.Analyzer, error) { + checkersMap := make(map[goAnalysis.Language][]goAnalysis.Analyzer) err := fs.WalkDir(builtinCheckers, ".", findYamlCheckers(checkersMap)) return checkersMap, err } -func LoadCustomYamlCheckers(dir string) (map[analysis.Language][]analysis.YamlChecker, error) { - checkersMap := make(map[analysis.Language][]analysis.YamlChecker) +func LoadCustomYamlCheckers(dir string) (map[goAnalysis.Language][]goAnalysis.Analyzer, error) { + checkersMap := make(map[goAnalysis.Language][]goAnalysis.Analyzer) err := fs.WalkDir(os.DirFS(dir), ".", findYamlCheckers(checkersMap)) return checkersMap, err } diff --git a/cmd/globstar/main.go b/cmd/globstar/main.go index 073f0902..9e1d20d5 100644 --- a/cmd/globstar/main.go +++ b/cmd/globstar/main.go @@ -16,7 +16,7 @@ func main() { cli := cli.Cli{ RootDirectory: cwd, - Checkers: nil, // no custom checker set + // Checkers: nil, // no custom checker set } err = cli.Run() diff --git a/pkg/analysis/analyze.go b/pkg/analysis/analyze.go index d08924ca..9a344286 100644 --- a/pkg/analysis/analyze.go +++ b/pkg/analysis/analyze.go @@ -1,438 +1,438 @@ package analysis -import ( - "fmt" - "path/filepath" - "regexp" - "strings" - - sitter "github.com/smacker/go-tree-sitter" - ana "globstar.dev/analysis" -) - -// type Issue struct { -// // The category of the issue -// Category config.Category -// // The severity of the issue -// Severity config.Severity -// // The message to display to the user -// Message string -// // The file path of the file that the issue was found in -// Filepath string -// // The range of the issue in the source code -// Range sitter.Range -// // (optional) The AST node that caused the issue -// Node *sitter.Node -// // Id is a unique ID for the issue. -// // Issue that have 'Id's can be explained using the `globstar desc` command. -// Id *string +// import ( +// "fmt" +// "path/filepath" +// "regexp" +// "strings" + +// sitter "github.com/smacker/go-tree-sitter" +// ana "globstar.dev/analysis" +// ) + +// // type Issue struct { +// // // The category of the issue +// // Category config.Category +// // // The severity of the issue +// // Severity config.Severity +// // // The message to display to the user +// // Message string +// // // The file path of the file that the issue was found in +// // Filepath string +// // // The range of the issue in the source code +// // Range sitter.Range +// // // (optional) The AST node that caused the issue +// // Node *sitter.Node +// // // Id is a unique ID for the issue. +// // // Issue that have 'Id's can be explained using the `globstar desc` command. +// // Id *string +// // } + +// // func (i *Issue) AsJson() ([]byte, error) { +// // type location struct { +// // Row int `json:"row"` +// // Column int `json:"column"` +// // } + +// // type position struct { +// // Filename string `json:"filename"` +// // Start location `json:"start"` +// // End location `json:"end"` +// // } + +// // type issueJson struct { +// // Category config.Category `json:"category"` +// // Severity config.Severity `json:"severity"` +// // Message string `json:"message"` +// // Range position `json:"range"` +// // Id string `json:"id"` +// // } +// // issue := issueJson{ +// // Category: i.Category, +// // Severity: i.Severity, +// // Message: i.Message, +// // Range: position{ +// // Filename: i.Filepath, +// // Start: location{ +// // Row: int(i.Range.StartPoint.Row), +// // Column: int(i.Range.StartPoint.Column), +// // }, +// // End: location{ +// // Row: int(i.Range.EndPoint.Row), +// // Column: int(i.Range.EndPoint.Column), +// // }, +// // }, +// // Id: *i.Id, +// // } + +// // return json.Marshal(issue) +// // } + +// // func (i *Issue) AsText() ([]byte, error) { +// // return []byte(fmt.Sprintf("%s:%d:%d:%s", i.Filepath, i.Range.StartPoint.Row, i.Range.StartPoint.Column, i.Message)), nil +// // } + +// type Analyzer struct { +// Language Language +// // WorkDir is the directory in which the analysis is being run. +// WorkDir string +// // ParseResult is the result of parsing a file with a tree-sitter parser, +// // along with some extra appendages (e.g: scope information). +// ParseResult *ParseResult +// // checkers is a list of all checkers that should be applied to the AST +// // for this language. +// checkers []Checker +// // patternCheckers is a list of all checkers that run after a query is run on the AST. +// // Usually, these are written in a DSL (which, for now, is the tree-sitter S-Expression query language) +// YamlCheckers []YamlChecker +// // entryCheckers maps node types to the checkers that should be applied +// // when entering that node. +// entryCheckersForNode map[string][]Checker +// // exitCheckers maps node types to the checkers that should be applied +// // when leaving that node. +// exitCheckersForNode map[string][]Checker +// issuesRaised []*ana.Issue // } -// func (i *Issue) AsJson() ([]byte, error) { -// type location struct { -// Row int `json:"row"` -// Column int `json:"column"` +// type SkipComment struct { +// // the line number for the skipcq comment +// CommentLine int +// // the entire text of the skipcq comment +// CommentText string +// // (optional) name of the checker for targetted skip +// CheckerIds []string +// } + +// // package level cache to store comments for each file +// var fileSkipComment = make(map[string][]*SkipComment) + +// func InitializeSkipComments(analyzers []*Analyzer) { +// fileSkipComments := make(map[string][]*SkipComment) + +// processedPaths := make(map[string]bool) + +// for _, analyzer := range analyzers { +// filepath := analyzer.ParseResult.FilePath +// if processedPaths[filepath] { +// continue +// } + +// processedPaths[filepath] = true +// fileSkipComments[filepath] = GatherSkipInfo(analyzer.ParseResult) // } +// } -// type position struct { -// Filename string `json:"filename"` -// Start location `json:"start"` -// End location `json:"end"` +// func FromFile(filePath string, baseCheckers []Checker) (*Analyzer, error) { +// res, err := ParseFile(filePath) +// if err != nil { +// return nil, err // } -// type issueJson struct { -// Category config.Category `json:"category"` -// Severity config.Severity `json:"severity"` -// Message string `json:"message"` -// Range position `json:"range"` -// Id string `json:"id"` +// return NewAnalyzer(res, baseCheckers), nil +// } + +// func NewAnalyzer(file *ParseResult, checkers []Checker) *Analyzer { +// ana := &Analyzer{ +// ParseResult: file, +// Language: file.Language, +// entryCheckersForNode: map[string][]Checker{}, +// exitCheckersForNode: map[string][]Checker{}, // } -// issue := issueJson{ -// Category: i.Category, -// Severity: i.Severity, -// Message: i.Message, -// Range: position{ -// Filename: i.Filepath, -// Start: location{ -// Row: int(i.Range.StartPoint.Row), -// Column: int(i.Range.StartPoint.Column), -// }, -// End: location{ -// Row: int(i.Range.EndPoint.Row), -// Column: int(i.Range.EndPoint.Column), -// }, -// }, -// Id: *i.Id, + +// for _, checker := range checkers { +// ana.AddChecker(checker) // } -// return json.Marshal(issue) +// return ana // } -// func (i *Issue) AsText() ([]byte, error) { -// return []byte(fmt.Sprintf("%s:%d:%d:%s", i.Filepath, i.Range.StartPoint.Row, i.Range.StartPoint.Column, i.Message)), nil +// func (ana *Analyzer) Analyze() []*ana.Issue { +// WalkTree(ana.ParseResult.Ast, ana) +// ana.runPatternCheckers() +// return ana.issuesRaised +// } + +// func (ana *Analyzer) AddChecker(checker Checker) { +// ana.checkers = append(ana.checkers, checker) +// typ := checker.NodeType() + +// if checker.OnEnter() != nil { +// ana.entryCheckersForNode[typ] = append(ana.entryCheckersForNode[typ], checker) +// } + +// if checker.OnLeave() != nil { +// ana.exitCheckersForNode[typ] = append(ana.exitCheckersForNode[typ], checker) +// } +// } + +// func (ana *Analyzer) OnEnterNode(node *sitter.Node) bool { +// nodeType := node.Type() +// checkers := ana.entryCheckersForNode[nodeType] +// for _, checker := range checkers { +// visitFn := checker.OnEnter() +// if visitFn != nil { +// (*visitFn)(checker, ana, node) +// } +// } +// return true // } -type Analyzer struct { - Language Language - // WorkDir is the directory in which the analysis is being run. - WorkDir string - // ParseResult is the result of parsing a file with a tree-sitter parser, - // along with some extra appendages (e.g: scope information). - ParseResult *ParseResult - // checkers is a list of all checkers that should be applied to the AST - // for this language. - checkers []Checker - // patternCheckers is a list of all checkers that run after a query is run on the AST. - // Usually, these are written in a DSL (which, for now, is the tree-sitter S-Expression query language) - YamlCheckers []YamlChecker - // entryCheckers maps node types to the checkers that should be applied - // when entering that node. - entryCheckersForNode map[string][]Checker - // exitCheckers maps node types to the checkers that should be applied - // when leaving that node. - exitCheckersForNode map[string][]Checker - issuesRaised []*ana.Issue -} - -type SkipComment struct { - // the line number for the skipcq comment - CommentLine int - // the entire text of the skipcq comment - CommentText string - // (optional) name of the checker for targetted skip - CheckerIds []string -} - -// package level cache to store comments for each file -var fileSkipComment = make(map[string][]*SkipComment) - -func InitializeSkipComments(analyzers []*Analyzer) { - fileSkipComments := make(map[string][]*SkipComment) - - processedPaths := make(map[string]bool) - - for _, analyzer := range analyzers { - filepath := analyzer.ParseResult.FilePath - if processedPaths[filepath] { - continue - } - - processedPaths[filepath] = true - fileSkipComments[filepath] = GatherSkipInfo(analyzer.ParseResult) - } -} - -func FromFile(filePath string, baseCheckers []Checker) (*Analyzer, error) { - res, err := ParseFile(filePath) - if err != nil { - return nil, err - } - - return NewAnalyzer(res, baseCheckers), nil -} - -func NewAnalyzer(file *ParseResult, checkers []Checker) *Analyzer { - ana := &Analyzer{ - ParseResult: file, - Language: file.Language, - entryCheckersForNode: map[string][]Checker{}, - exitCheckersForNode: map[string][]Checker{}, - } - - for _, checker := range checkers { - ana.AddChecker(checker) - } - - return ana -} - -func (ana *Analyzer) Analyze() []*ana.Issue { - WalkTree(ana.ParseResult.Ast, ana) - ana.runPatternCheckers() - return ana.issuesRaised -} - -func (ana *Analyzer) AddChecker(checker Checker) { - ana.checkers = append(ana.checkers, checker) - typ := checker.NodeType() - - if checker.OnEnter() != nil { - ana.entryCheckersForNode[typ] = append(ana.entryCheckersForNode[typ], checker) - } - - if checker.OnLeave() != nil { - ana.exitCheckersForNode[typ] = append(ana.exitCheckersForNode[typ], checker) - } -} - -func (ana *Analyzer) OnEnterNode(node *sitter.Node) bool { - nodeType := node.Type() - checkers := ana.entryCheckersForNode[nodeType] - for _, checker := range checkers { - visitFn := checker.OnEnter() - if visitFn != nil { - (*visitFn)(checker, ana, node) - } - } - return true -} - -func (ana *Analyzer) OnLeaveNode(node *sitter.Node) { - nodeType := node.Type() - checkers := ana.exitCheckersForNode[nodeType] - for _, checker := range checkers { - visitFn := checker.OnLeave() - if visitFn != nil { - (*visitFn)(checker, ana, node) - } - } -} - -func (ana *Analyzer) shouldSkipChecker(checker YamlChecker) bool { - pathFilter := checker.PathFilter() - if pathFilter == nil { - // no filter is set, so we should not skip this checker - return false - } - - relPath := ana.ParseResult.FilePath - if ana.WorkDir != "" { - rel, err := filepath.Rel(ana.WorkDir, ana.ParseResult.FilePath) - if err == nil { - relPath = rel - } - } - - if len(pathFilter.ExcludeGlobs) > 0 { - for _, excludeGlob := range pathFilter.ExcludeGlobs { - if excludeGlob.Match(relPath) { - return true - } - } - - // no exclude globs matched, so we should not skip this checker - return false - } - - if len(pathFilter.IncludeGlobs) > 0 { - for _, includeGlob := range pathFilter.IncludeGlobs { - if includeGlob.Match(relPath) { - return false - } - } - - // no include globs matched, so we should skip this checker - return true - } - - return false -} - -func (ana *Analyzer) filterMatchesParent(filter *NodeFilter, parent *sitter.Node) bool { - qc := sitter.NewQueryCursor() - defer qc.Close() - - qc.Exec(filter.query, parent) - - // check if the filter matches the `parent` node - for { - m, ok := qc.NextMatch() - if !ok { - break - } - - m = qc.FilterPredicates(m, ana.ParseResult.Source) - for _, capture := range m.Captures { - captureName := filter.query.CaptureNameForId(capture.Index) - if captureName == filterPatternKey && capture.Node == parent { - return true - } - } - } - - return false -} - -// runParentFilters checks if the parent filters for a checker match the given node. -func (ana *Analyzer) runParentFilters(checker YamlChecker, node *sitter.Node) bool { - filters := checker.NodeFilters() - if len(filters) == 0 { - return true - } - - for _, filter := range filters { - shouldMatch := filter.shouldMatch - nodeMatched := false - - // The matched node is expected to be a child of some other - // node, but it has no parents (is a top-level node) - if node.Parent() == nil && filter.shouldMatch { - return false - } - - for parent := node.Parent(); parent != nil; parent = parent.Parent() { - if ana.filterMatchesParent(&filter, parent) { - nodeMatched = true - if !shouldMatch { - // pattern-not-inside matched, so this checker should be skipped - return false - } else { - // pattern-inside matched, so we can break out of the loop - break - } - } - } - - if !nodeMatched && shouldMatch { - return false - } - } - - return true -} - -func (ana *Analyzer) executeCheckerQuery(checker YamlChecker, query *sitter.Query) { - qc := sitter.NewQueryCursor() - defer qc.Close() - - qc.Exec(query, ana.ParseResult.Ast) - for { - m, ok := qc.NextMatch() - - if !ok { - break - } - - m = qc.FilterPredicates(m, ana.ParseResult.Source) - for _, capture := range m.Captures { - captureName := query.CaptureNameForId(capture.Index) - // TODO: explain why captureName == checker.Name() - if captureName == checker.Name() && ana.runParentFilters(checker, capture.Node) { - checker.OnMatch(ana, query, capture.Node, m.Captures) - } - } - } -} - -// runPatternCheckers executes all checkers that are written as AST queries. -func (ana *Analyzer) runPatternCheckers() { - for _, checker := range ana.YamlCheckers { - if ana.shouldSkipChecker(checker) { - continue - } - - queries := checker.Patterns() - for _, q := range queries { - ana.executeCheckerQuery(checker, q) - } - } -} - -func (ana *Analyzer) Report(issue *ana.Issue) { - ana.issuesRaised = append(ana.issuesRaised, issue) -} - -func RunYamlCheckers(path string, analyzers []*Analyzer) ([]*ana.Issue, error) { - InitializeSkipComments(analyzers) - - issues := []*ana.Issue{} - for _, analyzer := range analyzers { - issues = append(issues, analyzer.Analyze()...) - } - return issues, nil -} - -func GatherSkipInfo(fileContext *ParseResult) []*SkipComment { - var skipLines []*SkipComment - - commentIdentifier := GetEscapedCommentIdentifierFromPath(fileContext.FilePath) - pattern := fmt.Sprintf(`%s(?i).*?\bskipcq\b(?::(?:\s*(?P([A-Za-z\-_0-9]*(?:,\s*)?)+))?)?`, commentIdentifier) - skipRegexp := regexp.MustCompile(pattern) - - query, err := sitter.NewQuery([]byte("(comment) @skipcq"), fileContext.Language.Grammar()) - - if err != nil { - return skipLines - } - - cursor := sitter.NewQueryCursor() - cursor.Exec(query, fileContext.Ast) - - // gather all skipcq comment lines in a single pass - for { - m, ok := cursor.NextMatch() - if !ok { - break - } - - for _, capture := range m.Captures { - captureName := query.CaptureNameForId(capture.Index) - if captureName != "skipcq" { - continue - } - - commentNode := capture.Node - commentLine := int(commentNode.StartPoint().Row) - commentText := commentNode.Content(fileContext.Source) - - matches := skipRegexp.FindStringSubmatch(commentText) - if matches != nil { - issueIdsIdx := skipRegexp.SubexpIndex("issue_ids") - var checkerIds []string - - if issueIdsIdx != -1 && issueIdsIdx < len(matches) && matches[issueIdsIdx] != "" { - issueIdsIdx := matches[issueIdsIdx] - idSlice := strings.Split(issueIdsIdx, ",") - for _, id := range idSlice { - trimmedId := strings.TrimSpace(id) - if trimmedId != "" { - checkerIds = append(checkerIds, trimmedId) - } - } - } - - skipLines = append(skipLines, &SkipComment{ - CommentLine: commentLine, - CommentText: commentText, - CheckerIds: checkerIds, // will be empty for generic skipcq - }) - } - - } - } - - return skipLines -} - -func (ana *Analyzer) ContainsSkipcq(skipLines []*SkipComment, issue *ana.Issue) bool { - if len(skipLines) == 0 { - return false - } - - issueNode := issue.Node - nodeLine := int(issueNode.StartPoint().Row) - prevLine := nodeLine - 1 - - var checkerId string - if issue.Id != nil { - checkerId = *issue.Id - } - - for _, comment := range skipLines { - if comment.CommentLine != nodeLine && comment.CommentLine != prevLine { - continue - } - - if len(comment.CheckerIds) > 0 { - for _, id := range comment.CheckerIds { - if checkerId == id { - return true - } - } - } else { - return true - } - } - - return false -} +// func (ana *Analyzer) OnLeaveNode(node *sitter.Node) { +// nodeType := node.Type() +// checkers := ana.exitCheckersForNode[nodeType] +// for _, checker := range checkers { +// visitFn := checker.OnLeave() +// if visitFn != nil { +// (*visitFn)(checker, ana, node) +// } +// } +// } + +// func (ana *Analyzer) shouldSkipChecker(checker YamlChecker) bool { +// pathFilter := checker.PathFilter() +// if pathFilter == nil { +// // no filter is set, so we should not skip this checker +// return false +// } + +// relPath := ana.ParseResult.FilePath +// if ana.WorkDir != "" { +// rel, err := filepath.Rel(ana.WorkDir, ana.ParseResult.FilePath) +// if err == nil { +// relPath = rel +// } +// } + +// if len(pathFilter.ExcludeGlobs) > 0 { +// for _, excludeGlob := range pathFilter.ExcludeGlobs { +// if excludeGlob.Match(relPath) { +// return true +// } +// } + +// // no exclude globs matched, so we should not skip this checker +// return false +// } + +// if len(pathFilter.IncludeGlobs) > 0 { +// for _, includeGlob := range pathFilter.IncludeGlobs { +// if includeGlob.Match(relPath) { +// return false +// } +// } + +// // no include globs matched, so we should skip this checker +// return true +// } + +// return false +// } + +// func (ana *Analyzer) filterMatchesParent(filter *NodeFilter, parent *sitter.Node) bool { +// qc := sitter.NewQueryCursor() +// defer qc.Close() + +// qc.Exec(filter.query, parent) + +// // check if the filter matches the `parent` node +// for { +// m, ok := qc.NextMatch() +// if !ok { +// break +// } + +// m = qc.FilterPredicates(m, ana.ParseResult.Source) +// for _, capture := range m.Captures { +// captureName := filter.query.CaptureNameForId(capture.Index) +// if captureName == filterPatternKey && capture.Node == parent { +// return true +// } +// } +// } + +// return false +// } + +// // runParentFilters checks if the parent filters for a checker match the given node. +// func (ana *Analyzer) runParentFilters(checker YamlChecker, node *sitter.Node) bool { +// filters := checker.NodeFilters() +// if len(filters) == 0 { +// return true +// } + +// for _, filter := range filters { +// shouldMatch := filter.shouldMatch +// nodeMatched := false + +// // The matched node is expected to be a child of some other +// // node, but it has no parents (is a top-level node) +// if node.Parent() == nil && filter.shouldMatch { +// return false +// } + +// for parent := node.Parent(); parent != nil; parent = parent.Parent() { +// if ana.filterMatchesParent(&filter, parent) { +// nodeMatched = true +// if !shouldMatch { +// // pattern-not-inside matched, so this checker should be skipped +// return false +// } else { +// // pattern-inside matched, so we can break out of the loop +// break +// } +// } +// } + +// if !nodeMatched && shouldMatch { +// return false +// } +// } + +// return true +// } + +// func (ana *Analyzer) executeCheckerQuery(checker YamlChecker, query *sitter.Query) { +// qc := sitter.NewQueryCursor() +// defer qc.Close() + +// qc.Exec(query, ana.ParseResult.Ast) +// for { +// m, ok := qc.NextMatch() + +// if !ok { +// break +// } + +// m = qc.FilterPredicates(m, ana.ParseResult.Source) +// for _, capture := range m.Captures { +// captureName := query.CaptureNameForId(capture.Index) +// // TODO: explain why captureName == checker.Name() +// if captureName == checker.Name() && ana.runParentFilters(checker, capture.Node) { +// checker.OnMatch(ana, query, capture.Node, m.Captures) +// } +// } +// } +// } + +// // runPatternCheckers executes all checkers that are written as AST queries. +// func (ana *Analyzer) runPatternCheckers() { +// for _, checker := range ana.YamlCheckers { +// if ana.shouldSkipChecker(checker) { +// continue +// } + +// queries := checker.Patterns() +// for _, q := range queries { +// ana.executeCheckerQuery(checker, q) +// } +// } +// } + +// func (ana *Analyzer) Report(issue *ana.Issue) { +// ana.issuesRaised = append(ana.issuesRaised, issue) +// } + +// func RunYamlCheckers(path string, analyzers []*Analyzer) ([]*ana.Issue, error) { +// InitializeSkipComments(analyzers) + +// issues := []*ana.Issue{} +// for _, analyzer := range analyzers { +// issues = append(issues, analyzer.Analyze()...) +// } +// return issues, nil +// } + +// func GatherSkipInfo(fileContext *ParseResult) []*SkipComment { +// var skipLines []*SkipComment + +// commentIdentifier := GetEscapedCommentIdentifierFromPath(fileContext.FilePath) +// pattern := fmt.Sprintf(`%s(?i).*?\bskipcq\b(?::(?:\s*(?P([A-Za-z\-_0-9]*(?:,\s*)?)+))?)?`, commentIdentifier) +// skipRegexp := regexp.MustCompile(pattern) + +// query, err := sitter.NewQuery([]byte("(comment) @skipcq"), fileContext.Language.Grammar()) + +// if err != nil { +// return skipLines +// } + +// cursor := sitter.NewQueryCursor() +// cursor.Exec(query, fileContext.Ast) + +// // gather all skipcq comment lines in a single pass +// for { +// m, ok := cursor.NextMatch() +// if !ok { +// break +// } + +// for _, capture := range m.Captures { +// captureName := query.CaptureNameForId(capture.Index) +// if captureName != "skipcq" { +// continue +// } + +// commentNode := capture.Node +// commentLine := int(commentNode.StartPoint().Row) +// commentText := commentNode.Content(fileContext.Source) + +// matches := skipRegexp.FindStringSubmatch(commentText) +// if matches != nil { +// issueIdsIdx := skipRegexp.SubexpIndex("issue_ids") +// var checkerIds []string + +// if issueIdsIdx != -1 && issueIdsIdx < len(matches) && matches[issueIdsIdx] != "" { +// issueIdsIdx := matches[issueIdsIdx] +// idSlice := strings.Split(issueIdsIdx, ",") +// for _, id := range idSlice { +// trimmedId := strings.TrimSpace(id) +// if trimmedId != "" { +// checkerIds = append(checkerIds, trimmedId) +// } +// } +// } + +// skipLines = append(skipLines, &SkipComment{ +// CommentLine: commentLine, +// CommentText: commentText, +// CheckerIds: checkerIds, // will be empty for generic skipcq +// }) +// } + +// } +// } + +// return skipLines +// } + +// func (ana *Analyzer) ContainsSkipcq(skipLines []*SkipComment, issue *ana.Issue) bool { +// if len(skipLines) == 0 { +// return false +// } + +// issueNode := issue.Node +// nodeLine := int(issueNode.StartPoint().Row) +// prevLine := nodeLine - 1 + +// var checkerId string +// if issue.Id != nil { +// checkerId = *issue.Id +// } + +// for _, comment := range skipLines { +// if comment.CommentLine != nodeLine && comment.CommentLine != prevLine { +// continue +// } + +// if len(comment.CheckerIds) > 0 { +// for _, id := range comment.CheckerIds { +// if checkerId == id { +// return true +// } +// } +// } else { +// return true +// } +// } + +// return false +// } diff --git a/pkg/analysis/analyze_test.go b/pkg/analysis/analyze_test.go index 31efdf01..dc96b74b 100644 --- a/pkg/analysis/analyze_test.go +++ b/pkg/analysis/analyze_test.go @@ -1,166 +1,166 @@ package analysis -import ( - "testing" +// import ( +// "testing" - sitter "github.com/smacker/go-tree-sitter" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "globstar.dev/analysis" -) +// sitter "github.com/smacker/go-tree-sitter" +// "github.com/stretchr/testify/assert" +// "github.com/stretchr/testify/require" +// "globstar.dev/analysis" +// ) -func parseTestFile(t *testing.T, filename string, source string, language Language) *ParseResult { - parsed, err := Parse(filename, []byte(source), language, language.Grammar()) - require.NoError(t, err) - require.NotNil(t, parsed) - return parsed -} +// func parseTestFile(t *testing.T, filename string, source string, language Language) *ParseResult { +// parsed, err := Parse(filename, []byte(source), language, language.Grammar()) +// require.NoError(t, err) +// require.NotNil(t, parsed) +// return parsed +// } -func TestSkipCq(t *testing.T) { - tests := []struct { - name string - checkerId string - source string - language Language - want bool - }{ - { - name: "skipcq comment on same line", - checkerId: "no-assert", - language: LangPy, - source: ` - def someFunc(a, b): - assert a == b # skipcq - `, - want: true, - }, - { - name: "skipcq comment on previous line", - checkerId: "no-assert", - language: LangPy, - source: ` - if True: - # skipcq - assert 1 == 2 - `, - want: true, - }, - { - name: "skipcq comment with target checker", - checkerId: "no-assert", - language: LangPy, - source: ` - if a > 20: - # skipcq: no-assert - assert 5 == 0 - `, - want: true, - }, - { - name: "skipcq comment with mismatches target checker", - checkerId: "no-assert", - language: LangPy, - source: ` - assert a >= float('inf') # skipcq: csv-writer - `, - want: false, - }, - { - name: "skipcq comment not present", - checkerId: "no-assert", - language: LangPy, - source: ` - assert a == b - `, - want: false, - }, - { - name: "skipcq with multiple targets matching", - checkerId: "no-assert", - language: LangPy, - source: ` - # skipcq: csv-writer, no-assert - assert 1 == 10 - `, - want: true, - }, - { - name: "skipcq with multiple targets mismatching", - checkerId: "no-assert", - language: LangPy, - source: ` - assert 2==1 # skipcq: csv-writer, flask-error - `, - want: false, - }, - { - name: "skipcq with extra comments target match", - checkerId: "no-assert", - language: LangPy, - source: ` - def aFunc(): - assert a == b # some comment skipcq: no-assert, sql-inject # nosec, - `, - want: true, - }, - { - name: "skipcq with extra comments target unmatched", - checkerId: "no-assert", - language: LangPy, - source: ` - assert a is b # should be true skipcq: sql-inject, django-taint # more - `, - want: false, - }, - { - name: "skipcq with extra comments no target", - checkerId: "no-assert", - language: LangPy, - source: ` - if True: - assert 1 == 2 # must be false skipcq # nosec, - `, - want: true, - }, - } +// func TestSkipCq(t *testing.T) { +// tests := []struct { +// name string +// checkerId string +// source string +// language Language +// want bool +// }{ +// { +// name: "skipcq comment on same line", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// def someFunc(a, b): +// assert a == b # skipcq +// `, +// want: true, +// }, +// { +// name: "skipcq comment on previous line", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// if True: +// # skipcq +// assert 1 == 2 +// `, +// want: true, +// }, +// { +// name: "skipcq comment with target checker", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// if a > 20: +// # skipcq: no-assert +// assert 5 == 0 +// `, +// want: true, +// }, +// { +// name: "skipcq comment with mismatches target checker", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// assert a >= float('inf') # skipcq: csv-writer +// `, +// want: false, +// }, +// { +// name: "skipcq comment not present", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// assert a == b +// `, +// want: false, +// }, +// { +// name: "skipcq with multiple targets matching", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// # skipcq: csv-writer, no-assert +// assert 1 == 10 +// `, +// want: true, +// }, +// { +// name: "skipcq with multiple targets mismatching", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// assert 2==1 # skipcq: csv-writer, flask-error +// `, +// want: false, +// }, +// { +// name: "skipcq with extra comments target match", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// def aFunc(): +// assert a == b # some comment skipcq: no-assert, sql-inject # nosec, +// `, +// want: true, +// }, +// { +// name: "skipcq with extra comments target unmatched", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// assert a is b # should be true skipcq: sql-inject, django-taint # more +// `, +// want: false, +// }, +// { +// name: "skipcq with extra comments no target", +// checkerId: "no-assert", +// language: LangPy, +// source: ` +// if True: +// assert 1 == 2 # must be false skipcq # nosec, +// `, +// want: true, +// }, +// } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - parsed := parseTestFile(t, "no-assert.test.py", tt.source, tt.language) - analyzer := &Analyzer{ - Language: tt.language, - ParseResult: parsed, - } +// for _, tt := range tests { +// t.Run(tt.name, func(t *testing.T) { +// parsed := parseTestFile(t, "no-assert.test.py", tt.source, tt.language) +// analyzer := &Analyzer{ +// Language: tt.language, +// ParseResult: parsed, +// } - query, err := sitter.NewQuery([]byte("(assert_statement) @assert"), tt.language.Grammar()) - require.NoError(t, err) +// query, err := sitter.NewQuery([]byte("(assert_statement) @assert"), tt.language.Grammar()) +// require.NoError(t, err) - cursor := sitter.NewQueryCursor() - cursor.Exec(query, parsed.Ast) +// cursor := sitter.NewQueryCursor() +// cursor.Exec(query, parsed.Ast) - match, ok := cursor.NextMatch() - require.True(t, ok, "failed to find assert statements") +// match, ok := cursor.NextMatch() +// require.True(t, ok, "failed to find assert statements") - var assertNode *sitter.Node - for _, captureNode := range match.Captures { - if query.CaptureNameForId(captureNode.Index) == "assert" { - assertNode = captureNode.Node - break - } - } +// var assertNode *sitter.Node +// for _, captureNode := range match.Captures { +// if query.CaptureNameForId(captureNode.Index) == "assert" { +// assertNode = captureNode.Node +// break +// } +// } - require.NotNil(t, assertNode, "failed to capture assert node") +// require.NotNil(t, assertNode, "failed to capture assert node") - issue := &analysis.Issue{ - Filepath: "no-assert.test.py", - Node: assertNode, - Id: &tt.checkerId, - } +// issue := &analysis.Issue{ +// Filepath: "no-assert.test.py", +// Node: assertNode, +// Id: &tt.checkerId, +// } - skipComments := GatherSkipInfo(parsed) +// skipComments := GatherSkipInfo(parsed) - res := analyzer.ContainsSkipcq(skipComments, issue) - assert.Equal(t, tt.want, res) - }) - } -} +// res := analyzer.ContainsSkipcq(skipComments, issue) +// assert.Equal(t, tt.want, res) +// }) +// } +// } diff --git a/pkg/analysis/pattern_rule.go b/pkg/analysis/pattern_rule.go index 06e84925..2cb6ef1c 100644 --- a/pkg/analysis/pattern_rule.go +++ b/pkg/analysis/pattern_rule.go @@ -1,361 +1,360 @@ package analysis -import ( - "fmt" - "os" - "strings" - - "github.com/gobwas/glob" - sitter "github.com/smacker/go-tree-sitter" - analysis "globstar.dev/analysis" - "globstar.dev/pkg/config" - "gopkg.in/yaml.v3" -) - -// To get a node back from a tree-sitter query, it *must* have a capture name. -// So: (call_expression) will match nothing, but (call_expression) @some_key -// will match all call expressions. -// For filtering patterns with clauses in the yaml file, like: -// filters: -// - pattern-inside: (call_expression) -// - pattern-not-inside: (catch_block) -// -// We need a to append a key name at the end of the pattern written by the user. -// This is the key that we will use. -const filterPatternKey = "__filter__key__" - -// A YamlChecker is a checker that matches a tree-sitter query pattern -// and reports an issue when the pattern is found. -// Unlike regular issues, PatternCheckers are not associated with a specific node type, rather -// they are invoked for *every* node that matches the pattern. -type YamlChecker interface { - Name() string - Patterns() []*sitter.Query - Language() Language - Category() config.Category - Severity() config.Severity - OnMatch( - ana *Analyzer, // the analyzer instance - matchedQuery *sitter.Query, // the query that found an AST node - matchedNode *sitter.Node, // the AST node that matched the query - captures []sitter.QueryCapture, // list of captures made inside the query - ) - PathFilter() *PathFilter - NodeFilters() []NodeFilter -} - -// NodeFilter is a filter that can be applied to a PatternChecker to restrict -// the the nodes that the checker is applied to. -// The checker is only applied to nodes that have a parent matching (or not matching) the query. -type NodeFilter struct { - query *sitter.Query - shouldMatch bool -} - -// PathFilter is a glob that can be applied to a PatternChecker to restrict -// the files that the checker is applied to. -type PathFilter struct { - ExcludeGlobs []glob.Glob - IncludeGlobs []glob.Glob -} - -type patternCheckerImpl struct { - language Language - patterns []*sitter.Query - issueMessage string - issueId string - category config.Category - severity config.Severity - pathFilter *PathFilter - filters []NodeFilter -} - -func (r *patternCheckerImpl) Language() Language { - return r.language -} - -func (r *patternCheckerImpl) Patterns() []*sitter.Query { - return r.patterns -} - -func (r *patternCheckerImpl) OnMatch( - ana *Analyzer, - matchedQuery *sitter.Query, - matchedNode *sitter.Node, - captures []sitter.QueryCapture, -) { - - // replace all '@' with the corresponding capture value - message := r.issueMessage - // TODO: 1. escape '@' in the message, 2. use a more efficient way to replace - for strings.ContainsRune(message, '@') { - for _, capture := range captures { - captureName := matchedQuery.CaptureNameForId(capture.Index) - message = strings.ReplaceAll( - message, - "@"+captureName, - capture.Node.Content(ana.ParseResult.Source), - ) - } - } - - raisedIssue := &analysis.Issue{ - Message: message, - Filepath: ana.ParseResult.FilePath, - Category: analysis.Category(r.Category()), - Severity: analysis.Severity(r.Severity()), - Id: &r.issueId, - Node: matchedNode, - } - - filepath := ana.ParseResult.FilePath - skipComments := fileSkipComment[filepath] - if !ana.ContainsSkipcq(skipComments, raisedIssue) { - ana.Report(raisedIssue) - } -} - -func (r *patternCheckerImpl) Name() string { - return r.issueId -} - -func (r *patternCheckerImpl) PathFilter() *PathFilter { - return r.pathFilter -} - -func (r *patternCheckerImpl) NodeFilters() []NodeFilter { - return r.filters -} - -func (r *patternCheckerImpl) Category() config.Category { - return r.category -} - -func (r *patternCheckerImpl) Severity() config.Severity { - return r.severity -} - -func CreatePatternChecker( - patterns []*sitter.Query, - language Language, - issueMessage string, - issueId string, - pathFilter *PathFilter, -) YamlChecker { - return &patternCheckerImpl{ - language: language, - patterns: patterns, - issueMessage: issueMessage, - issueId: issueId, - pathFilter: pathFilter, - } -} - -type filterYAML struct { - PatternInside string `yaml:"pattern-inside,omitempty"` - PatternNotInside string `yaml:"pattern-not-inside,omitempty"` -} - -type PatternCheckerFile struct { - Language string `yaml:"language"` - Code string `yaml:"name"` - Message string `yaml:"message"` - Category config.Category `yaml:"category"` - Severity config.Severity `yaml:"severity"` - // Pattern is a single pattern in the form of: - // pattern: (some_pattern) - // in the YAML file - Pattern string `yaml:"pattern,omitempty"` - // Patterns are ultiple patterns in the form of: - // pattern: (something) - // in the YAML file - Patterns []string `yaml:"patterns,omitempty"` - Description string `yaml:"description,omitempty"` - Filters []filterYAML `yaml:"filters,omitempty"` - Exclude []string `yaml:"exclude,omitempty"` - Include []string `yaml:"include,omitempty"` -} - -// DecodeLanguage converts a stringified language name to its corresponding -// Language enum -func DecodeLanguage(language string) Language { - language = strings.ToLower(language) - switch language { - case "javascript", "js": - return LangJs - case "typescript", "ts": - return LangTs - case "jsx", "tsx": - return LangTsx - case "python", "py": - return LangPy - case "ocaml", "ml": - return LangOCaml - case "docker", "dockerfile": - return LangDockerfile - case "java": - return LangJava - case "kotlin", "kt": - return LangKotlin - case "rust", "rs": - return LangRust - case "ruby", "rb": - return LangRuby - case "lua": - return LangLua - case "yaml", "yml": - return LangYaml - case "sql": - return LangSql - case "css", "css3": - return LangCss - case "markdown", "md": - return LangMarkdown - case "sh", "bash": - return LangBash - case "csharp", "cs": - return LangCsharp - case "elixir", "ex": - return LangElixir - case "elm": - return LangElm - case "go": - return LangGo - case "groovy": - return LangGroovy - case "hcl", "tf": - return LangHcl - case "html": - return LangHtml - case "php": - return LangPhp - case "scala": - return LangScala - case "swift": - return LangSwift - default: - return LangUnknown - } -} - -// ReadFromFile reads a pattern checker definition from a YAML config file. -func ReadFromFile(filePath string) (YamlChecker, error) { - fileContent, err := os.ReadFile(filePath) - if err != nil { - return nil, err - } - - return ReadFromBytes(fileContent) -} - -// ReadFromBytes reads a pattern checker definition from bytes array -func ReadFromBytes(fileContent []byte) (YamlChecker, error) { - var checker PatternCheckerFile - if err := yaml.Unmarshal(fileContent, &checker); err != nil { - return nil, err - } - - lang := DecodeLanguage(checker.Language) - if lang == LangUnknown { - return nil, fmt.Errorf("unknown language code: '%s'", checker.Language) - } - - if checker.Code == "" { - return nil, fmt.Errorf("no name provided in checker definition") - } - - if checker.Message == "" { - return nil, fmt.Errorf("no message provided in checker '%s'", checker.Code) - } - - var patterns []*sitter.Query - if checker.Pattern != "" { - pattern, err := sitter.NewQuery([]byte(checker.Pattern), lang.Grammar()) - if err != nil { - return nil, err - } - patterns = append(patterns, pattern) - } else if len(checker.Patterns) > 0 { - for _, patternStr := range checker.Patterns { - pattern, err := sitter.NewQuery([]byte(patternStr), lang.Grammar()) - if err != nil { - return nil, err - } - patterns = append(patterns, pattern) - } - } else { - return nil, fmt.Errorf("no pattern provided in checker '%s'", checker.Code) - } - - if checker.Pattern != "" && len(checker.Patterns) > 0 { - return nil, fmt.Errorf("only one of 'pattern' or 'patterns' can be provided in a checker definition") - } - - // include and exclude patterns - var pathFilter *PathFilter - if checker.Exclude != nil || checker.Include != nil { - pathFilter = &PathFilter{ - ExcludeGlobs: make([]glob.Glob, 0, len(checker.Exclude)), - IncludeGlobs: make([]glob.Glob, 0, len(checker.Include)), - } - - for _, exclude := range checker.Exclude { - g, err := glob.Compile(exclude) - if err != nil { - return nil, err - } - pathFilter.ExcludeGlobs = append(pathFilter.ExcludeGlobs, g) - } - - for _, include := range checker.Include { - g, err := glob.Compile(include) - if err != nil { - return nil, err - } - pathFilter.IncludeGlobs = append(pathFilter.IncludeGlobs, g) - } - } - - // node filters - var filters []NodeFilter - if checker.Filters != nil { - for _, filter := range checker.Filters { - if filter.PatternInside != "" { - queryStr := filter.PatternInside + " @" + filterPatternKey - query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) - if err != nil { - return nil, err - } - - filters = append(filters, NodeFilter{ - query: query, - shouldMatch: true, - }) - } - - if filter.PatternNotInside != "" { - queryStr := filter.PatternNotInside + " @" + filterPatternKey - query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) - if err != nil { - return nil, err - } - - filters = append(filters, NodeFilter{ - query: query, - shouldMatch: false, - }) - } - } - } - - patternChecker := &patternCheckerImpl{ - language: lang, - patterns: patterns, - issueMessage: checker.Message, - issueId: checker.Code, - pathFilter: pathFilter, - filters: filters, - } - - return patternChecker, nil -} +// import ( +// "fmt" +// "os" +// "strings" + +// "github.com/gobwas/glob" +// sitter "github.com/smacker/go-tree-sitter" +// analysis "globstar.dev/analysis" +// "globstar.dev/pkg/config" +// "gopkg.in/yaml.v3" +// ) + +// // To get a node back from a tree-sitter query, it *must* have a capture name. +// // So: (call_expression) will match nothing, but (call_expression) @some_key +// // will match all call expressions. +// // For filtering patterns with clauses in the yaml file, like: +// // filters: +// // - pattern-inside: (call_expression) +// // - pattern-not-inside: (catch_block) +// // +// // We need a to append a key name at the end of the pattern written by the user. +// // This is the key that we will use. +// const filterPatternKey = "__filter__key__" + +// // A YamlChecker is a checker that matches a tree-sitter query pattern +// // and reports an issue when the pattern is found. +// // Unlike regular issues, PatternCheckers are not associated with a specific node type, rather +// // they are invoked for *every* node that matches the pattern. +// type YamlChecker interface { +// Name() string +// Patterns() []*sitter.Query +// Language() Language +// Category() config.Category +// Severity() config.Severity +// OnMatch( +// ana *Analyzer, // the analyzer instance +// matchedQuery *sitter.Query, // the query that found an AST node +// matchedNode *sitter.Node, // the AST node that matched the query +// captures []sitter.QueryCapture, // list of captures made inside the query +// ) +// PathFilter() *PathFilter +// NodeFilters() []NodeFilter +// } + +// // NodeFilter is a filter that can be applied to a PatternChecker to restrict +// // the the nodes that the checker is applied to. +// // The checker is only applied to nodes that have a parent matching (or not matching) the query. +// type NodeFilter struct { +// query *sitter.Query +// shouldMatch bool +// } + +// // PathFilter is a glob that can be applied to a PatternChecker to restrict +// // the files that the checker is applied to. +// type PathFilter struct { +// ExcludeGlobs []glob.Glob +// IncludeGlobs []glob.Glob +// } + +// type patternCheckerImpl struct { +// language Language +// patterns []*sitter.Query +// issueMessage string +// issueId string +// category config.Category +// severity config.Severity +// pathFilter *PathFilter +// filters []NodeFilter +// } + +// func (r *patternCheckerImpl) Language() Language { +// return r.language +// } + +// func (r *patternCheckerImpl) Patterns() []*sitter.Query { +// return r.patterns +// } + +// func (r *patternCheckerImpl) OnMatch( +// ana *Analyzer, +// matchedQuery *sitter.Query, +// matchedNode *sitter.Node, +// captures []sitter.QueryCapture, +// ) { + +// // replace all '@' with the corresponding capture value +// message := r.issueMessage +// // TODO: 1. escape '@' in the message, 2. use a more efficient way to replace +// for strings.ContainsRune(message, '@') { +// for _, capture := range captures { +// captureName := matchedQuery.CaptureNameForId(capture.Index) +// message = strings.ReplaceAll( +// message, +// "@"+captureName, +// capture.Node.Content(ana.ParseResult.Source), +// ) +// } +// } + +// raisedIssue := &analysis.Issue{ +// Message: message, +// Filepath: ana.ParseResult.FilePath, +// Category: analysis.Category(r.Category()), +// Severity: analysis.Severity(r.Severity()), +// Id: &r.issueId, +// Node: matchedNode, +// } + +// filepath := ana.ParseResult.FilePath +// skipComments := fileSkipComment[filepath] +// if !ana.ContainsSkipcq(skipComments, raisedIssue) { +// ana.Report(raisedIssue) +// } +// } + +// func (r *patternCheckerImpl) Name() string { +// return r.issueId +// } + +// func (r *patternCheckerImpl) PathFilter() *PathFilter { +// return r.pathFilter +// } + +// func (r *patternCheckerImpl) NodeFilters() []NodeFilter { +// return r.filters +// } + +// func (r *patternCheckerImpl) Category() config.Category { +// return r.category +// } + +// func (r *patternCheckerImpl) Severity() config.Severity { +// return r.severity +// } + +// func CreatePatternChecker( +// patterns []*sitter.Query, +// language Language, +// issueMessage string, +// issueId string, +// pathFilter *PathFilter, +// ) YamlChecker { +// return &patternCheckerImpl{ +// language: language, +// patterns: patterns, +// issueMessage: issueMessage, +// issueId: issueId, +// pathFilter: pathFilter, +// } +// } + +// type filterYAML struct { +// PatternInside string `yaml:"pattern-inside,omitempty"` +// PatternNotInside string `yaml:"pattern-not-inside,omitempty"` +// } + +// type PatternCheckerFile struct { +// Language string `yaml:"language"` +// Code string `yaml:"name"` +// Message string `yaml:"message"` +// Category config.Category `yaml:"category"` +// Severity config.Severity `yaml:"severity"` +// // Pattern is a single pattern in the form of: +// // pattern: (some_pattern) +// // in the YAML file +// Pattern string `yaml:"pattern,omitempty"` +// // Patterns are ultiple patterns in the form of: +// // pattern: (something) +// // in the YAML file +// Patterns []string `yaml:"patterns,omitempty"` +// Description string `yaml:"description,omitempty"` +// Filters []filterYAML `yaml:"filters,omitempty"` +// Exclude []string `yaml:"exclude,omitempty"` +// Include []string `yaml:"include,omitempty"` +// } + +// // DecodeLanguage converts a stringified language name to its corresponding +// // Language enum +// func DecodeLanguage(language string) Language { +// language = strings.ToLower(language) +// switch language { +// case "javascript", "js": +// return LangJs +// case "typescript", "ts": +// return LangTs +// case "jsx", "tsx": +// return LangTsx +// case "python", "py": +// return LangPy +// case "ocaml", "ml": +// return LangOCaml +// case "docker", "dockerfile": +// return LangDockerfile +// case "java": +// return LangJava +// case "kotlin", "kt": +// return LangKotlin +// case "rust", "rs": +// return LangRust +// case "ruby", "rb": +// return LangRuby +// case "lua": +// return LangLua +// case "yaml", "yml": +// return LangYaml +// case "sql": +// return LangSql +// case "css", "css3": +// return LangCss +// case "markdown", "md": +// return LangMarkdown +// case "sh", "bash": +// return LangBash +// case "csharp", "cs": +// return LangCsharp +// case "elixir", "ex": +// return LangElixir +// case "elm": +// return LangElm +// case "go": +// return LangGo +// case "groovy": +// return LangGroovy +// case "hcl", "tf": +// return LangHcl +// case "html": +// return LangHtml +// case "php": +// return LangPhp +// case "scala": +// return LangScala +// case "swift": +// return LangSwift +// default: +// return LangUnknown +// } +// } + +// // ReadFromFile reads a pattern checker definition from a YAML config file. +// func ReadFromFile(filePath string) (Analyzer, error) { +// fileContent, err := os.ReadFile(filePath) +// if err != nil { +// return Analyzer{}, err +// } + +// return ReadFromBytes(fileContent) +// } + +// // ReadFromBytes reads a pattern checker definition from bytes array +// func ReadFromBytes(fileContent []byte) (Analyzer, error) { +// var checker PatternCheckerFile +// if err := yaml.Unmarshal(fileContent, &checker); err != nil { +// return Analyzer{}, err +// } + +// lang := DecodeLanguage(checker.Language) +// if lang == LangUnknown { +// return nil, fmt.Errorf("unknown language code: '%s'", checker.Language) +// } + +// if checker.Code == "" { +// return nil, fmt.Errorf("no name provided in checker definition") +// } + +// if checker.Message == "" { +// return nil, fmt.Errorf("no message provided in checker '%s'", checker.Code) +// } + +// var patterns []*sitter.Query +// if checker.Pattern != "" { +// pattern, err := sitter.NewQuery([]byte(checker.Pattern), lang.Grammar()) +// if err != nil { +// return nil, err +// } +// patterns = append(patterns, pattern) +// } else if len(checker.Patterns) > 0 { +// for _, patternStr := range checker.Patterns { +// pattern, err := sitter.NewQuery([]byte(patternStr), lang.Grammar()) +// if err != nil { +// return nil, err +// } +// patterns = append(patterns, pattern) +// } +// } else { +// return nil, fmt.Errorf("no pattern provided in checker '%s'", checker.Code) +// } + +// if checker.Pattern != "" && len(checker.Patterns) > 0 { +// return nil, fmt.Errorf("only one of 'pattern' or 'patterns' can be provided in a checker definition") +// } + +// // include and exclude patterns +// var pathFilter *PathFilter +// if checker.Exclude != nil || checker.Include != nil { +// pathFilter = &PathFilter{ +// ExcludeGlobs: make([]glob.Glob, 0, len(checker.Exclude)), +// IncludeGlobs: make([]glob.Glob, 0, len(checker.Include)), +// } + +// for _, exclude := range checker.Exclude { +// g, err := glob.Compile(exclude) +// if err != nil { +// return nil, err +// } +// pathFilter.ExcludeGlobs = append(pathFilter.ExcludeGlobs, g) +// } + +// for _, include := range checker.Include { +// g, err := glob.Compile(include) +// if err != nil { +// return nil, err +// } +// pathFilter.IncludeGlobs = append(pathFilter.IncludeGlobs, g) +// } +// } + +// // node filters +// var filters []NodeFilter +// if checker.Filters != nil { +// for _, filter := range checker.Filters { +// if filter.PatternInside != "" { +// queryStr := filter.PatternInside + " @" + filterPatternKey +// query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) +// if err != nil { +// return nil, err +// } + +// filters = append(filters, NodeFilter{ +// query: query, +// shouldMatch: true, +// }) +// } + +// if filter.PatternNotInside != "" { +// queryStr := filter.PatternNotInside + " @" + filterPatternKey +// query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) +// if err != nil { +// return nil, err +// } + +// filters = append(filters, NodeFilter{ +// query: query, +// shouldMatch: false, +// }) +// } +// } +// } + +// patternChecker := &patternCheckerImpl{ +// language: lang, +// patterns: patterns, +// issueMessage: checker.Message, +// issueId: checker.Code, +// pathFilter: pathFilter, +// filters: filters, +// } +// return patternChecker, nil +// } \ No newline at end of file diff --git a/pkg/analysis/rule.go b/pkg/analysis/rule.go index bbb76443..2ba6a658 100644 --- a/pkg/analysis/rule.go +++ b/pkg/analysis/rule.go @@ -2,7 +2,7 @@ package analysis import sitter "github.com/smacker/go-tree-sitter" -type VisitFn func(checker Checker, analyzer *Analyzer, node *sitter.Node) +type VisitFn func(checker Checker, node *sitter.Node) type Checker interface { NodeType() string diff --git a/pkg/cli/cli.go b/pkg/cli/cli.go index d66f0fd2..495beb5a 100644 --- a/pkg/cli/cli.go +++ b/pkg/cli/cli.go @@ -193,13 +193,14 @@ to run only the built-in checkers, and --checkers=all to run both.`, // Track test failures but continue running all tests var testsFailed bool - yamlPassed, err := runTests(analysisDir) + _, _, yamlPassed, err := goAnalysis.RunAnalyzerTests(analysisDir, []*goAnalysis.Analyzer{}) if err != nil { err = fmt.Errorf("error running YAML tests: %w", err) fmt.Fprintln(os.Stderr, err.Error()) // Don't return immediately, continue with other tests } if !yamlPassed { + return fmt.Errorf("YAML tests failed ") testsFailed = true } @@ -294,30 +295,30 @@ func (c *Cli) buildCustomGoCheckers() error { return nil } -func (c *Cli) CheckFile( - checkersMap map[analysis.Language][]analysis.Checker, - patternCheckers map[analysis.Language][]analysis.YamlChecker, - path string, -) ([]*goAnalysis.Issue, error) { - lang := analysis.LanguageFromFilePath(path) - checkers := checkersMap[lang] - if checkers == nil && patternCheckers == nil { - // no checkers are registered for this language - return nil, nil - } - - analyzer, err := analysis.FromFile(path, checkers) - if err != nil { - return nil, err - } - analyzer.WorkDir = c.RootDirectory - - if patternCheckers != nil { - analyzer.YamlCheckers = patternCheckers[lang] - } - - return analyzer.Analyze(), nil -} +// func (c *Cli) CheckFile( +// checkersMap map[goAnalysis.Language][]goAnalysis.Analyzer, +// patternCheckers map[goAnalysis.Language][]goAnalysis.Analyzer, +// path string, +// ) ([]*goAnalysis.Issue, error) { +// lang := goAnalysis.LanguageFromFilePath(path) +// checkers := checkersMap[lang] +// if checkers == nil && patternCheckers == nil { +// // no checkers are registered for this language +// return nil, nil +// } + +// analyzer, err := analysis.FromFile(path, checkers) +// if err != nil { +// return nil, err +// } +// analyzer.WorkDir = c.RootDirectory + +// if patternCheckers != nil { +// analyzer.YamlCheckers = patternCheckers[lang] +// } + +// return analyzer.Analyze(), nil +// } type checkResult struct { issues []*goAnalysis.Issue @@ -360,7 +361,7 @@ var defaultIgnoreDirs = []string{ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) - patternCheckers := make(map[analysis.Language][]analysis.YamlChecker) + patternCheckers := make(map[goAnalysis.Language][]goAnalysis.Analyzer) var goAnalyzers []*goAnalysis.Analyzer if runBuiltinCheckers { @@ -443,8 +444,8 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { } } - language := analysis.LanguageFromFilePath(path) - if language == analysis.LangUnknown { + language := goAnalysis.LanguageFromFilePath(path) + if language == goAnalysis.LangUnknown { return nil } @@ -453,7 +454,15 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { // run checker // the first arg is empty, since the format for inbuilt Go-based checkers has changed // TODO: factor it in later - issues, err := c.CheckFile(map[analysis.Language][]analysis.Checker{}, patternCheckers, path) + nonYamlAnalyzers := []*goAnalysis.Analyzer{} + issues, err := goAnalysis.RunAnalyzers(c.RootDirectory, nonYamlAnalyzers, func(filename string) bool { + if c.CmpHash != "" { + _, isChanged := changedFileMap[filename] + return isChanged + } + return true + }) + if err != nil { // parse error on a single file should not exit the entire analysis process // TODO: logging the below error message is not helpful, as it logs unsupported file types as well @@ -465,7 +474,14 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { txt, _ := issue.AsText() log.Error().Msg(string(txt)) - result.issues = append(result.issues, issue) + result.issues = append(result.issues, &goAnalysis.Issue{ + Filepath: issue.Filepath, + Message: issue.Message, + Severity: goAnalysis.Severity(issue.Severity), + Category: goAnalysis.Category(issue.Category), + Node: issue.Node, + Id: issue.Id, + }) } return nil diff --git a/pkg/cli/test_runner.go b/pkg/cli/test_runner.go index a951daf4..31d00123 100644 --- a/pkg/cli/test_runner.go +++ b/pkg/cli/test_runner.go @@ -1,180 +1,180 @@ package cli -import ( - "bufio" - "fmt" - "io/fs" - "os" - "path/filepath" - "slices" - "strings" - - "globstar.dev/pkg/analysis" -) - -func runTests(dir string) (bool, error) { - passed, err := runTestCases(dir) - if err != nil { - return false, err - } - - return passed, nil -} - -type testCase struct { - yamlCheckerPath string - testFile string -} - -func findTestCases(dir string) ([]testCase, error) { - var pairs []testCase // List of checker file/test file pairs - - err := filepath.Walk(dir, func(path string, d fs.FileInfo, err error) error { - if err != nil { - return nil - } - - if d.IsDir() { - return nil - } - - if d.Mode()&fs.ModeSymlink != 0 { - // skip symlinks - return nil - } - - fileExt := filepath.Ext(path) - isYamlFile := fileExt == ".yaml" || fileExt == ".yml" - if !isYamlFile { - return nil - } - - patternChecker, err := analysis.ReadFromFile(path) - if err != nil { - fmt.Fprintf(os.Stderr, "invalid checker '%s': %s\n", d.Name(), err.Error()) - return nil - } - - testFile := strings.TrimSuffix(path, fileExt) + ".test" + analysis.GetExtFromLanguage(patternChecker.Language()) - - if _, err := os.Stat(testFile); os.IsNotExist(err) { - testFile = "" - } - - pairs = append(pairs, testCase{ - yamlCheckerPath: path, - testFile: testFile, - }) - - return nil - }) - - return pairs, err -} - -func runTestCases(dir string) (passed bool, err error) { - testCases, err := findTestCases(dir) - if err != nil { - return false, err - } - - if len(testCases) == 0 { - return false, fmt.Errorf("no test cases found") - } - - passed = true - for _, tc := range testCases { - if tc.testFile == "" { - fmt.Fprintf(os.Stderr, "No test cases found for test: %s\n", filepath.Base(tc.yamlCheckerPath)) - continue - } - - fmt.Fprintf(os.Stderr, "Running test case: %s\n", filepath.Base(tc.yamlCheckerPath)) - // Read and parse the checker definition - checker, err := analysis.ReadFromFile(tc.yamlCheckerPath) - if err != nil { - return false, err - } - - // Parse the test file - analyzer, err := analysis.FromFile(tc.testFile, []analysis.Checker{}) - if err != nil { - return false, err - } - analyzer.WorkDir = dir - analyzer.YamlCheckers = append(analyzer.YamlCheckers, checker) - issues := analyzer.Analyze() - - want, err := findExpectedLines(tc.testFile) - if err != nil { - return false, err - } - - var got []int - for _, issue := range issues { - got = append(got, int(issue.Node.Range().StartPoint.Row)+1) // 0-indexed to 1-indexed - } - - slices.Sort(got) - - testName := filepath.Base(tc.testFile) - - if len(want) != len(got) { - message := fmt.Sprintf( - "(%s): expected issues on the following lines: %v\nbut issues were raised on lines: %v\n", - testName, - want, - got, - ) - - fmt.Fprintf(os.Stderr, "%s", message) - passed = false - continue - } - - for i := range want { - if want[i] != got[i] { - message := fmt.Sprintf( - "(%s): expected issue on line %d, but next occurrence is on line %d\n", - testName, - want, - got, - ) - - fmt.Fprintf(os.Stderr, "%s\n", message) - passed = false - } - } - } - - return passed, nil -} - -// findExpectedLines reads a file and returns line numbers containing "" -// (incremented by 1). -func findExpectedLines(filePath string) ([]int, error) { - file, err := os.Open(filePath) - if err != nil { - return nil, err - } - defer file.Close() - - var expectedLines []int - scanner := bufio.NewScanner(file) - - lineNumber := 0 - for scanner.Scan() { - text := strings.ToLower(scanner.Text()) - lineNumber++ - if strings.Contains(text, "") || strings.Contains(text, "") { - expectedLines = append(expectedLines, lineNumber+1) - } - } - - // Check for scanner errors - if err := scanner.Err(); err != nil { - return nil, err - } - - return expectedLines, nil -} +// import ( +// "bufio" +// "fmt" +// "io/fs" +// "os" +// "path/filepath" +// "slices" +// "strings" + +// "globstar.dev/pkg/analysis" +// ) + +// func runTests(dir string) (bool, error) { +// passed, err := runTestCases(dir) +// if err != nil { +// return false, err +// } + +// return passed, nil +// } + +// type testCase struct { +// yamlCheckerPath string +// testFile string +// } + +// func findTestCases(dir string) ([]testCase, error) { +// var pairs []testCase // List of checker file/test file pairs + +// err := filepath.Walk(dir, func(path string, d fs.FileInfo, err error) error { +// if err != nil { +// return nil +// } + +// if d.IsDir() { +// return nil +// } + +// if d.Mode()&fs.ModeSymlink != 0 { +// // skip symlinks +// return nil +// } + +// fileExt := filepath.Ext(path) +// isYamlFile := fileExt == ".yaml" || fileExt == ".yml" +// if !isYamlFile { +// return nil +// } + +// patternChecker, err := analysis.ReadFromFile(path) +// if err != nil { +// fmt.Fprintf(os.Stderr, "invalid checker '%s': %s\n", d.Name(), err.Error()) +// return nil +// } + +// testFile := strings.TrimSuffix(path, fileExt) + ".test" + analysis.GetExtFromLanguage(patternChecker.Language) + +// if _, err := os.Stat(testFile); os.IsNotExist(err) { +// testFile = "" +// } + +// pairs = append(pairs, testCase{ +// yamlCheckerPath: path, +// testFile: testFile, +// }) + +// return nil +// }) + +// return pairs, err +// } + +// func runTestCases(dir string) (passed bool, err error) { +// testCases, err := findTestCases(dir) +// if err != nil { +// return false, err +// } + +// if len(testCases) == 0 { +// return false, fmt.Errorf("no test cases found") +// } + +// passed = true +// for _, tc := range testCases { +// if tc.testFile == "" { +// fmt.Fprintf(os.Stderr, "No test cases found for test: %s\n", filepath.Base(tc.yamlCheckerPath)) +// continue +// } + +// fmt.Fprintf(os.Stderr, "Running test case: %s\n", filepath.Base(tc.yamlCheckerPath)) +// // Read and parse the checker definition +// checker, err := analysis.ReadFromFile(tc.yamlCheckerPath) +// if err != nil { +// return false, err +// } + +// // Parse the test file +// analyzer, err := analysis.FromFile(tc.testFile, []analysis.Checker{}) +// if err != nil { +// return false, err +// } +// analyzer.WorkDir = dir +// analyzer.Analyzers = append(analyzer.Analyzers, checker) +// issues := analyzer.Analyze() + +// want, err := findExpectedLines(tc.testFile) +// if err != nil { +// return false, err +// } + +// var got []int +// for _, issue := range issues { +// got = append(got, int(issue.Node.Range().StartPoint.Row)+1) // 0-indexed to 1-indexed +// } + +// slices.Sort(got) + +// testName := filepath.Base(tc.testFile) + +// if len(want) != len(got) { +// message := fmt.Sprintf( +// "(%s): expected issues on the following lines: %v\nbut issues were raised on lines: %v\n", +// testName, +// want, +// got, +// ) + +// fmt.Fprintf(os.Stderr, "%s", message) +// passed = false +// continue +// } + +// for i := range want { +// if want[i] != got[i] { +// message := fmt.Sprintf( +// "(%s): expected issue on line %d, but next occurrence is on line %d\n", +// testName, +// want, +// got, +// ) + +// fmt.Fprintf(os.Stderr, "%s\n", message) +// passed = false +// } +// } +// } + +// return passed, nil +// } + +// // findExpectedLines reads a file and returns line numbers containing "" +// // (incremented by 1). +// func findExpectedLines(filePath string) ([]int, error) { +// file, err := os.Open(filePath) +// if err != nil { +// return nil, err +// } +// defer file.Close() + +// var expectedLines []int +// scanner := bufio.NewScanner(file) + +// lineNumber := 0 +// for scanner.Scan() { +// text := strings.ToLower(scanner.Text()) +// lineNumber++ +// if strings.Contains(text, "") || strings.Contains(text, "") { +// expectedLines = append(expectedLines, lineNumber+1) +// } +// } + +// // Check for scanner errors +// if err := scanner.Err(); err != nil { +// return nil, err +// } + +// return expectedLines, nil +// } From 2c5f6811e0be2310ba375b08ef1f445efd1db959 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Fri, 30 May 2025 16:04:07 +0530 Subject: [PATCH 03/12] feat: all dependencies to pkg/analysis removed --- analysis/language.go | 2 +- analysis/testrunner.go | 10 +- analysis/yaml.go | 5 +- pkg/analysis/language.go | 596 +++++++++++++++++----------------- pkg/analysis/rule.go | 52 +-- pkg/analysis/scope.go | 368 ++++++++++----------- pkg/analysis/scope_ts.go | 584 ++++++++++++++++----------------- pkg/analysis/scope_ts_test.go | 262 +++++++-------- pkg/analysis/walk.go | 162 ++++----- pkg/cli/cli.go | 71 ++-- pkg/cli/test_runner.go | 358 ++++++++++---------- 11 files changed, 1238 insertions(+), 1232 deletions(-) diff --git a/analysis/language.go b/analysis/language.go index 621afc0f..8a8f051b 100644 --- a/analysis/language.go +++ b/analysis/language.go @@ -230,7 +230,7 @@ func LanguageFromFilePath(path string) Language { return LangYaml case ".css": return LangCss - case ".dockerfile": + case ".dockerfile", ".Dockerfile": return LangDockerfile case ".md": return LangMarkdown diff --git a/analysis/testrunner.go b/analysis/testrunner.go index 1558468b..d99be964 100644 --- a/analysis/testrunner.go +++ b/analysis/testrunner.go @@ -254,11 +254,11 @@ func RunAnalyzerTests(testDir string, analyzers []*Analyzer) (string, string, bo // if there's a test file in the testDir for which there's no analyzer, // it's most likely a YAML checker test, so skip it - yamlAnalyzers, err := discoverYamlAnalyzers(testDir) - if err != nil { - return "", "", false, err - } - analyzers = append(analyzers, yamlAnalyzers...) + // yamlAnalyzers, err := discoverYamlAnalyzers(testDir) + // if err != nil { + // return "", "", false, err + // } + // analyzers = append(analyzers, yamlAnalyzers...) likelyTestFiles := []string{} for _, analyzer := range analyzers { diff --git a/analysis/yaml.go b/analysis/yaml.go index 8e4e8888..7dc57c3e 100644 --- a/analysis/yaml.go +++ b/analysis/yaml.go @@ -181,6 +181,7 @@ func ReadFromBytes(fileContent []byte) (Analyzer, error) { } patternChecker := &Analyzer{ + Name: checker.Code, Language: lang, Description: checker.Description, Category: checker.Category, @@ -189,6 +190,7 @@ func ReadFromBytes(fileContent []byte) (Analyzer, error) { yamlAnalyzer := &YamlAnalyzer{ Analyzer: Analyzer{ + Name: checker.Code, Language: lang, Description: checker.Description, Category: checker.Category, @@ -225,8 +227,9 @@ func RunYamlAnalyzer(YamlAnalyzer *YamlAnalyzer) func(pass *Pass) (any, error) { captureName := query.CaptureNameForId(capture.Index) message = strings.ReplaceAll(message, "@"+captureName, capture.Node.Content(pass.FileContext.Source)) } + + pass.Report(pass, capture.Node, message) } - pass.Report(pass, capture.Node, YamlAnalyzer.Message) } } diff --git a/pkg/analysis/language.go b/pkg/analysis/language.go index 41d7974a..a30046f2 100644 --- a/pkg/analysis/language.go +++ b/pkg/analysis/language.go @@ -1,316 +1,316 @@ package analysis -import ( - "context" - "fmt" - "os" - "path/filepath" +// import ( +// "context" +// "fmt" +// "os" +// "path/filepath" - sitter "github.com/smacker/go-tree-sitter" +// sitter "github.com/smacker/go-tree-sitter" - treeSitterBash "github.com/smacker/go-tree-sitter/bash" - treeSitterCsharp "github.com/smacker/go-tree-sitter/csharp" - treeSitterCss "github.com/smacker/go-tree-sitter/css" - treeSitterDockerfile "github.com/smacker/go-tree-sitter/dockerfile" - treeSitterElixir "github.com/smacker/go-tree-sitter/elixir" - treeSitterElm "github.com/smacker/go-tree-sitter/elm" - treeSitterGo "github.com/smacker/go-tree-sitter/golang" - treeSitterGroovy "github.com/smacker/go-tree-sitter/groovy" - treeSitterHcl "github.com/smacker/go-tree-sitter/hcl" - treeSitterHtml "github.com/smacker/go-tree-sitter/html" - treeSitterJava "github.com/smacker/go-tree-sitter/java" - treeSitterKotlin "github.com/smacker/go-tree-sitter/kotlin" - treeSitterLua "github.com/smacker/go-tree-sitter/lua" - treeSitterOCaml "github.com/smacker/go-tree-sitter/ocaml" - treeSitterPhp "github.com/smacker/go-tree-sitter/php" - treeSitterPy "github.com/smacker/go-tree-sitter/python" - treeSitterRuby "github.com/smacker/go-tree-sitter/ruby" - treeSitterRust "github.com/smacker/go-tree-sitter/rust" - treeSitterScala "github.com/smacker/go-tree-sitter/scala" - treeSitterSql "github.com/smacker/go-tree-sitter/sql" - treeSitterSwift "github.com/smacker/go-tree-sitter/swift" - treeSitterTsx "github.com/smacker/go-tree-sitter/typescript/tsx" - treeSitterTs "github.com/smacker/go-tree-sitter/typescript/typescript" -) +// treeSitterBash "github.com/smacker/go-tree-sitter/bash" +// treeSitterCsharp "github.com/smacker/go-tree-sitter/csharp" +// treeSitterCss "github.com/smacker/go-tree-sitter/css" +// treeSitterDockerfile "github.com/smacker/go-tree-sitter/dockerfile" +// treeSitterElixir "github.com/smacker/go-tree-sitter/elixir" +// treeSitterElm "github.com/smacker/go-tree-sitter/elm" +// treeSitterGo "github.com/smacker/go-tree-sitter/golang" +// treeSitterGroovy "github.com/smacker/go-tree-sitter/groovy" +// treeSitterHcl "github.com/smacker/go-tree-sitter/hcl" +// treeSitterHtml "github.com/smacker/go-tree-sitter/html" +// treeSitterJava "github.com/smacker/go-tree-sitter/java" +// treeSitterKotlin "github.com/smacker/go-tree-sitter/kotlin" +// treeSitterLua "github.com/smacker/go-tree-sitter/lua" +// treeSitterOCaml "github.com/smacker/go-tree-sitter/ocaml" +// treeSitterPhp "github.com/smacker/go-tree-sitter/php" +// treeSitterPy "github.com/smacker/go-tree-sitter/python" +// treeSitterRuby "github.com/smacker/go-tree-sitter/ruby" +// treeSitterRust "github.com/smacker/go-tree-sitter/rust" +// treeSitterScala "github.com/smacker/go-tree-sitter/scala" +// treeSitterSql "github.com/smacker/go-tree-sitter/sql" +// treeSitterSwift "github.com/smacker/go-tree-sitter/swift" +// treeSitterTsx "github.com/smacker/go-tree-sitter/typescript/tsx" +// treeSitterTs "github.com/smacker/go-tree-sitter/typescript/typescript" +// ) -// ParseResult is the result of parsing a file. -type ParseResult struct { - // Ast is the root node of the tree-sitter parse-tree - // representing this file - Ast *sitter.Node - // Source is the raw source code of the file - Source []byte - // FilePath is the path to the file that was parsed - FilePath string - // Language is the tree-sitter language used to parse the file - TsLanguage *sitter.Language - // Language is the language of the file - Language Language - // ScopeTree represents the scope hierarchy of the file. - // Can be nil if scope support for this language has not been implemented yet. - ScopeTree *ScopeTree -} +// // ParseResult is the result of parsing a file. +// type ParseResult struct { +// // Ast is the root node of the tree-sitter parse-tree +// // representing this file +// Ast *sitter.Node +// // Source is the raw source code of the file +// Source []byte +// // FilePath is the path to the file that was parsed +// FilePath string +// // Language is the tree-sitter language used to parse the file +// TsLanguage *sitter.Language +// // Language is the language of the file +// Language Language +// // ScopeTree represents the scope hierarchy of the file. +// // Can be nil if scope support for this language has not been implemented yet. +// ScopeTree *ScopeTree +// } -type Language int +// type Language int -const ( - LangUnknown Language = iota - LangPy - LangJs // vanilla JS and JSX - LangTs // TypeScript (not TSX) - LangTsx // TypeScript with JSX extension - LangJava - LangRuby - LangRust - LangYaml - LangCss - LangDockerfile - LangMarkdown - LangSql - LangKotlin - LangOCaml - LangLua - LangBash - LangCsharp - LangElixir - LangElm - LangGo - LangGroovy - LangHcl - LangHtml - LangPhp - LangScala - LangSwift -) +// const ( +// LangUnknown Language = iota +// LangPy +// LangJs // vanilla JS and JSX +// LangTs // TypeScript (not TSX) +// LangTsx // TypeScript with JSX extension +// LangJava +// LangRuby +// LangRust +// LangYaml +// LangCss +// LangDockerfile +// LangMarkdown +// LangSql +// LangKotlin +// LangOCaml +// LangLua +// LangBash +// LangCsharp +// LangElixir +// LangElm +// LangGo +// LangGroovy +// LangHcl +// LangHtml +// LangPhp +// LangScala +// LangSwift +// ) -// tsGrammarForLang returns the tree-sitter grammar for the given language. -// May return `nil` when `lang` is `LangUnkown`. -func (lang Language) Grammar() *sitter.Language { - switch lang { - case LangPy: - return treeSitterPy.GetLanguage() - case LangJs: - return treeSitterTsx.GetLanguage() // Use TypeScript's JSX grammar for JS/JSX - case LangTs: - return treeSitterTs.GetLanguage() - case LangTsx: - return treeSitterTsx.GetLanguage() - case LangJava: - return treeSitterJava.GetLanguage() - case LangRuby: - return treeSitterRuby.GetLanguage() - case LangRust: - return treeSitterRust.GetLanguage() - case LangSql: - return treeSitterSql.GetLanguage() - case LangKotlin: - return treeSitterKotlin.GetLanguage() - case LangCss: - return treeSitterCss.GetLanguage() - case LangOCaml: - return treeSitterOCaml.GetLanguage() - case LangLua: - return treeSitterLua.GetLanguage() - case LangDockerfile: - return treeSitterDockerfile.GetLanguage() - case LangBash: - return treeSitterBash.GetLanguage() - case LangCsharp: - return treeSitterCsharp.GetLanguage() - case LangElixir: - return treeSitterElixir.GetLanguage() - case LangElm: - return treeSitterElm.GetLanguage() - case LangGo: - return treeSitterGo.GetLanguage() - case LangGroovy: - return treeSitterGroovy.GetLanguage() - case LangHcl: - return treeSitterHcl.GetLanguage() - case LangHtml: - return treeSitterHtml.GetLanguage() - case LangPhp: - return treeSitterPhp.GetLanguage() - case LangScala: - return treeSitterScala.GetLanguage() - case LangSwift: - return treeSitterSwift.GetLanguage() - default: - return nil - } -} +// // tsGrammarForLang returns the tree-sitter grammar for the given language. +// // May return `nil` when `lang` is `LangUnkown`. +// func (lang Language) Grammar() *sitter.Language { +// switch lang { +// case LangPy: +// return treeSitterPy.GetLanguage() +// case LangJs: +// return treeSitterTsx.GetLanguage() // Use TypeScript's JSX grammar for JS/JSX +// case LangTs: +// return treeSitterTs.GetLanguage() +// case LangTsx: +// return treeSitterTsx.GetLanguage() +// case LangJava: +// return treeSitterJava.GetLanguage() +// case LangRuby: +// return treeSitterRuby.GetLanguage() +// case LangRust: +// return treeSitterRust.GetLanguage() +// case LangSql: +// return treeSitterSql.GetLanguage() +// case LangKotlin: +// return treeSitterKotlin.GetLanguage() +// case LangCss: +// return treeSitterCss.GetLanguage() +// case LangOCaml: +// return treeSitterOCaml.GetLanguage() +// case LangLua: +// return treeSitterLua.GetLanguage() +// case LangDockerfile: +// return treeSitterDockerfile.GetLanguage() +// case LangBash: +// return treeSitterBash.GetLanguage() +// case LangCsharp: +// return treeSitterCsharp.GetLanguage() +// case LangElixir: +// return treeSitterElixir.GetLanguage() +// case LangElm: +// return treeSitterElm.GetLanguage() +// case LangGo: +// return treeSitterGo.GetLanguage() +// case LangGroovy: +// return treeSitterGroovy.GetLanguage() +// case LangHcl: +// return treeSitterHcl.GetLanguage() +// case LangHtml: +// return treeSitterHtml.GetLanguage() +// case LangPhp: +// return treeSitterPhp.GetLanguage() +// case LangScala: +// return treeSitterScala.GetLanguage() +// case LangSwift: +// return treeSitterSwift.GetLanguage() +// default: +// return nil +// } +// } -// NOTE(@injuly): TypeScript and TSX have to parsed with DIFFERENT -// grammars. Otherwise, because an expression like `bar` is -// parsed as a (legacy) type-cast in TS, but a JSXElement in TSX. -// See: https://facebook.github.io/jsx/#prod-JSXElement +// // NOTE(@injuly): TypeScript and TSX have to parsed with DIFFERENT +// // grammars. Otherwise, because an expression like `bar` is +// // parsed as a (legacy) type-cast in TS, but a JSXElement in TSX. +// // See: https://facebook.github.io/jsx/#prod-JSXElement -// LanguageFromFilePath returns the Language of the file at the given path -// returns `LangUnkown` if the language is not recognized (e.g: `.txt` files). -func LanguageFromFilePath(path string) Language { - ext := filepath.Ext(path) - switch ext { - case ".py": - return LangPy - // TODO: .jsx and .js can both have JSX syntax -_- - case ".js", ".jsx": - return LangJs - case ".ts": - return LangTs - case ".tsx": - return LangTs - case ".java": - return LangJava - case ".rb": - return LangRuby - case ".rs": - return LangRust - case ".css": - return LangCss - case ".Dockerfile": - return LangDockerfile - case ".sql": - return LangSql - case ".kt": - return LangKotlin - case ".ml": - return LangOCaml - case ".lua": - return LangLua - case ".sh": - return LangBash - case ".cs": - return LangCsharp - case ".ex": - return LangElixir - case ".elm": - return LangElm - case ".go": - return LangGo - case ".groovy": - return LangGroovy - case ".tf": - return LangHcl - case ".html": - return LangHtml - case ".php": - return LangPhp - case ".scala": - return LangScala - case ".swift": - return LangSwift - default: - return LangUnknown - } -} +// // LanguageFromFilePath returns the Language of the file at the given path +// // returns `LangUnkown` if the language is not recognized (e.g: `.txt` files). +// func LanguageFromFilePath(path string) Language { +// ext := filepath.Ext(path) +// switch ext { +// case ".py": +// return LangPy +// // TODO: .jsx and .js can both have JSX syntax -_- +// case ".js", ".jsx": +// return LangJs +// case ".ts": +// return LangTs +// case ".tsx": +// return LangTs +// case ".java": +// return LangJava +// case ".rb": +// return LangRuby +// case ".rs": +// return LangRust +// case ".css": +// return LangCss +// case ".Dockerfile": +// return LangDockerfile +// case ".sql": +// return LangSql +// case ".kt": +// return LangKotlin +// case ".ml": +// return LangOCaml +// case ".lua": +// return LangLua +// case ".sh": +// return LangBash +// case ".cs": +// return LangCsharp +// case ".ex": +// return LangElixir +// case ".elm": +// return LangElm +// case ".go": +// return LangGo +// case ".groovy": +// return LangGroovy +// case ".tf": +// return LangHcl +// case ".html": +// return LangHtml +// case ".php": +// return LangPhp +// case ".scala": +// return LangScala +// case ".swift": +// return LangSwift +// default: +// return LangUnknown +// } +// } -func GetExtFromLanguage(lang Language) string { - switch lang { - case LangPy: - return ".py" - case LangJs: - return ".js" - case LangTs: - return ".ts" - case LangTsx: - return ".tsx" - case LangJava: - return ".java" - case LangRuby: - return ".rb" - case LangRust: - return ".rs" - case LangYaml: - return ".yaml" - case LangCss: - return ".css" - case LangDockerfile: - return ".Dockerfile" - case LangSql: - return ".sql" - case LangKotlin: - return ".kt" - case LangOCaml: - return ".ml" - case LangLua: - return ".lua" - case LangBash: - return ".sh" - case LangCsharp: - return ".cs" - case LangElixir: - return ".ex" - case LangElm: - return ".elm" - case LangGo: - return ".go" - case LangGroovy: - return ".groovy" - case LangHcl: - return ".tf" - case LangHtml: - return ".html" - case LangPhp: - return ".php" - case LangScala: - return ".scala" - case LangSwift: - return ".swift" - default: - return "" - } -} +// func GetExtFromLanguage(lang Language) string { +// switch lang { +// case LangPy: +// return ".py" +// case LangJs: +// return ".js" +// case LangTs: +// return ".ts" +// case LangTsx: +// return ".tsx" +// case LangJava: +// return ".java" +// case LangRuby: +// return ".rb" +// case LangRust: +// return ".rs" +// case LangYaml: +// return ".yaml" +// case LangCss: +// return ".css" +// case LangDockerfile: +// return ".Dockerfile" +// case LangSql: +// return ".sql" +// case LangKotlin: +// return ".kt" +// case LangOCaml: +// return ".ml" +// case LangLua: +// return ".lua" +// case LangBash: +// return ".sh" +// case LangCsharp: +// return ".cs" +// case LangElixir: +// return ".ex" +// case LangElm: +// return ".elm" +// case LangGo: +// return ".go" +// case LangGroovy: +// return ".groovy" +// case LangHcl: +// return ".tf" +// case LangHtml: +// return ".html" +// case LangPhp: +// return ".php" +// case LangScala: +// return ".scala" +// case LangSwift: +// return ".swift" +// default: +// return "" +// } +// } -func Parse(filePath string, source []byte, language Language, grammar *sitter.Language) (*ParseResult, error) { - ast, err := sitter.ParseCtx(context.Background(), source, grammar) - if err != nil { - return nil, fmt.Errorf("failed to parse %s", filePath) - } +// func Parse(filePath string, source []byte, language Language, grammar *sitter.Language) (*ParseResult, error) { +// ast, err := sitter.ParseCtx(context.Background(), source, grammar) +// if err != nil { +// return nil, fmt.Errorf("failed to parse %s", filePath) +// } - scopeTree := MakeScopeTree(language, ast, source) - parseResult := &ParseResult{ - Ast: ast, - Source: source, - FilePath: filePath, - TsLanguage: grammar, - Language: language, - ScopeTree: scopeTree, - } +// scopeTree := MakeScopeTree(language, ast, source) +// parseResult := &ParseResult{ +// Ast: ast, +// Source: source, +// FilePath: filePath, +// TsLanguage: grammar, +// Language: language, +// ScopeTree: scopeTree, +// } - return parseResult, nil -} +// return parseResult, nil +// } -// ParseFile parses the file at the given path using the appropriate -// tree-sitter grammar. -func ParseFile(filePath string) (*ParseResult, error) { - lang := LanguageFromFilePath(filePath) - grammar := lang.Grammar() - if grammar == nil { - return nil, fmt.Errorf("unsupported file type: %s", filePath) - } +// // ParseFile parses the file at the given path using the appropriate +// // tree-sitter grammar. +// func ParseFile(filePath string) (*ParseResult, error) { +// lang := LanguageFromFilePath(filePath) +// grammar := lang.Grammar() +// if grammar == nil { +// return nil, fmt.Errorf("unsupported file type: %s", filePath) +// } - source, err := os.ReadFile(filePath) - if err != nil { - return nil, err - } +// source, err := os.ReadFile(filePath) +// if err != nil { +// return nil, err +// } - return Parse(filePath, source, lang, grammar) -} +// return Parse(filePath, source, lang, grammar) +// } -func GetEscapedCommentIdentifierFromPath(path string) string { - lang := LanguageFromFilePath(path) - switch lang { - case LangJs, LangTs, LangTsx, LangJava, LangRust, LangCss, LangMarkdown, LangKotlin, LangCsharp, LangGo, LangGroovy, LangPhp, LangScala, LangSwift: - return "\\/\\/" - case LangPy, LangLua, LangBash, LangRuby, LangYaml, LangDockerfile, LangElixir, LangHcl: - return "#" - case LangSql, LangElm: - return "--" - case LangHtml: - return "<\\!--" - case LangOCaml: - return "\\(\\*" - default: - return "" - } -} +// func GetEscapedCommentIdentifierFromPath(path string) string { +// lang := LanguageFromFilePath(path) +// switch lang { +// case LangJs, LangTs, LangTsx, LangJava, LangRust, LangCss, LangMarkdown, LangKotlin, LangCsharp, LangGo, LangGroovy, LangPhp, LangScala, LangSwift: +// return "\\/\\/" +// case LangPy, LangLua, LangBash, LangRuby, LangYaml, LangDockerfile, LangElixir, LangHcl: +// return "#" +// case LangSql, LangElm: +// return "--" +// case LangHtml: +// return "<\\!--" +// case LangOCaml: +// return "\\(\\*" +// default: +// return "" +// } +// } diff --git a/pkg/analysis/rule.go b/pkg/analysis/rule.go index 2ba6a658..02236394 100644 --- a/pkg/analysis/rule.go +++ b/pkg/analysis/rule.go @@ -1,33 +1,33 @@ package analysis -import sitter "github.com/smacker/go-tree-sitter" +// import sitter "github.com/smacker/go-tree-sitter" -type VisitFn func(checker Checker, node *sitter.Node) +// type VisitFn func(checker Checker, node *sitter.Node) -type Checker interface { - NodeType() string - GetLanguage() Language - OnEnter() *VisitFn - OnLeave() *VisitFn -} +// type Checker interface { +// NodeType() string +// GetLanguage() Language +// OnEnter() *VisitFn +// OnLeave() *VisitFn +// } -type checkerImpl struct { - nodeType string - language Language - onEnter *VisitFn - onLeave *VisitFn -} +// type checkerImpl struct { +// nodeType string +// language Language +// onEnter *VisitFn +// onLeave *VisitFn +// } -func (r *checkerImpl) NodeType() string { return r.nodeType } -func (r *checkerImpl) GetLanguage() Language { return r.language } -func (r *checkerImpl) OnEnter() *VisitFn { return r.onEnter } -func (r *checkerImpl) OnLeave() *VisitFn { return r.onLeave } +// func (r *checkerImpl) NodeType() string { return r.nodeType } +// func (r *checkerImpl) GetLanguage() Language { return r.language } +// func (r *checkerImpl) OnEnter() *VisitFn { return r.onEnter } +// func (r *checkerImpl) OnLeave() *VisitFn { return r.onLeave } -func CreateChecker(nodeType string, language Language, onEnter, onLeave *VisitFn) Checker { - return &checkerImpl{ - nodeType: nodeType, - language: language, - onEnter: onEnter, - onLeave: onLeave, - } -} +// func CreateChecker(nodeType string, language Language, onEnter, onLeave *VisitFn) Checker { +// return &checkerImpl{ +// nodeType: nodeType, +// language: language, +// onEnter: onEnter, +// onLeave: onLeave, +// } +// } diff --git a/pkg/analysis/scope.go b/pkg/analysis/scope.go index 2aefe4ad..9a2999f2 100644 --- a/pkg/analysis/scope.go +++ b/pkg/analysis/scope.go @@ -4,187 +4,187 @@ package analysis -import sitter "github.com/smacker/go-tree-sitter" - -// Reference represents a variable reference inside a source file -// Cross-file references like those in Golang and C++ (macros/extern) are NOT supported, -// so this shouldn't be used for checkers like "unused-variable", but is safe to use for checkers like -// "unused-import" -type Reference struct { - // IsWriteRef determines if this reference is a write reference. - // For write refs, only the expression being assigned is stored. - // i.e: for `a = 3`, this list will store the `3` node, not the assignment node - IsWriteRef bool - // Variable stores the variable being referenced - Variable *Variable - // Node stores the node that references the variable - Node *sitter.Node -} - -type VarKind int32 - -const ( - VarKindError VarKind = iota - VarKindImport - VarKindFunction - VarKindVariable - VarKindParameter -) - -type Variable struct { - Kind VarKind - // Stores the name of the variable - Name string - // DeclNode is the AST node that declares this variable - DeclNode *sitter.Node - // Refs is a list of references to this variable throughout the file - Refs []*Reference -} - -// ScopeBuilder is an interface that has to be implemented -// once for every supported language. -// Languages that don't implement a `ScopeBuilder` can still have checkers, just -// not any that require scope resolution. -type ScopeBuilder interface { - GetLanguage() Language - // NodeCreatesScope returns true if the node introduces a new scope - // into the scope tree - NodeCreatesScope(node *sitter.Node) bool - // DeclaresVariable determines if we can extract new variables out of this AST node - DeclaresVariable(node *sitter.Node) bool - // CollectVariables extracts variables from the node and adds them to the scope - CollectVariables(node *sitter.Node) []*Variable - // OnNodeEnter is called when the scope builder enters a node - // for the first time, and hasn't scanned its children decls just yet - // can be used to handle language specific scoping rules, if any - // If `node` is smth like a block statement, `currentScope` corresponds - // to the scope introduced by the block statement. - OnNodeEnter(node *sitter.Node, currentScope *Scope) - // OnNodeExit is called when the scope builder exits a node - // can be used to handle language specific scoping rules, if any - // If `node` is smth like a block statement, `currentScope` corresponds - // to the scope introduced by the block statement. - OnNodeExit(node *sitter.Node, currentScope *Scope) -} - -type Scope struct { - // AstNode is the AST node that introduces this scope into the scope tree - AstNode *sitter.Node - // Variables is a map of variable name to an object representing it - Variables map[string]*Variable - // Upper is the parent scope of this scope - Upper *Scope - // Children is a list of scopes that are children of this scope - Children []*Scope -} - -func NewScope(upper *Scope) *Scope { - return &Scope{ - Variables: map[string]*Variable{}, - Upper: upper, - } -} - -// Lookup searches for a variable in the current scope and its parents -func (s *Scope) Lookup(name string) *Variable { - if v, exists := s.Variables[name]; exists { - return v - } - - if s.Upper != nil { - return s.Upper.Lookup(name) - } - - return nil -} - -type ScopeTree struct { - Language Language - // ScopeOfNode maps every scope-having node to its corresponding scope. - // E.g: a block statement is mapped to the scope it introduces. - ScopeOfNode map[*sitter.Node]*Scope - // Root is the top-level scope in the program, - // usually associated with the `program` or `module` node - Root *Scope -} - -// BuildScopeTree constructs a scope tree from the AST for a program -func BuildScopeTree(builder ScopeBuilder, ast *sitter.Node, source []byte) *ScopeTree { - root := NewScope(nil) - root.AstNode = ast - - scopeOfNode := make(map[*sitter.Node]*Scope) - buildScopeTree(builder, source, ast, root, scopeOfNode) - - return &ScopeTree{ - Language: builder.GetLanguage(), - ScopeOfNode: scopeOfNode, - Root: root, - } -} - -func buildScopeTree( - builder ScopeBuilder, - source []byte, - node *sitter.Node, - scope *Scope, - scopeOfNode map[*sitter.Node]*Scope, -) *Scope { - builder.OnNodeEnter(node, scope) - defer builder.OnNodeExit(node, scope) - - if builder.DeclaresVariable(node) { - decls := builder.CollectVariables(node) - for _, decl := range decls { - scope.Variables[decl.Name] = decl - } - } - - nextScope := scope - if builder.NodeCreatesScope(node) { - nextScope = NewScope(scope) - nextScope.AstNode = node - scopeOfNode[node] = nextScope - - if scope != nil { - scope.Children = append(scope.Children, nextScope) - } else { - scope = nextScope // root - } - } - - for i := 0; i < int(node.NamedChildCount()); i++ { - child := node.NamedChild(i) - buildScopeTree(builder, source, child, nextScope, scopeOfNode) - } - - return scope -} - -// GetScope finds the nearest surrounding scope of an AST node -func (st *ScopeTree) GetScope(node *sitter.Node) *Scope { - if scope, exists := st.ScopeOfNode[node]; exists { - return scope - } - - if parent := node.Parent(); parent != nil { - return st.GetScope(parent) - } - - return nil -} - -func MakeScopeTree(lang Language, ast *sitter.Node, source []byte) *ScopeTree { - switch lang { - case LangPy: - return nil - case LangTs, LangJs, LangTsx: - builder := &TsScopeBuilder{ - ast: ast, - source: source, - } - return BuildScopeTree(builder, ast, source) - default: - return nil - } -} +// import sitter "github.com/smacker/go-tree-sitter" + +// // Reference represents a variable reference inside a source file +// // Cross-file references like those in Golang and C++ (macros/extern) are NOT supported, +// // so this shouldn't be used for checkers like "unused-variable", but is safe to use for checkers like +// // "unused-import" +// type Reference struct { +// // IsWriteRef determines if this reference is a write reference. +// // For write refs, only the expression being assigned is stored. +// // i.e: for `a = 3`, this list will store the `3` node, not the assignment node +// IsWriteRef bool +// // Variable stores the variable being referenced +// Variable *Variable +// // Node stores the node that references the variable +// Node *sitter.Node +// } + +// type VarKind int32 + +// const ( +// VarKindError VarKind = iota +// VarKindImport +// VarKindFunction +// VarKindVariable +// VarKindParameter +// ) + +// type Variable struct { +// Kind VarKind +// // Stores the name of the variable +// Name string +// // DeclNode is the AST node that declares this variable +// DeclNode *sitter.Node +// // Refs is a list of references to this variable throughout the file +// Refs []*Reference +// } + +// // ScopeBuilder is an interface that has to be implemented +// // once for every supported language. +// // Languages that don't implement a `ScopeBuilder` can still have checkers, just +// // not any that require scope resolution. +// type ScopeBuilder interface { +// GetLanguage() Language +// // NodeCreatesScope returns true if the node introduces a new scope +// // into the scope tree +// NodeCreatesScope(node *sitter.Node) bool +// // DeclaresVariable determines if we can extract new variables out of this AST node +// DeclaresVariable(node *sitter.Node) bool +// // CollectVariables extracts variables from the node and adds them to the scope +// CollectVariables(node *sitter.Node) []*Variable +// // OnNodeEnter is called when the scope builder enters a node +// // for the first time, and hasn't scanned its children decls just yet +// // can be used to handle language specific scoping rules, if any +// // If `node` is smth like a block statement, `currentScope` corresponds +// // to the scope introduced by the block statement. +// OnNodeEnter(node *sitter.Node, currentScope *Scope) +// // OnNodeExit is called when the scope builder exits a node +// // can be used to handle language specific scoping rules, if any +// // If `node` is smth like a block statement, `currentScope` corresponds +// // to the scope introduced by the block statement. +// OnNodeExit(node *sitter.Node, currentScope *Scope) +// } + +// type Scope struct { +// // AstNode is the AST node that introduces this scope into the scope tree +// AstNode *sitter.Node +// // Variables is a map of variable name to an object representing it +// Variables map[string]*Variable +// // Upper is the parent scope of this scope +// Upper *Scope +// // Children is a list of scopes that are children of this scope +// Children []*Scope +// } + +// func NewScope(upper *Scope) *Scope { +// return &Scope{ +// Variables: map[string]*Variable{}, +// Upper: upper, +// } +// } + +// // Lookup searches for a variable in the current scope and its parents +// func (s *Scope) Lookup(name string) *Variable { +// if v, exists := s.Variables[name]; exists { +// return v +// } + +// if s.Upper != nil { +// return s.Upper.Lookup(name) +// } + +// return nil +// } + +// type ScopeTree struct { +// Language Language +// // ScopeOfNode maps every scope-having node to its corresponding scope. +// // E.g: a block statement is mapped to the scope it introduces. +// ScopeOfNode map[*sitter.Node]*Scope +// // Root is the top-level scope in the program, +// // usually associated with the `program` or `module` node +// Root *Scope +// } + +// // BuildScopeTree constructs a scope tree from the AST for a program +// func BuildScopeTree(builder ScopeBuilder, ast *sitter.Node, source []byte) *ScopeTree { +// root := NewScope(nil) +// root.AstNode = ast + +// scopeOfNode := make(map[*sitter.Node]*Scope) +// buildScopeTree(builder, source, ast, root, scopeOfNode) + +// return &ScopeTree{ +// Language: builder.GetLanguage(), +// ScopeOfNode: scopeOfNode, +// Root: root, +// } +// } + +// func buildScopeTree( +// builder ScopeBuilder, +// source []byte, +// node *sitter.Node, +// scope *Scope, +// scopeOfNode map[*sitter.Node]*Scope, +// ) *Scope { +// builder.OnNodeEnter(node, scope) +// defer builder.OnNodeExit(node, scope) + +// if builder.DeclaresVariable(node) { +// decls := builder.CollectVariables(node) +// for _, decl := range decls { +// scope.Variables[decl.Name] = decl +// } +// } + +// nextScope := scope +// if builder.NodeCreatesScope(node) { +// nextScope = NewScope(scope) +// nextScope.AstNode = node +// scopeOfNode[node] = nextScope + +// if scope != nil { +// scope.Children = append(scope.Children, nextScope) +// } else { +// scope = nextScope // root +// } +// } + +// for i := 0; i < int(node.NamedChildCount()); i++ { +// child := node.NamedChild(i) +// buildScopeTree(builder, source, child, nextScope, scopeOfNode) +// } + +// return scope +// } + +// // GetScope finds the nearest surrounding scope of an AST node +// func (st *ScopeTree) GetScope(node *sitter.Node) *Scope { +// if scope, exists := st.ScopeOfNode[node]; exists { +// return scope +// } + +// if parent := node.Parent(); parent != nil { +// return st.GetScope(parent) +// } + +// return nil +// } + +// func MakeScopeTree(lang Language, ast *sitter.Node, source []byte) *ScopeTree { +// switch lang { +// case LangPy: +// return nil +// case LangTs, LangJs, LangTsx: +// builder := &TsScopeBuilder{ +// ast: ast, +// source: source, +// } +// return BuildScopeTree(builder, ast, source) +// default: +// return nil +// } +// } diff --git a/pkg/analysis/scope_ts.go b/pkg/analysis/scope_ts.go index f69b50cd..62d65661 100644 --- a/pkg/analysis/scope_ts.go +++ b/pkg/analysis/scope_ts.go @@ -1,295 +1,295 @@ // scope resolution implementation for JS and TS files package analysis -import ( - "slices" - - sitter "github.com/smacker/go-tree-sitter" -) - -type UnresolvedRef struct { - id *sitter.Node - surroundingScope *Scope -} - -type TsScopeBuilder struct { - ast *sitter.Node - source []byte - // unresolvedRefs is the list of references that could not be resolved thus far in the traversal - unresolvedRefs []UnresolvedRef -} - -func (j *TsScopeBuilder) GetLanguage() Language { - return LangJs -} - -var ScopeNodes = []string{ - "statement_block", - "function_declaration", - "function_expression", - "for_statement", - "for_in_statement", - "for_of_statement", - "program", -} - -func (ts *TsScopeBuilder) NodeCreatesScope(node *sitter.Node) bool { - return slices.Contains(ScopeNodes, node.Type()) -} - -func (ts *TsScopeBuilder) DeclaresVariable(node *sitter.Node) bool { - typ := node.Type() - // addition of function_declaration and formal_parameters necessary for functional scope handling. - return typ == "variable_declarator" || typ == "import_clause" || typ == "import_specifier" || typ == "formal_parameters" || typ == "function_declaration" -} - -func (ts *TsScopeBuilder) scanDecl(idOrPattern, declarator *sitter.Node, decls []*Variable) []*Variable { - switch idOrPattern.Type() { - case "identifier": - // = ... - nameStr := idOrPattern.Content(ts.source) - decls = append(decls, &Variable{ - Kind: VarKindVariable, - Name: nameStr, - DeclNode: declarator, - }) - - case "object_pattern": - // { } = ... - props := ChildrenOfType(idOrPattern, "shorthand_property_identifier_pattern") - for _, prop := range props { - decls = append(decls, &Variable{ - Kind: VarKindVariable, - Name: prop.Content(ts.source), - DeclNode: declarator, - }) - } - - pairs := ChildrenOfType(idOrPattern, "pair_pattern") - for _, pair := range pairs { - decls = ts.scanDecl(pair, declarator, decls) - } - - // { realName : } = ... - // alias can be an identifier or nested object pattern. - case "pair_pattern": - binding := idOrPattern.ChildByFieldName("value") - decls = ts.scanDecl(binding, declarator, decls) - - case "array_pattern": - // [ ] = foo - childrenIds := ChildrenOfType(idOrPattern, "identifier") - childrenObjPatterns := ChildrenOfType(idOrPattern, "object_pattern") - childrenArrayPatterns := ChildrenOfType(idOrPattern, "array_pattern") - for _, id := range childrenIds { - decls = append(decls, &Variable{ - Kind: VarKindVariable, - Name: id.Content(ts.source), - DeclNode: declarator, - }) - } - - for _, objPattern := range childrenObjPatterns { - decls = ts.scanDecl(objPattern, declarator, decls) - } - - for _, arrayPattern := range childrenArrayPatterns { - decls = ts.scanDecl(arrayPattern, declarator, decls) - } - - for _, objectPattern := range childrenObjPatterns { - decls = ts.scanDecl(objectPattern, declarator, decls) - } - } - - return decls -} - -func (ts *TsScopeBuilder) variableFromImportSpecifier(specifier *sitter.Node) *Variable { - name := specifier.ChildByFieldName("name") - if name == nil { - // skipcq: TCV-001 - return nil - } - - var Name string - if specifier.Child(2) != nil { - // alias ( as ) - local := specifier.Child(2) - Name = local.Content(ts.source) - } else { - // no alias - Name = name.Content(ts.source) - } - - return &Variable{ - Kind: VarKindImport, - Name: Name, - DeclNode: specifier, - } -} - -func (ts *TsScopeBuilder) CollectVariables(node *sitter.Node) []*Variable { - var declaredVars []*Variable - switch node.Type() { - case "variable_declarator": - lhs := node.ChildByFieldName("name") - return ts.scanDecl(lhs, node, declaredVars) - - case "function_declaration": - name := node.ChildByFieldName("name") - // skipcq: TCV-001 - if name == nil { - break - } - - declaredVars = append(declaredVars, &Variable{ - Kind: VarKindFunction, - Name: name.Content(ts.source), - DeclNode: node, - }) - - case "formal_parameters": - // TODO - - for i := 0; i < int(node.NamedChildCount()); i++ { - param := node.NamedChild(i) - if param == nil { - continue - } - // Handle different parameter types (required, optional, rest, patterns) - // Simple identifier parameter: function foo(x) - // Required parameter often wraps identifier: function foo(x: number) - var identifier *sitter.Node - if param.Type() == "identifier" { - identifier = param - } else if param.Type() == "required_parameter" || param.Type() == "optional_parameter" { - // Look for pattern which might be identifier or destructuring - pattern := param.ChildByFieldName("pattern") - if pattern != nil && pattern.Type() == "identifier" { - identifier = pattern - } - // TODO: Handle destructuring patterns within parameters if needed by calling scanDecl - } else if param.Type() == "assignment_pattern" { - // Parameter with default value: function foo(x = 1) - left := param.ChildByFieldName("left") - if left != nil && left.Type() == "identifier" { - identifier = left - } - // TODO: Handle destructuring patterns within parameters if needed by calling scanDecl - } - // TODO: Handle rest parameter (...)+ - if identifier != nil { - declaredVars = append(declaredVars, &Variable{ - Kind: VarKindParameter, - Name: identifier.Content(ts.source), - DeclNode: param, // Use the parameter node itself (or identifier) as DeclNode - }) - } - // Add handling for destructuring patterns here if necessary using scanDecl - } - - case "import_specifier": - // import { } from ... - variable := ts.variableFromImportSpecifier(node) - declaredVars = append(declaredVars, variable) - - case "import_clause": - // import , { } from ... - defaultImport := FirstChildOfType(node, "identifier") - if defaultImport != nil { - declaredVars = append(declaredVars, &Variable{ - Kind: VarKindImport, - Name: defaultImport.Content(ts.source), - DeclNode: defaultImport, - }) - } - } - - return declaredVars -} - -func (ts *TsScopeBuilder) OnNodeEnter(node *sitter.Node, scope *Scope) { - // collect identifier references if one is found - if node.Type() == "identifier" { - parent := node.Parent() - if parent == nil { - return - } - - parentType := parent.Type() - - if parentType == "variable_declarator" && parent.ChildByFieldName("name") == node { - return - } - - if parentType == "formal_parameters" { - return - } - - // binding identifiers in array patterns are not references. - // e.g. in `const [a, b] = foo;`, `a` and `b` are not references. - if parentType == "array_pattern" { - return - } - - if parentType == "assignment_pattern" && parent.ChildByFieldName("left") == node { - return - } - - if parentType == "required_parameter" && parent.ChildByFieldName("pattern") == node { - return - } - - // destructured property binding names are *not* references. - // e.g. in `const { a: b } = foo;`, `a` is not a reference. - if parentType == "pair_pattern" && parent.ChildByFieldName("key") == node { - return - } - - if parentType == "import_clause" || parentType == "import_specifier" { - return - } - - // try to resolve this reference to a target variable - variable := scope.Lookup(node.Content(ts.source)) - if variable == nil { - unresolved := UnresolvedRef{ - id: node, - surroundingScope: scope, - } - - ts.unresolvedRefs = append(ts.unresolvedRefs, unresolved) - return - } - - // If a variable is found, add a reference to it - ref := &Reference{ - Variable: variable, - Node: node, - } - variable.Refs = append(variable.Refs, ref) - } -} - -func (ts *TsScopeBuilder) OnNodeExit(node *sitter.Node, scope *Scope) { - if node.Type() == "program" { - // At the end, try to resolve all unresolved references - for _, unresolved := range ts.unresolvedRefs { - variable := unresolved.surroundingScope.Lookup( - unresolved.id.Content(ts.source), - ) - - if variable == nil { - continue - } - - ref := &Reference{ - Variable: variable, - Node: unresolved.id, - } - - variable.Refs = append(variable.Refs, ref) - } - } -} +// import ( +// "slices" + +// sitter "github.com/smacker/go-tree-sitter" +// ) + +// type UnresolvedRef struct { +// id *sitter.Node +// surroundingScope *Scope +// } + +// type TsScopeBuilder struct { +// ast *sitter.Node +// source []byte +// // unresolvedRefs is the list of references that could not be resolved thus far in the traversal +// unresolvedRefs []UnresolvedRef +// } + +// func (j *TsScopeBuilder) GetLanguage() Language { +// return LangJs +// } + +// var ScopeNodes = []string{ +// "statement_block", +// "function_declaration", +// "function_expression", +// "for_statement", +// "for_in_statement", +// "for_of_statement", +// "program", +// } + +// func (ts *TsScopeBuilder) NodeCreatesScope(node *sitter.Node) bool { +// return slices.Contains(ScopeNodes, node.Type()) +// } + +// func (ts *TsScopeBuilder) DeclaresVariable(node *sitter.Node) bool { +// typ := node.Type() +// // addition of function_declaration and formal_parameters necessary for functional scope handling. +// return typ == "variable_declarator" || typ == "import_clause" || typ == "import_specifier" || typ == "formal_parameters" || typ == "function_declaration" +// } + +// func (ts *TsScopeBuilder) scanDecl(idOrPattern, declarator *sitter.Node, decls []*Variable) []*Variable { +// switch idOrPattern.Type() { +// case "identifier": +// // = ... +// nameStr := idOrPattern.Content(ts.source) +// decls = append(decls, &Variable{ +// Kind: VarKindVariable, +// Name: nameStr, +// DeclNode: declarator, +// }) + +// case "object_pattern": +// // { } = ... +// props := ChildrenOfType(idOrPattern, "shorthand_property_identifier_pattern") +// for _, prop := range props { +// decls = append(decls, &Variable{ +// Kind: VarKindVariable, +// Name: prop.Content(ts.source), +// DeclNode: declarator, +// }) +// } + +// pairs := ChildrenOfType(idOrPattern, "pair_pattern") +// for _, pair := range pairs { +// decls = ts.scanDecl(pair, declarator, decls) +// } + +// // { realName : } = ... +// // alias can be an identifier or nested object pattern. +// case "pair_pattern": +// binding := idOrPattern.ChildByFieldName("value") +// decls = ts.scanDecl(binding, declarator, decls) + +// case "array_pattern": +// // [ ] = foo +// childrenIds := ChildrenOfType(idOrPattern, "identifier") +// childrenObjPatterns := ChildrenOfType(idOrPattern, "object_pattern") +// childrenArrayPatterns := ChildrenOfType(idOrPattern, "array_pattern") +// for _, id := range childrenIds { +// decls = append(decls, &Variable{ +// Kind: VarKindVariable, +// Name: id.Content(ts.source), +// DeclNode: declarator, +// }) +// } + +// for _, objPattern := range childrenObjPatterns { +// decls = ts.scanDecl(objPattern, declarator, decls) +// } + +// for _, arrayPattern := range childrenArrayPatterns { +// decls = ts.scanDecl(arrayPattern, declarator, decls) +// } + +// for _, objectPattern := range childrenObjPatterns { +// decls = ts.scanDecl(objectPattern, declarator, decls) +// } +// } + +// return decls +// } + +// func (ts *TsScopeBuilder) variableFromImportSpecifier(specifier *sitter.Node) *Variable { +// name := specifier.ChildByFieldName("name") +// if name == nil { +// // skipcq: TCV-001 +// return nil +// } + +// var Name string +// if specifier.Child(2) != nil { +// // alias ( as ) +// local := specifier.Child(2) +// Name = local.Content(ts.source) +// } else { +// // no alias +// Name = name.Content(ts.source) +// } + +// return &Variable{ +// Kind: VarKindImport, +// Name: Name, +// DeclNode: specifier, +// } +// } + +// func (ts *TsScopeBuilder) CollectVariables(node *sitter.Node) []*Variable { +// var declaredVars []*Variable +// switch node.Type() { +// case "variable_declarator": +// lhs := node.ChildByFieldName("name") +// return ts.scanDecl(lhs, node, declaredVars) + +// case "function_declaration": +// name := node.ChildByFieldName("name") +// // skipcq: TCV-001 +// if name == nil { +// break +// } + +// declaredVars = append(declaredVars, &Variable{ +// Kind: VarKindFunction, +// Name: name.Content(ts.source), +// DeclNode: node, +// }) + +// case "formal_parameters": +// // TODO + +// for i := 0; i < int(node.NamedChildCount()); i++ { +// param := node.NamedChild(i) +// if param == nil { +// continue +// } +// // Handle different parameter types (required, optional, rest, patterns) +// // Simple identifier parameter: function foo(x) +// // Required parameter often wraps identifier: function foo(x: number) +// var identifier *sitter.Node +// if param.Type() == "identifier" { +// identifier = param +// } else if param.Type() == "required_parameter" || param.Type() == "optional_parameter" { +// // Look for pattern which might be identifier or destructuring +// pattern := param.ChildByFieldName("pattern") +// if pattern != nil && pattern.Type() == "identifier" { +// identifier = pattern +// } +// // TODO: Handle destructuring patterns within parameters if needed by calling scanDecl +// } else if param.Type() == "assignment_pattern" { +// // Parameter with default value: function foo(x = 1) +// left := param.ChildByFieldName("left") +// if left != nil && left.Type() == "identifier" { +// identifier = left +// } +// // TODO: Handle destructuring patterns within parameters if needed by calling scanDecl +// } +// // TODO: Handle rest parameter (...)+ +// if identifier != nil { +// declaredVars = append(declaredVars, &Variable{ +// Kind: VarKindParameter, +// Name: identifier.Content(ts.source), +// DeclNode: param, // Use the parameter node itself (or identifier) as DeclNode +// }) +// } +// // Add handling for destructuring patterns here if necessary using scanDecl +// } + +// case "import_specifier": +// // import { } from ... +// variable := ts.variableFromImportSpecifier(node) +// declaredVars = append(declaredVars, variable) + +// case "import_clause": +// // import , { } from ... +// defaultImport := FirstChildOfType(node, "identifier") +// if defaultImport != nil { +// declaredVars = append(declaredVars, &Variable{ +// Kind: VarKindImport, +// Name: defaultImport.Content(ts.source), +// DeclNode: defaultImport, +// }) +// } +// } + +// return declaredVars +// } + +// func (ts *TsScopeBuilder) OnNodeEnter(node *sitter.Node, scope *Scope) { +// // collect identifier references if one is found +// if node.Type() == "identifier" { +// parent := node.Parent() +// if parent == nil { +// return +// } + +// parentType := parent.Type() + +// if parentType == "variable_declarator" && parent.ChildByFieldName("name") == node { +// return +// } + +// if parentType == "formal_parameters" { +// return +// } + +// // binding identifiers in array patterns are not references. +// // e.g. in `const [a, b] = foo;`, `a` and `b` are not references. +// if parentType == "array_pattern" { +// return +// } + +// if parentType == "assignment_pattern" && parent.ChildByFieldName("left") == node { +// return +// } + +// if parentType == "required_parameter" && parent.ChildByFieldName("pattern") == node { +// return +// } + +// // destructured property binding names are *not* references. +// // e.g. in `const { a: b } = foo;`, `a` is not a reference. +// if parentType == "pair_pattern" && parent.ChildByFieldName("key") == node { +// return +// } + +// if parentType == "import_clause" || parentType == "import_specifier" { +// return +// } + +// // try to resolve this reference to a target variable +// variable := scope.Lookup(node.Content(ts.source)) +// if variable == nil { +// unresolved := UnresolvedRef{ +// id: node, +// surroundingScope: scope, +// } + +// ts.unresolvedRefs = append(ts.unresolvedRefs, unresolved) +// return +// } + +// // If a variable is found, add a reference to it +// ref := &Reference{ +// Variable: variable, +// Node: node, +// } +// variable.Refs = append(variable.Refs, ref) +// } +// } + +// func (ts *TsScopeBuilder) OnNodeExit(node *sitter.Node, scope *Scope) { +// if node.Type() == "program" { +// // At the end, try to resolve all unresolved references +// for _, unresolved := range ts.unresolvedRefs { +// variable := unresolved.surroundingScope.Lookup( +// unresolved.id.Content(ts.source), +// ) + +// if variable == nil { +// continue +// } + +// ref := &Reference{ +// Variable: variable, +// Node: unresolved.id, +// } + +// variable.Refs = append(variable.Refs, ref) +// } +// } +// } diff --git a/pkg/analysis/scope_ts_test.go b/pkg/analysis/scope_ts_test.go index 389a8de2..175cf672 100644 --- a/pkg/analysis/scope_ts_test.go +++ b/pkg/analysis/scope_ts_test.go @@ -1,133 +1,133 @@ package analysis -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func parseFile(t *testing.T, source string) *ParseResult { - parsed, err := Parse("file.ts", []byte(source), LangJs, LangJs.Grammar()) - require.NoError(t, err) - require.NotNil(t, parsed) - return parsed -} - -func Test_BuildScopeTree(t *testing.T) { - t.Run("is able to resolve references", func(t *testing.T) { - source := ` - let x = 1 - { - let y = x - }` - parsed := parseFile(t, source) - - scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) - require.NotNil(t, scopeTree) - globalScope := scopeTree.Root.Children[0] - varX, exists := globalScope.Variables["x"] - require.True(t, exists) - require.NotNil(t, varX) - - varY, exists := globalScope.Children[0].Variables["y"] - require.True(t, exists) - require.NotNil(t, varY) - require.Equal(t, VarKindVariable, varY.Kind) - - assert.Equal(t, 1, len(varX.Refs)) - xRef := varX.Refs[0] - assert.Equal(t, "x", xRef.Variable.Name) - require.Equal(t, VarKindVariable, varY.Kind) - }) - - t.Run("supports import statements", func(t *testing.T) { - source := ` - import { extname } from 'path' - { - let { extname = 1 } = null // does NOT count as a reference - } - - let { x = extname } = null // counts as a reference - - { - extname('file.txt') // counts as a reference - let { extname } = null // does NOT count as a reference - } - - import { readFile as r } from 'file' - r('file.txt') - function f(r = x) {} // NOT a reference - ` - parsed := parseFile(t, source) - - scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) - require.NotNil(t, scopeTree) - globalScope := scopeTree.Root.Children[0] - { - varR, exists := globalScope.Variables["r"] - require.True(t, exists) - require.NotNil(t, varR) - - assert.Equal(t, VarKindImport, varR.Kind) - - rRefs := varR.Refs - require.Equal(t, 1, len(rRefs)) - assert.Equal(t, "call_expression", rRefs[0].Node.Parent().Type()) - } - - { - varExtname, exists := globalScope.Variables["extname"] - require.True(t, exists) - require.NotNil(t, varExtname) - - assert.Equal(t, VarKindImport, varExtname.Kind) - - extnameRefs := varExtname.Refs - require.Equal(t, 2, len(extnameRefs)) - assert.Equal(t, "object_assignment_pattern", extnameRefs[0].Node.Parent().Type()) - assert.Equal(t, "call_expression", extnameRefs[1].Node.Parent().Type()) - } - }) - - t.Run("handles function declaration with parameters", func(t *testing.T) { - source := ` - function greet(name, age = 18) { - let greeting = "Hello"; - return greeting + " " + name; - } - greet("Alice") - ` - - parsed := parseFile(t, source) - require.NotNil(t, parsed) - scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) - globalScope := scopeTree.Root.Children[0] - // Checking function declaration - funcVar := globalScope.Lookup("greet") - require.NotNil(t, funcVar) - funcVariable, exists := globalScope.Variables["greet"] // tagged as an Identifier - require.True(t, exists) - require.NotNil(t, funcVariable) - - funcScope := scopeTree.GetScope(funcVar.DeclNode) - require.NotNil(t, funcScope) - - nameVar, exists := funcScope.Variables["name"] - require.True(t, exists) - require.Equal(t, VarKindParameter, nameVar.Kind) - - ageVar, exists := funcScope.Variables["age"] - require.True(t, exists) - require.Equal(t, VarKindParameter, ageVar.Kind) - - // existence of function body - - bodyScope := funcScope.Children[0] - require.NotNil(t, bodyScope) - - greetingVar, exists := bodyScope.Variables["greeting"] - require.True(t, exists) - require.Equal(t, VarKindVariable, greetingVar.Kind) - }) -} +// import ( +// "testing" + +// "github.com/stretchr/testify/assert" +// "github.com/stretchr/testify/require" +// ) + +// func parseFile(t *testing.T, source string) *ParseResult { +// parsed, err := Parse("file.ts", []byte(source), LangJs, LangJs.Grammar()) +// require.NoError(t, err) +// require.NotNil(t, parsed) +// return parsed +// } + +// func Test_BuildScopeTree(t *testing.T) { +// t.Run("is able to resolve references", func(t *testing.T) { +// source := ` +// let x = 1 +// { +// let y = x +// }` +// parsed := parseFile(t, source) + +// scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) +// require.NotNil(t, scopeTree) +// globalScope := scopeTree.Root.Children[0] +// varX, exists := globalScope.Variables["x"] +// require.True(t, exists) +// require.NotNil(t, varX) + +// varY, exists := globalScope.Children[0].Variables["y"] +// require.True(t, exists) +// require.NotNil(t, varY) +// require.Equal(t, VarKindVariable, varY.Kind) + +// assert.Equal(t, 1, len(varX.Refs)) +// xRef := varX.Refs[0] +// assert.Equal(t, "x", xRef.Variable.Name) +// require.Equal(t, VarKindVariable, varY.Kind) +// }) + +// t.Run("supports import statements", func(t *testing.T) { +// source := ` +// import { extname } from 'path' +// { +// let { extname = 1 } = null // does NOT count as a reference +// } + +// let { x = extname } = null // counts as a reference + +// { +// extname('file.txt') // counts as a reference +// let { extname } = null // does NOT count as a reference +// } + +// import { readFile as r } from 'file' +// r('file.txt') +// function f(r = x) {} // NOT a reference +// ` +// parsed := parseFile(t, source) + +// scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) +// require.NotNil(t, scopeTree) +// globalScope := scopeTree.Root.Children[0] +// { +// varR, exists := globalScope.Variables["r"] +// require.True(t, exists) +// require.NotNil(t, varR) + +// assert.Equal(t, VarKindImport, varR.Kind) + +// rRefs := varR.Refs +// require.Equal(t, 1, len(rRefs)) +// assert.Equal(t, "call_expression", rRefs[0].Node.Parent().Type()) +// } + +// { +// varExtname, exists := globalScope.Variables["extname"] +// require.True(t, exists) +// require.NotNil(t, varExtname) + +// assert.Equal(t, VarKindImport, varExtname.Kind) + +// extnameRefs := varExtname.Refs +// require.Equal(t, 2, len(extnameRefs)) +// assert.Equal(t, "object_assignment_pattern", extnameRefs[0].Node.Parent().Type()) +// assert.Equal(t, "call_expression", extnameRefs[1].Node.Parent().Type()) +// } +// }) + +// t.Run("handles function declaration with parameters", func(t *testing.T) { +// source := ` +// function greet(name, age = 18) { +// let greeting = "Hello"; +// return greeting + " " + name; +// } +// greet("Alice") +// ` + +// parsed := parseFile(t, source) +// require.NotNil(t, parsed) +// scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) +// globalScope := scopeTree.Root.Children[0] +// // Checking function declaration +// funcVar := globalScope.Lookup("greet") +// require.NotNil(t, funcVar) +// funcVariable, exists := globalScope.Variables["greet"] // tagged as an Identifier +// require.True(t, exists) +// require.NotNil(t, funcVariable) + +// funcScope := scopeTree.GetScope(funcVar.DeclNode) +// require.NotNil(t, funcScope) + +// nameVar, exists := funcScope.Variables["name"] +// require.True(t, exists) +// require.Equal(t, VarKindParameter, nameVar.Kind) + +// ageVar, exists := funcScope.Variables["age"] +// require.True(t, exists) +// require.Equal(t, VarKindParameter, ageVar.Kind) + +// // existence of function body + +// bodyScope := funcScope.Children[0] +// require.NotNil(t, bodyScope) + +// greetingVar, exists := bodyScope.Variables["greeting"] +// require.True(t, exists) +// require.Equal(t, VarKindVariable, greetingVar.Kind) +// }) +// } diff --git a/pkg/analysis/walk.go b/pkg/analysis/walk.go index 06273995..d04af646 100644 --- a/pkg/analysis/walk.go +++ b/pkg/analysis/walk.go @@ -1,96 +1,96 @@ package analysis -import ( - sitter "github.com/smacker/go-tree-sitter" -) +// import ( +// sitter "github.com/smacker/go-tree-sitter" +// ) -// Walker is an interface that dictates what to do when -// entering and leaving each node during the pre-order traversal -// of a tree. -// To traverse post-order, use the `OnLeaveNode` callback. -type Walker interface { - // OnEnterNode is called when the walker enters a node. - // The boolean return value indicates whether the walker should - // continue walking the sub-tree of this node. - OnEnterNode(node *sitter.Node) bool - // OnLeaveNode is called when the walker leaves a node. - // This is called after all the children of the node have been visited and explored. - OnLeaveNode(node *sitter.Node) -} +// // Walker is an interface that dictates what to do when +// // entering and leaving each node during the pre-order traversal +// // of a tree. +// // To traverse post-order, use the `OnLeaveNode` callback. +// type Walker interface { +// // OnEnterNode is called when the walker enters a node. +// // The boolean return value indicates whether the walker should +// // continue walking the sub-tree of this node. +// OnEnterNode(node *sitter.Node) bool +// // OnLeaveNode is called when the walker leaves a node. +// // This is called after all the children of the node have been visited and explored. +// OnLeaveNode(node *sitter.Node) +// } -func WalkTree(node *sitter.Node, walker Walker) { - goInside := walker.OnEnterNode(node) - if goInside { - for i := 0; i < int(node.NamedChildCount()); i++ { - child := node.NamedChild(i) - WalkTree(child, walker) - } - } +// func WalkTree(node *sitter.Node, walker Walker) { +// goInside := walker.OnEnterNode(node) +// if goInside { +// for i := 0; i < int(node.NamedChildCount()); i++ { +// child := node.NamedChild(i) +// WalkTree(child, walker) +// } +// } - walker.OnLeaveNode(node) -} +// walker.OnLeaveNode(node) +// } -// ChildrenWithFieldName returns all the children of a node -// with a specific field name. -// Tree-sitter can have multiple children with the same field name. -func ChildrenWithFieldName(node *sitter.Node, fieldName string) []*sitter.Node { - var children []*sitter.Node - for i := 0; i < int(node.ChildCount()); i++ { - if node.FieldNameForChild(i) == fieldName { - child := node.Child(i) - children = append(children, child) - } - } +// // ChildrenWithFieldName returns all the children of a node +// // with a specific field name. +// // Tree-sitter can have multiple children with the same field name. +// func ChildrenWithFieldName(node *sitter.Node, fieldName string) []*sitter.Node { +// var children []*sitter.Node +// for i := 0; i < int(node.ChildCount()); i++ { +// if node.FieldNameForChild(i) == fieldName { +// child := node.Child(i) +// children = append(children, child) +// } +// } - return children -} +// return children +// } -// FindMatchingChild iterates over all children of a node—both named and unnamed—and returns the -// first child that matches the predicate function. -func FindMatchingChild(node *sitter.Node, predicate func(*sitter.Node) bool) *sitter.Node { - nChildren := int(node.ChildCount()) +// // FindMatchingChild iterates over all children of a node—both named and unnamed—and returns the +// // first child that matches the predicate function. +// func FindMatchingChild(node *sitter.Node, predicate func(*sitter.Node) bool) *sitter.Node { +// nChildren := int(node.ChildCount()) - for i := 0; i < nChildren; i++ { - child := node.Child(i) - if predicate(child) { - return child - } - } +// for i := 0; i < nChildren; i++ { +// child := node.Child(i) +// if predicate(child) { +// return child +// } +// } - return nil -} +// return nil +// } -func ChildrenOfType(node *sitter.Node, nodeType string) []*sitter.Node { - nChildren := int(node.ChildCount()) - var results []*sitter.Node - for i := 0; i < nChildren; i++ { - child := node.Child(i) - if child.Type() == nodeType { - results = append(results, child) - } - } - return results -} +// func ChildrenOfType(node *sitter.Node, nodeType string) []*sitter.Node { +// nChildren := int(node.ChildCount()) +// var results []*sitter.Node +// for i := 0; i < nChildren; i++ { +// child := node.Child(i) +// if child.Type() == nodeType { +// results = append(results, child) +// } +// } +// return results +// } -func ChildWithFieldName(node *sitter.Node, fieldName string) *sitter.Node { - nChildren := int(node.NamedChildCount()) - for i := 0; i < nChildren; i++ { - if node.FieldNameForChild(i) == fieldName { - return node.Child(i) - } - } +// func ChildWithFieldName(node *sitter.Node, fieldName string) *sitter.Node { +// nChildren := int(node.NamedChildCount()) +// for i := 0; i < nChildren; i++ { +// if node.FieldNameForChild(i) == fieldName { +// return node.Child(i) +// } +// } - return nil -} +// return nil +// } -func FirstChildOfType(node *sitter.Node, nodeType string) *sitter.Node { - nChildren := int(node.ChildCount()) - for i := 0; i < nChildren; i++ { - child := node.Child(i) - if child.Type() == nodeType { - return child - } - } +// func FirstChildOfType(node *sitter.Node, nodeType string) *sitter.Node { +// nChildren := int(node.ChildCount()) +// for i := 0; i < nChildren; i++ { +// child := node.Child(i) +// if child.Type() == nodeType { +// return child +// } +// } - return nil -} +// return nil +// } diff --git a/pkg/cli/cli.go b/pkg/cli/cli.go index 495beb5a..cadecb89 100644 --- a/pkg/cli/cli.go +++ b/pkg/cli/cli.go @@ -17,7 +17,8 @@ import ( goAnalysis "globstar.dev/analysis" "globstar.dev/checkers" "globstar.dev/checkers/discover" - "globstar.dev/pkg/analysis" + + // "globstar.dev/pkg/analysis" "globstar.dev/pkg/config" "globstar.dev/util" ) @@ -26,9 +27,9 @@ type Cli struct { // RootDirectory is the target directory to analyze RootDirectory string // Checkers is a list of checkers that are applied to the files in `RootDirectory` - Checkers []analysis.Checker - Config *config.Config - CmpHash string + // Checkers []analysis.Checker + Config *config.Config + CmpHash string } func (c *Cli) loadConfig() error { @@ -193,15 +194,15 @@ to run only the built-in checkers, and --checkers=all to run both.`, // Track test failures but continue running all tests var testsFailed bool - _, _, yamlPassed, err := goAnalysis.RunAnalyzerTests(analysisDir, []*goAnalysis.Analyzer{}) + yamlPassed, err := runTestCases(analysisDir) if err != nil { err = fmt.Errorf("error running YAML tests: %w", err) fmt.Fprintln(os.Stderr, err.Error()) // Don't return immediately, continue with other tests } if !yamlPassed { - return fmt.Errorf("YAML tests failed ") testsFailed = true + return fmt.Errorf("YAML tests failed ") } goPassed := true @@ -454,35 +455,35 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { // run checker // the first arg is empty, since the format for inbuilt Go-based checkers has changed // TODO: factor it in later - nonYamlAnalyzers := []*goAnalysis.Analyzer{} - issues, err := goAnalysis.RunAnalyzers(c.RootDirectory, nonYamlAnalyzers, func(filename string) bool { - if c.CmpHash != "" { - _, isChanged := changedFileMap[filename] - return isChanged - } - return true - }) - - if err != nil { - // parse error on a single file should not exit the entire analysis process - // TODO: logging the below error message is not helpful, as it logs unsupported file types as well - // fmt.Fprintf(os.Stderr, "Error parsing file %s: %s\n", path, err) - return nil - } - - for _, issue := range issues { - txt, _ := issue.AsText() - log.Error().Msg(string(txt)) - - result.issues = append(result.issues, &goAnalysis.Issue{ - Filepath: issue.Filepath, - Message: issue.Message, - Severity: goAnalysis.Severity(issue.Severity), - Category: goAnalysis.Category(issue.Category), - Node: issue.Node, - Id: issue.Id, - }) - } + // nonYamlAnalyzers := []*goAnalysis.Analyzer{} + // issues, err := goAnalysis.RunAnalyzers(c.RootDirectory, nonYamlAnalyzers, func(filename string) bool { + // if c.CmpHash != "" { + // _, isChanged := changedFileMap[filename] + // return isChanged + // } + // return true + // }) + + // if err != nil { + // // parse error on a single file should not exit the entire analysis process + // // TODO: logging the below error message is not helpful, as it logs unsupported file types as well + // // fmt.Fprintf(os.Stderr, "Error parsing file %s: %s\n", path, err) + // return nil + // } + + // for _, issue := range issues { + // txt, _ := issue.AsText() + // log.Error().Msg(string(txt)) + + // result.issues = append(result.issues, &goAnalysis.Issue{ + // Filepath: issue.Filepath, + // Message: issue.Message, + // Severity: goAnalysis.Severity(issue.Severity), + // Category: goAnalysis.Category(issue.Category), + // Node: issue.Node, + // Id: issue.Id, + // }) + // } return nil }) diff --git a/pkg/cli/test_runner.go b/pkg/cli/test_runner.go index 31d00123..a8c05e55 100644 --- a/pkg/cli/test_runner.go +++ b/pkg/cli/test_runner.go @@ -1,180 +1,182 @@ package cli -// import ( -// "bufio" -// "fmt" -// "io/fs" -// "os" -// "path/filepath" -// "slices" -// "strings" - -// "globstar.dev/pkg/analysis" -// ) - -// func runTests(dir string) (bool, error) { -// passed, err := runTestCases(dir) -// if err != nil { -// return false, err -// } - -// return passed, nil -// } - -// type testCase struct { -// yamlCheckerPath string -// testFile string -// } - -// func findTestCases(dir string) ([]testCase, error) { -// var pairs []testCase // List of checker file/test file pairs - -// err := filepath.Walk(dir, func(path string, d fs.FileInfo, err error) error { -// if err != nil { -// return nil -// } - -// if d.IsDir() { -// return nil -// } - -// if d.Mode()&fs.ModeSymlink != 0 { -// // skip symlinks -// return nil -// } - -// fileExt := filepath.Ext(path) -// isYamlFile := fileExt == ".yaml" || fileExt == ".yml" -// if !isYamlFile { -// return nil -// } - -// patternChecker, err := analysis.ReadFromFile(path) -// if err != nil { -// fmt.Fprintf(os.Stderr, "invalid checker '%s': %s\n", d.Name(), err.Error()) -// return nil -// } - -// testFile := strings.TrimSuffix(path, fileExt) + ".test" + analysis.GetExtFromLanguage(patternChecker.Language) - -// if _, err := os.Stat(testFile); os.IsNotExist(err) { -// testFile = "" -// } - -// pairs = append(pairs, testCase{ -// yamlCheckerPath: path, -// testFile: testFile, -// }) - -// return nil -// }) - -// return pairs, err -// } - -// func runTestCases(dir string) (passed bool, err error) { -// testCases, err := findTestCases(dir) -// if err != nil { -// return false, err -// } - -// if len(testCases) == 0 { -// return false, fmt.Errorf("no test cases found") -// } - -// passed = true -// for _, tc := range testCases { -// if tc.testFile == "" { -// fmt.Fprintf(os.Stderr, "No test cases found for test: %s\n", filepath.Base(tc.yamlCheckerPath)) -// continue -// } - -// fmt.Fprintf(os.Stderr, "Running test case: %s\n", filepath.Base(tc.yamlCheckerPath)) -// // Read and parse the checker definition -// checker, err := analysis.ReadFromFile(tc.yamlCheckerPath) -// if err != nil { -// return false, err -// } - -// // Parse the test file -// analyzer, err := analysis.FromFile(tc.testFile, []analysis.Checker{}) -// if err != nil { -// return false, err -// } -// analyzer.WorkDir = dir -// analyzer.Analyzers = append(analyzer.Analyzers, checker) -// issues := analyzer.Analyze() - -// want, err := findExpectedLines(tc.testFile) -// if err != nil { -// return false, err -// } - -// var got []int -// for _, issue := range issues { -// got = append(got, int(issue.Node.Range().StartPoint.Row)+1) // 0-indexed to 1-indexed -// } - -// slices.Sort(got) - -// testName := filepath.Base(tc.testFile) - -// if len(want) != len(got) { -// message := fmt.Sprintf( -// "(%s): expected issues on the following lines: %v\nbut issues were raised on lines: %v\n", -// testName, -// want, -// got, -// ) - -// fmt.Fprintf(os.Stderr, "%s", message) -// passed = false -// continue -// } - -// for i := range want { -// if want[i] != got[i] { -// message := fmt.Sprintf( -// "(%s): expected issue on line %d, but next occurrence is on line %d\n", -// testName, -// want, -// got, -// ) - -// fmt.Fprintf(os.Stderr, "%s\n", message) -// passed = false -// } -// } -// } - -// return passed, nil -// } - -// // findExpectedLines reads a file and returns line numbers containing "" -// // (incremented by 1). -// func findExpectedLines(filePath string) ([]int, error) { -// file, err := os.Open(filePath) -// if err != nil { -// return nil, err -// } -// defer file.Close() - -// var expectedLines []int -// scanner := bufio.NewScanner(file) - -// lineNumber := 0 -// for scanner.Scan() { -// text := strings.ToLower(scanner.Text()) -// lineNumber++ -// if strings.Contains(text, "") || strings.Contains(text, "") { -// expectedLines = append(expectedLines, lineNumber+1) -// } -// } - -// // Check for scanner errors -// if err := scanner.Err(); err != nil { -// return nil, err -// } - -// return expectedLines, nil -// } +import ( + "bufio" + "fmt" + "io/fs" + "os" + "path/filepath" + "slices" + "strings" + + ana "globstar.dev/analysis" +) + +func runTests(dir string) (bool, error) { + passed, err := runTestCases(dir) + if err != nil { + return false, err + } + + return passed, nil +} + +type testCase struct { + yamlCheckerPath string + testFile string +} + +func findTestCases(dir string) ([]testCase, error) { + var pairs []testCase // List of checker file/test file pairs + + err := filepath.Walk(dir, func(path string, d fs.FileInfo, err error) error { + if err != nil { + return nil + } + + if d.IsDir() { + return nil + } + + if d.Mode()&fs.ModeSymlink != 0 { + // skip symlinks + return nil + } + + fileExt := filepath.Ext(path) + isYamlFile := fileExt == ".yaml" || fileExt == ".yml" + if !isYamlFile { + return nil + } + + patternChecker, err := ana.ReadFromFile(path) + if err != nil { + fmt.Fprintf(os.Stderr, "invalid checker '%s': %s\n", d.Name(), err.Error()) + return nil + } + + testFile := strings.TrimSuffix(path, fileExt) + ".test" + ana.GetExtFromLanguage(patternChecker.Language) + + if _, err := os.Stat(testFile); os.IsNotExist(err) { + testFile = "" + } + + pairs = append(pairs, testCase{ + yamlCheckerPath: path, + testFile: testFile, + }) + + return nil + }) + + return pairs, err +} + +func runTestCases(dir string) (passed bool, err error) { + testCases, err := findTestCases(dir) + if err != nil { + return false, err + } + + if len(testCases) == 0 { + return false, fmt.Errorf("no test cases found") + } + + passed = true + for _, tc := range testCases { + if tc.testFile == "" { + fmt.Fprintf(os.Stderr, "No test cases found for test: %s\n", filepath.Base(tc.yamlCheckerPath)) + continue + } + + fmt.Fprintf(os.Stderr, "Running test case: %s\n", filepath.Base(tc.yamlCheckerPath)) + // Read and parse the checker definition + checker, err := ana.ReadFromFile(tc.yamlCheckerPath) + if err != nil { + return false, err + } + + // Parse the test file + // analyzer, err := analysis.FromFile(tc.testFile, []analysis.Checker{}) + // if err != nil { + // return false, err + // } + + want, err := findExpectedLines(tc.testFile) + if err != nil { + return false, err + } + + issues, err := ana.RunAnalyzers(tc.testFile, []*ana.Analyzer{&checker}, nil) + if err != nil { + return false, err + } + + var got []int + for _, issue := range issues { + got = append(got, int(issue.Node.Range().StartPoint.Row)+1) // 0-indexed to 1-indexed + } + + slices.Sort(got) + + testName := filepath.Base(tc.testFile) + + if len(want) != len(got) { + message := fmt.Sprintf( + "(%s): expected issues on the following lines: %v\nbut issues were raised on lines: %v\n", + testName, + want, + got, + ) + + fmt.Fprintf(os.Stderr, "%s", message) + passed = false + continue + } + + for i := range want { + if want[i] != got[i] { + message := fmt.Sprintf( + "(%s): expected issue on line %d, but next occurrence is on line %d\n", + testName, + want, + got, + ) + + fmt.Fprintf(os.Stderr, "%s\n", message) + passed = false + } + } + } + + return passed, nil +} + +// findExpectedLines reads a file and returns line numbers containing "" +// (incremented by 1). +func findExpectedLines(filePath string) ([]int, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer file.Close() + + var expectedLines []int + scanner := bufio.NewScanner(file) + + lineNumber := 0 + for scanner.Scan() { + text := strings.ToLower(scanner.Text()) + lineNumber++ + if strings.Contains(text, "") || strings.Contains(text, "") { + expectedLines = append(expectedLines, lineNumber+1) + } + } + + // Check for scanner errors + if err := scanner.Err(); err != nil { + return nil, err + } + + return expectedLines, nil +} From 6129df1a95ced01704c3603458c2135d1abeb041 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Tue, 3 Jun 2025 22:08:55 +0530 Subject: [PATCH 04/12] chore: add test-cases for the yaml runtime --- analysis/testdata/mock-checker.yml | 11 +++ analysis/testdata/node-filter-checker.yml | 9 +++ .../testdata/node-filter-test-checker.test.js | 10 +++ .../testdata/node-filter-test-checker.yml | 15 ++++ analysis/testrunner.go | 2 +- analysis/yaml.go | 32 ++++---- analysis/yaml_test.go | 76 +++++++++++++++++++ checkers/checker.go | 2 +- pkg/analysis/pattern_rule.go | 2 +- pkg/analysis/scope_ts_test.go | 2 +- pkg/cli/test_runner.go | 4 +- 11 files changed, 143 insertions(+), 22 deletions(-) create mode 100644 analysis/testdata/mock-checker.yml create mode 100644 analysis/testdata/node-filter-checker.yml create mode 100644 analysis/testdata/node-filter-test-checker.test.js create mode 100644 analysis/testdata/node-filter-test-checker.yml create mode 100644 analysis/yaml_test.go diff --git a/analysis/testdata/mock-checker.yml b/analysis/testdata/mock-checker.yml new file mode 100644 index 00000000..db1a5592 --- /dev/null +++ b/analysis/testdata/mock-checker.yml @@ -0,0 +1,11 @@ +language: javascript +name: mock-checker +message: "This is just a mock checker" +category: style +severity: info +pattern: + (call_expression) @mock-checker +description: | + This is a mock checker. + + diff --git a/analysis/testdata/node-filter-checker.yml b/analysis/testdata/node-filter-checker.yml new file mode 100644 index 00000000..0a5a9066 --- /dev/null +++ b/analysis/testdata/node-filter-checker.yml @@ -0,0 +1,9 @@ +language: javascript +name: node-filter-checker +message: "Variable @var found inside function" +category: style +severity: info +pattern: (variable_declarator) @var @node-filter-checker +filters: + - pattern-inside: (function_declaration) +description: "Check for variables declared inside functions" \ No newline at end of file diff --git a/analysis/testdata/node-filter-test-checker.test.js b/analysis/testdata/node-filter-test-checker.test.js new file mode 100644 index 00000000..c8431f79 --- /dev/null +++ b/analysis/testdata/node-filter-test-checker.test.js @@ -0,0 +1,10 @@ +console.log("Hello, world!"); + +function foo(){ + // + console.log("This should be detected"); + + /* + console.log("This Should not be detected"); + */ +} \ No newline at end of file diff --git a/analysis/testdata/node-filter-test-checker.yml b/analysis/testdata/node-filter-test-checker.yml new file mode 100644 index 00000000..dd963872 --- /dev/null +++ b/analysis/testdata/node-filter-test-checker.yml @@ -0,0 +1,15 @@ +language: javascript +name: node-filter-test-checker +message: "Variable @var found inside function" +category: style +severity: info +pattern: > + (call_expression + function: (member_expression + object: (identifier) @obj + property: (property_identifier) @method + (#eq? @obj "console"))) @node-filter-test-checker +filters: + - pattern-inside: (function_declaration) + - pattern-not-inside: (comment) +description: "Check for variables declared inside functions" diff --git a/analysis/testrunner.go b/analysis/testrunner.go index d99be964..e9be4a20 100644 --- a/analysis/testrunner.go +++ b/analysis/testrunner.go @@ -167,7 +167,7 @@ func discoverYamlAnalyzers(testDir string) ([]*Analyzer, error) { baseName := strings.TrimSuffix(path, fileExt) // Try to read the YAML checker - analyzer, err := ReadFromFile(path) + analyzer, _, err := ReadFromFile(path) if err != nil { // Skip files that aren't valid checkers return nil diff --git a/analysis/yaml.go b/analysis/yaml.go index 7dc57c3e..98f2f753 100644 --- a/analysis/yaml.go +++ b/analysis/yaml.go @@ -71,56 +71,56 @@ type YamlAnalyzer struct { } // ReadFromFile reads a pattern checker definition from a YAML config file. -func ReadFromFile(filePath string) (Analyzer, error) { +func ReadFromFile(filePath string) (Analyzer, YamlAnalyzer, error) { fileContent, err := os.ReadFile(filePath) if err != nil { - return Analyzer{}, err + return Analyzer{}, YamlAnalyzer{}, err } return ReadFromBytes(fileContent) } // ReadFromBytes reads a pattern checker definition from bytes array -func ReadFromBytes(fileContent []byte) (Analyzer, error) { +func ReadFromBytes(fileContent []byte) (Analyzer, YamlAnalyzer,error) { var checker Yaml if err := yaml.Unmarshal(fileContent, &checker); err != nil { - return Analyzer{}, err + return Analyzer{}, YamlAnalyzer{}, err } lang := DecodeLanguage(checker.Language) if lang == LangUnknown { - return Analyzer{}, fmt.Errorf("unknown language code: '%s'", checker.Language) + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("unknown language code: '%s'", checker.Language) } if checker.Code == "" { - return Analyzer{}, fmt.Errorf("no name provided in checker definition") + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("no name provided in checker definition") } if checker.Message == "" { - return Analyzer{}, fmt.Errorf("no message provided in checker '%s'", checker.Code) + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("no message provided in checker '%s'", checker.Code) } var patterns []*sitter.Query if checker.Pattern != "" { pattern, err := sitter.NewQuery([]byte(checker.Pattern), lang.Grammar()) if err != nil { - return Analyzer{}, err + return Analyzer{}, YamlAnalyzer{}, err } patterns = append(patterns, pattern) } else if len(checker.Patterns) > 0 { for _, patternStr := range checker.Patterns { pattern, err := sitter.NewQuery([]byte(patternStr), lang.Grammar()) if err != nil { - return Analyzer{}, err + return Analyzer{}, YamlAnalyzer{}, err } patterns = append(patterns, pattern) } } else { - return Analyzer{}, fmt.Errorf("no pattern provided in checker '%s'", checker.Code) + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("no pattern provided in checker '%s'", checker.Code) } if checker.Pattern != "" && len(checker.Patterns) > 0 { - return Analyzer{}, fmt.Errorf("only one of 'pattern' or 'patterns' can be provided in a checker definition") + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("only one of 'pattern' or 'patterns' can be provided in a checker definition") } // include and exclude patterns @@ -134,7 +134,7 @@ func ReadFromBytes(fileContent []byte) (Analyzer, error) { for _, exclude := range checker.Exclude { g, err := glob.Compile(exclude) if err != nil { - return Analyzer{}, err + return Analyzer{}, YamlAnalyzer{}, err } pathFilter.ExcludeGlobs = append(pathFilter.ExcludeGlobs, g) } @@ -142,7 +142,7 @@ func ReadFromBytes(fileContent []byte) (Analyzer, error) { for _, include := range checker.Include { g, err := glob.Compile(include) if err != nil { - return Analyzer{}, err + return Analyzer{}, YamlAnalyzer{}, err } pathFilter.IncludeGlobs = append(pathFilter.IncludeGlobs, g) } @@ -156,7 +156,7 @@ func ReadFromBytes(fileContent []byte) (Analyzer, error) { queryStr := filter.PatternInside + " @" + filterPatternKey query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) if err != nil { - return Analyzer{}, err + return Analyzer{}, YamlAnalyzer{}, err } filters = append(filters, NodeFilter{ @@ -169,7 +169,7 @@ func ReadFromBytes(fileContent []byte) (Analyzer, error) { queryStr := filter.PatternNotInside + " @" + filterPatternKey query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) if err != nil { - return Analyzer{}, err + return Analyzer{}, YamlAnalyzer{}, err } filters = append(filters, NodeFilter{ @@ -203,7 +203,7 @@ func ReadFromBytes(fileContent []byte) (Analyzer, error) { } patternChecker.Run = RunYamlAnalyzer(yamlAnalyzer) - return *patternChecker, nil + return *patternChecker, *yamlAnalyzer, nil } func RunYamlAnalyzer(YamlAnalyzer *YamlAnalyzer) func(pass *Pass) (any, error) { diff --git a/analysis/yaml_test.go b/analysis/yaml_test.go new file mode 100644 index 00000000..6ff5f4a9 --- /dev/null +++ b/analysis/yaml_test.go @@ -0,0 +1,76 @@ +package analysis + +import ( + "testing" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestReadFile(t *testing.T) { + path := "./testdata/mock-checker.yml" + ana, anaYaml, err := ReadFromFile(path) + + require.Nil(t, err) + name := ana.Name + assert.Equal(t, name, "mock-checker") + language := ana.Language + assert.Equal(t, language, LangJs) + category := ana.Category + assert.Equal(t, category, CategoryStyle) + severity := ana.Severity + assert.Equal(t, severity, SeverityInfo) + assert.Equal(t, anaYaml.Message, "This is just a mock checker") + assert.Equal(t, len(anaYaml.Patterns), 1) +} + +func TestNodeFilters(t *testing.T) { + jsData := ` + var globalVar = 1; // shouldn't match + function test() { + var localVar = 2; // Should match + let anotherVar = 3; // should match + } + ` + path := "./testdata/node-filter-checker.yml" + ana, _, err := ReadFromFile(path) + require.NoError(t, err, "Failed to read YAML data") + + parsedJs, err := Parse("", []byte(jsData), LangJs, LangJs.Grammar()) + require.NoError(t, err, "Failed to parse JS data") + + var matchCount int + var matches []string + + reportFunc := func(pass *Pass, node *sitter.Node, message string) { + matchCount++ + t.Log(node.Content(pass.FileContext.Source)) + matches = append(matches, message) + } + + pass := &Pass{ + Analyzer: &ana, + FileContext: parsedJs, + Report: reportFunc, + Files: []*ParseResult{parsedJs}, + } + + _, err = ana.Run(pass) + require.NoError(t, err, "Failed to run YAML analyzer") + assert.Equal(t, matchCount, 2, "Expected 2 matches") +} + +func TestNodeFilterWithTests(t *testing.T) { + path := "./testdata/node-filter-test-checker.yml" + ana, yamlAna, err := ReadFromFile(path) + require.NoError(t, err, "Failed to read YAML data") + assert.Equal(t, ana.Name, "node-filter-test-checker") + assert.Len(t, yamlAna.NodeFilter, 2) + + diff, log, passed, err := RunAnalyzerTests("./testdata", []*Analyzer{&ana}) + require.NoError(t, err, "Failed to run analyzer tests") + t.Logf("Diff: %s", diff) + t.Logf("Log: %s", log) + assert.True(t, passed) +} \ No newline at end of file diff --git a/checkers/checker.go b/checkers/checker.go index 88c40286..627840ac 100644 --- a/checkers/checker.go +++ b/checkers/checker.go @@ -34,7 +34,7 @@ func findYamlCheckers(checkersMap map[goAnalysis.Language][]goAnalysis.Analyzer) return nil } - patternChecker, err := goAnalysis.ReadFromBytes(fileContent) + patternChecker, _, err := goAnalysis.ReadFromBytes(fileContent) if err != nil { return fmt.Errorf("invalid checker '%s': %s", d.Name(), err.Error()) } diff --git a/pkg/analysis/pattern_rule.go b/pkg/analysis/pattern_rule.go index 2cb6ef1c..e342ed95 100644 --- a/pkg/analysis/pattern_rule.go +++ b/pkg/analysis/pattern_rule.go @@ -357,4 +357,4 @@ package analysis // filters: filters, // } // return patternChecker, nil -// } \ No newline at end of file +// } diff --git a/pkg/analysis/scope_ts_test.go b/pkg/analysis/scope_ts_test.go index 175cf672..ca5d3da3 100644 --- a/pkg/analysis/scope_ts_test.go +++ b/pkg/analysis/scope_ts_test.go @@ -94,7 +94,7 @@ package analysis // source := ` // function greet(name, age = 18) { // let greeting = "Hello"; -// return greeting + " " + name; +// return greeting + " " + name; // } // greet("Alice") // ` diff --git a/pkg/cli/test_runner.go b/pkg/cli/test_runner.go index a8c05e55..62fb412d 100644 --- a/pkg/cli/test_runner.go +++ b/pkg/cli/test_runner.go @@ -49,7 +49,7 @@ func findTestCases(dir string) ([]testCase, error) { return nil } - patternChecker, err := ana.ReadFromFile(path) + patternChecker, _, err := ana.ReadFromFile(path) if err != nil { fmt.Fprintf(os.Stderr, "invalid checker '%s': %s\n", d.Name(), err.Error()) return nil @@ -91,7 +91,7 @@ func runTestCases(dir string) (passed bool, err error) { fmt.Fprintf(os.Stderr, "Running test case: %s\n", filepath.Base(tc.yamlCheckerPath)) // Read and parse the checker definition - checker, err := ana.ReadFromFile(tc.yamlCheckerPath) + checker, _, err := ana.ReadFromFile(tc.yamlCheckerPath) if err != nil { return false, err } From 59a5873e51b2a183dd8c13e0b982cfd273448716 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Wed, 4 Jun 2025 20:24:56 +0530 Subject: [PATCH 05/12] chore: clean up the changes, and formatting existing code properly --- analysis/scope.go | 2 +- analysis/yaml.go | 2 +- analysis/yaml_test.go | 2 +- pkg/analysis/analyze.go | 438 ---------------------------------- pkg/analysis/analyze_test.go | 166 ------------- pkg/analysis/language.go | 316 ------------------------ pkg/analysis/pattern_rule.go | 360 ---------------------------- pkg/analysis/rule.go | 33 --- pkg/analysis/scope.go | 190 --------------- pkg/analysis/scope_ts.go | 295 ----------------------- pkg/analysis/scope_ts_test.go | 133 ----------- pkg/analysis/walk.go | 96 -------- 12 files changed, 3 insertions(+), 2030 deletions(-) delete mode 100644 pkg/analysis/analyze.go delete mode 100644 pkg/analysis/analyze_test.go delete mode 100644 pkg/analysis/language.go delete mode 100644 pkg/analysis/pattern_rule.go delete mode 100644 pkg/analysis/rule.go delete mode 100644 pkg/analysis/scope.go delete mode 100644 pkg/analysis/scope_ts.go delete mode 100644 pkg/analysis/scope_ts_test.go delete mode 100644 pkg/analysis/walk.go diff --git a/analysis/scope.go b/analysis/scope.go index 91c971a4..1a55c070 100644 --- a/analysis/scope.go +++ b/analysis/scope.go @@ -147,9 +147,9 @@ func buildScopeTree( if builder.NodeCreatesScope(node) { nextScope = NewScope(scope) scopeOfNode[node] = nextScope - scope.AstNode = node if scope != nil { scope.Children = append(scope.Children, nextScope) + scope.AstNode = node } else { scope = nextScope // root } diff --git a/analysis/yaml.go b/analysis/yaml.go index 98f2f753..2548b2e9 100644 --- a/analysis/yaml.go +++ b/analysis/yaml.go @@ -81,7 +81,7 @@ func ReadFromFile(filePath string) (Analyzer, YamlAnalyzer, error) { } // ReadFromBytes reads a pattern checker definition from bytes array -func ReadFromBytes(fileContent []byte) (Analyzer, YamlAnalyzer,error) { +func ReadFromBytes(fileContent []byte) (Analyzer, YamlAnalyzer, error) { var checker Yaml if err := yaml.Unmarshal(fileContent, &checker); err != nil { return Analyzer{}, YamlAnalyzer{}, err diff --git a/analysis/yaml_test.go b/analysis/yaml_test.go index 6ff5f4a9..1cb08677 100644 --- a/analysis/yaml_test.go +++ b/analysis/yaml_test.go @@ -73,4 +73,4 @@ func TestNodeFilterWithTests(t *testing.T) { t.Logf("Diff: %s", diff) t.Logf("Log: %s", log) assert.True(t, passed) -} \ No newline at end of file +} diff --git a/pkg/analysis/analyze.go b/pkg/analysis/analyze.go deleted file mode 100644 index 9a344286..00000000 --- a/pkg/analysis/analyze.go +++ /dev/null @@ -1,438 +0,0 @@ -package analysis - -// import ( -// "fmt" -// "path/filepath" -// "regexp" -// "strings" - -// sitter "github.com/smacker/go-tree-sitter" -// ana "globstar.dev/analysis" -// ) - -// // type Issue struct { -// // // The category of the issue -// // Category config.Category -// // // The severity of the issue -// // Severity config.Severity -// // // The message to display to the user -// // Message string -// // // The file path of the file that the issue was found in -// // Filepath string -// // // The range of the issue in the source code -// // Range sitter.Range -// // // (optional) The AST node that caused the issue -// // Node *sitter.Node -// // // Id is a unique ID for the issue. -// // // Issue that have 'Id's can be explained using the `globstar desc` command. -// // Id *string -// // } - -// // func (i *Issue) AsJson() ([]byte, error) { -// // type location struct { -// // Row int `json:"row"` -// // Column int `json:"column"` -// // } - -// // type position struct { -// // Filename string `json:"filename"` -// // Start location `json:"start"` -// // End location `json:"end"` -// // } - -// // type issueJson struct { -// // Category config.Category `json:"category"` -// // Severity config.Severity `json:"severity"` -// // Message string `json:"message"` -// // Range position `json:"range"` -// // Id string `json:"id"` -// // } -// // issue := issueJson{ -// // Category: i.Category, -// // Severity: i.Severity, -// // Message: i.Message, -// // Range: position{ -// // Filename: i.Filepath, -// // Start: location{ -// // Row: int(i.Range.StartPoint.Row), -// // Column: int(i.Range.StartPoint.Column), -// // }, -// // End: location{ -// // Row: int(i.Range.EndPoint.Row), -// // Column: int(i.Range.EndPoint.Column), -// // }, -// // }, -// // Id: *i.Id, -// // } - -// // return json.Marshal(issue) -// // } - -// // func (i *Issue) AsText() ([]byte, error) { -// // return []byte(fmt.Sprintf("%s:%d:%d:%s", i.Filepath, i.Range.StartPoint.Row, i.Range.StartPoint.Column, i.Message)), nil -// // } - -// type Analyzer struct { -// Language Language -// // WorkDir is the directory in which the analysis is being run. -// WorkDir string -// // ParseResult is the result of parsing a file with a tree-sitter parser, -// // along with some extra appendages (e.g: scope information). -// ParseResult *ParseResult -// // checkers is a list of all checkers that should be applied to the AST -// // for this language. -// checkers []Checker -// // patternCheckers is a list of all checkers that run after a query is run on the AST. -// // Usually, these are written in a DSL (which, for now, is the tree-sitter S-Expression query language) -// YamlCheckers []YamlChecker -// // entryCheckers maps node types to the checkers that should be applied -// // when entering that node. -// entryCheckersForNode map[string][]Checker -// // exitCheckers maps node types to the checkers that should be applied -// // when leaving that node. -// exitCheckersForNode map[string][]Checker -// issuesRaised []*ana.Issue -// } - -// type SkipComment struct { -// // the line number for the skipcq comment -// CommentLine int -// // the entire text of the skipcq comment -// CommentText string -// // (optional) name of the checker for targetted skip -// CheckerIds []string -// } - -// // package level cache to store comments for each file -// var fileSkipComment = make(map[string][]*SkipComment) - -// func InitializeSkipComments(analyzers []*Analyzer) { -// fileSkipComments := make(map[string][]*SkipComment) - -// processedPaths := make(map[string]bool) - -// for _, analyzer := range analyzers { -// filepath := analyzer.ParseResult.FilePath -// if processedPaths[filepath] { -// continue -// } - -// processedPaths[filepath] = true -// fileSkipComments[filepath] = GatherSkipInfo(analyzer.ParseResult) -// } -// } - -// func FromFile(filePath string, baseCheckers []Checker) (*Analyzer, error) { -// res, err := ParseFile(filePath) -// if err != nil { -// return nil, err -// } - -// return NewAnalyzer(res, baseCheckers), nil -// } - -// func NewAnalyzer(file *ParseResult, checkers []Checker) *Analyzer { -// ana := &Analyzer{ -// ParseResult: file, -// Language: file.Language, -// entryCheckersForNode: map[string][]Checker{}, -// exitCheckersForNode: map[string][]Checker{}, -// } - -// for _, checker := range checkers { -// ana.AddChecker(checker) -// } - -// return ana -// } - -// func (ana *Analyzer) Analyze() []*ana.Issue { -// WalkTree(ana.ParseResult.Ast, ana) -// ana.runPatternCheckers() -// return ana.issuesRaised -// } - -// func (ana *Analyzer) AddChecker(checker Checker) { -// ana.checkers = append(ana.checkers, checker) -// typ := checker.NodeType() - -// if checker.OnEnter() != nil { -// ana.entryCheckersForNode[typ] = append(ana.entryCheckersForNode[typ], checker) -// } - -// if checker.OnLeave() != nil { -// ana.exitCheckersForNode[typ] = append(ana.exitCheckersForNode[typ], checker) -// } -// } - -// func (ana *Analyzer) OnEnterNode(node *sitter.Node) bool { -// nodeType := node.Type() -// checkers := ana.entryCheckersForNode[nodeType] -// for _, checker := range checkers { -// visitFn := checker.OnEnter() -// if visitFn != nil { -// (*visitFn)(checker, ana, node) -// } -// } -// return true -// } - -// func (ana *Analyzer) OnLeaveNode(node *sitter.Node) { -// nodeType := node.Type() -// checkers := ana.exitCheckersForNode[nodeType] -// for _, checker := range checkers { -// visitFn := checker.OnLeave() -// if visitFn != nil { -// (*visitFn)(checker, ana, node) -// } -// } -// } - -// func (ana *Analyzer) shouldSkipChecker(checker YamlChecker) bool { -// pathFilter := checker.PathFilter() -// if pathFilter == nil { -// // no filter is set, so we should not skip this checker -// return false -// } - -// relPath := ana.ParseResult.FilePath -// if ana.WorkDir != "" { -// rel, err := filepath.Rel(ana.WorkDir, ana.ParseResult.FilePath) -// if err == nil { -// relPath = rel -// } -// } - -// if len(pathFilter.ExcludeGlobs) > 0 { -// for _, excludeGlob := range pathFilter.ExcludeGlobs { -// if excludeGlob.Match(relPath) { -// return true -// } -// } - -// // no exclude globs matched, so we should not skip this checker -// return false -// } - -// if len(pathFilter.IncludeGlobs) > 0 { -// for _, includeGlob := range pathFilter.IncludeGlobs { -// if includeGlob.Match(relPath) { -// return false -// } -// } - -// // no include globs matched, so we should skip this checker -// return true -// } - -// return false -// } - -// func (ana *Analyzer) filterMatchesParent(filter *NodeFilter, parent *sitter.Node) bool { -// qc := sitter.NewQueryCursor() -// defer qc.Close() - -// qc.Exec(filter.query, parent) - -// // check if the filter matches the `parent` node -// for { -// m, ok := qc.NextMatch() -// if !ok { -// break -// } - -// m = qc.FilterPredicates(m, ana.ParseResult.Source) -// for _, capture := range m.Captures { -// captureName := filter.query.CaptureNameForId(capture.Index) -// if captureName == filterPatternKey && capture.Node == parent { -// return true -// } -// } -// } - -// return false -// } - -// // runParentFilters checks if the parent filters for a checker match the given node. -// func (ana *Analyzer) runParentFilters(checker YamlChecker, node *sitter.Node) bool { -// filters := checker.NodeFilters() -// if len(filters) == 0 { -// return true -// } - -// for _, filter := range filters { -// shouldMatch := filter.shouldMatch -// nodeMatched := false - -// // The matched node is expected to be a child of some other -// // node, but it has no parents (is a top-level node) -// if node.Parent() == nil && filter.shouldMatch { -// return false -// } - -// for parent := node.Parent(); parent != nil; parent = parent.Parent() { -// if ana.filterMatchesParent(&filter, parent) { -// nodeMatched = true -// if !shouldMatch { -// // pattern-not-inside matched, so this checker should be skipped -// return false -// } else { -// // pattern-inside matched, so we can break out of the loop -// break -// } -// } -// } - -// if !nodeMatched && shouldMatch { -// return false -// } -// } - -// return true -// } - -// func (ana *Analyzer) executeCheckerQuery(checker YamlChecker, query *sitter.Query) { -// qc := sitter.NewQueryCursor() -// defer qc.Close() - -// qc.Exec(query, ana.ParseResult.Ast) -// for { -// m, ok := qc.NextMatch() - -// if !ok { -// break -// } - -// m = qc.FilterPredicates(m, ana.ParseResult.Source) -// for _, capture := range m.Captures { -// captureName := query.CaptureNameForId(capture.Index) -// // TODO: explain why captureName == checker.Name() -// if captureName == checker.Name() && ana.runParentFilters(checker, capture.Node) { -// checker.OnMatch(ana, query, capture.Node, m.Captures) -// } -// } -// } -// } - -// // runPatternCheckers executes all checkers that are written as AST queries. -// func (ana *Analyzer) runPatternCheckers() { -// for _, checker := range ana.YamlCheckers { -// if ana.shouldSkipChecker(checker) { -// continue -// } - -// queries := checker.Patterns() -// for _, q := range queries { -// ana.executeCheckerQuery(checker, q) -// } -// } -// } - -// func (ana *Analyzer) Report(issue *ana.Issue) { -// ana.issuesRaised = append(ana.issuesRaised, issue) -// } - -// func RunYamlCheckers(path string, analyzers []*Analyzer) ([]*ana.Issue, error) { -// InitializeSkipComments(analyzers) - -// issues := []*ana.Issue{} -// for _, analyzer := range analyzers { -// issues = append(issues, analyzer.Analyze()...) -// } -// return issues, nil -// } - -// func GatherSkipInfo(fileContext *ParseResult) []*SkipComment { -// var skipLines []*SkipComment - -// commentIdentifier := GetEscapedCommentIdentifierFromPath(fileContext.FilePath) -// pattern := fmt.Sprintf(`%s(?i).*?\bskipcq\b(?::(?:\s*(?P([A-Za-z\-_0-9]*(?:,\s*)?)+))?)?`, commentIdentifier) -// skipRegexp := regexp.MustCompile(pattern) - -// query, err := sitter.NewQuery([]byte("(comment) @skipcq"), fileContext.Language.Grammar()) - -// if err != nil { -// return skipLines -// } - -// cursor := sitter.NewQueryCursor() -// cursor.Exec(query, fileContext.Ast) - -// // gather all skipcq comment lines in a single pass -// for { -// m, ok := cursor.NextMatch() -// if !ok { -// break -// } - -// for _, capture := range m.Captures { -// captureName := query.CaptureNameForId(capture.Index) -// if captureName != "skipcq" { -// continue -// } - -// commentNode := capture.Node -// commentLine := int(commentNode.StartPoint().Row) -// commentText := commentNode.Content(fileContext.Source) - -// matches := skipRegexp.FindStringSubmatch(commentText) -// if matches != nil { -// issueIdsIdx := skipRegexp.SubexpIndex("issue_ids") -// var checkerIds []string - -// if issueIdsIdx != -1 && issueIdsIdx < len(matches) && matches[issueIdsIdx] != "" { -// issueIdsIdx := matches[issueIdsIdx] -// idSlice := strings.Split(issueIdsIdx, ",") -// for _, id := range idSlice { -// trimmedId := strings.TrimSpace(id) -// if trimmedId != "" { -// checkerIds = append(checkerIds, trimmedId) -// } -// } -// } - -// skipLines = append(skipLines, &SkipComment{ -// CommentLine: commentLine, -// CommentText: commentText, -// CheckerIds: checkerIds, // will be empty for generic skipcq -// }) -// } - -// } -// } - -// return skipLines -// } - -// func (ana *Analyzer) ContainsSkipcq(skipLines []*SkipComment, issue *ana.Issue) bool { -// if len(skipLines) == 0 { -// return false -// } - -// issueNode := issue.Node -// nodeLine := int(issueNode.StartPoint().Row) -// prevLine := nodeLine - 1 - -// var checkerId string -// if issue.Id != nil { -// checkerId = *issue.Id -// } - -// for _, comment := range skipLines { -// if comment.CommentLine != nodeLine && comment.CommentLine != prevLine { -// continue -// } - -// if len(comment.CheckerIds) > 0 { -// for _, id := range comment.CheckerIds { -// if checkerId == id { -// return true -// } -// } -// } else { -// return true -// } -// } - -// return false -// } diff --git a/pkg/analysis/analyze_test.go b/pkg/analysis/analyze_test.go deleted file mode 100644 index dc96b74b..00000000 --- a/pkg/analysis/analyze_test.go +++ /dev/null @@ -1,166 +0,0 @@ -package analysis - -// import ( -// "testing" - -// sitter "github.com/smacker/go-tree-sitter" -// "github.com/stretchr/testify/assert" -// "github.com/stretchr/testify/require" -// "globstar.dev/analysis" -// ) - -// func parseTestFile(t *testing.T, filename string, source string, language Language) *ParseResult { -// parsed, err := Parse(filename, []byte(source), language, language.Grammar()) -// require.NoError(t, err) -// require.NotNil(t, parsed) -// return parsed -// } - -// func TestSkipCq(t *testing.T) { -// tests := []struct { -// name string -// checkerId string -// source string -// language Language -// want bool -// }{ -// { -// name: "skipcq comment on same line", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// def someFunc(a, b): -// assert a == b # skipcq -// `, -// want: true, -// }, -// { -// name: "skipcq comment on previous line", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// if True: -// # skipcq -// assert 1 == 2 -// `, -// want: true, -// }, -// { -// name: "skipcq comment with target checker", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// if a > 20: -// # skipcq: no-assert -// assert 5 == 0 -// `, -// want: true, -// }, -// { -// name: "skipcq comment with mismatches target checker", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// assert a >= float('inf') # skipcq: csv-writer -// `, -// want: false, -// }, -// { -// name: "skipcq comment not present", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// assert a == b -// `, -// want: false, -// }, -// { -// name: "skipcq with multiple targets matching", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// # skipcq: csv-writer, no-assert -// assert 1 == 10 -// `, -// want: true, -// }, -// { -// name: "skipcq with multiple targets mismatching", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// assert 2==1 # skipcq: csv-writer, flask-error -// `, -// want: false, -// }, -// { -// name: "skipcq with extra comments target match", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// def aFunc(): -// assert a == b # some comment skipcq: no-assert, sql-inject # nosec, -// `, -// want: true, -// }, -// { -// name: "skipcq with extra comments target unmatched", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// assert a is b # should be true skipcq: sql-inject, django-taint # more -// `, -// want: false, -// }, -// { -// name: "skipcq with extra comments no target", -// checkerId: "no-assert", -// language: LangPy, -// source: ` -// if True: -// assert 1 == 2 # must be false skipcq # nosec, -// `, -// want: true, -// }, -// } - -// for _, tt := range tests { -// t.Run(tt.name, func(t *testing.T) { -// parsed := parseTestFile(t, "no-assert.test.py", tt.source, tt.language) -// analyzer := &Analyzer{ -// Language: tt.language, -// ParseResult: parsed, -// } - -// query, err := sitter.NewQuery([]byte("(assert_statement) @assert"), tt.language.Grammar()) -// require.NoError(t, err) - -// cursor := sitter.NewQueryCursor() -// cursor.Exec(query, parsed.Ast) - -// match, ok := cursor.NextMatch() -// require.True(t, ok, "failed to find assert statements") - -// var assertNode *sitter.Node -// for _, captureNode := range match.Captures { -// if query.CaptureNameForId(captureNode.Index) == "assert" { -// assertNode = captureNode.Node -// break -// } -// } - -// require.NotNil(t, assertNode, "failed to capture assert node") - -// issue := &analysis.Issue{ -// Filepath: "no-assert.test.py", -// Node: assertNode, -// Id: &tt.checkerId, -// } - -// skipComments := GatherSkipInfo(parsed) - -// res := analyzer.ContainsSkipcq(skipComments, issue) -// assert.Equal(t, tt.want, res) -// }) -// } -// } diff --git a/pkg/analysis/language.go b/pkg/analysis/language.go deleted file mode 100644 index a30046f2..00000000 --- a/pkg/analysis/language.go +++ /dev/null @@ -1,316 +0,0 @@ -package analysis - -// import ( -// "context" -// "fmt" -// "os" -// "path/filepath" - -// sitter "github.com/smacker/go-tree-sitter" - -// treeSitterBash "github.com/smacker/go-tree-sitter/bash" -// treeSitterCsharp "github.com/smacker/go-tree-sitter/csharp" -// treeSitterCss "github.com/smacker/go-tree-sitter/css" -// treeSitterDockerfile "github.com/smacker/go-tree-sitter/dockerfile" -// treeSitterElixir "github.com/smacker/go-tree-sitter/elixir" -// treeSitterElm "github.com/smacker/go-tree-sitter/elm" -// treeSitterGo "github.com/smacker/go-tree-sitter/golang" -// treeSitterGroovy "github.com/smacker/go-tree-sitter/groovy" -// treeSitterHcl "github.com/smacker/go-tree-sitter/hcl" -// treeSitterHtml "github.com/smacker/go-tree-sitter/html" -// treeSitterJava "github.com/smacker/go-tree-sitter/java" -// treeSitterKotlin "github.com/smacker/go-tree-sitter/kotlin" -// treeSitterLua "github.com/smacker/go-tree-sitter/lua" -// treeSitterOCaml "github.com/smacker/go-tree-sitter/ocaml" -// treeSitterPhp "github.com/smacker/go-tree-sitter/php" -// treeSitterPy "github.com/smacker/go-tree-sitter/python" -// treeSitterRuby "github.com/smacker/go-tree-sitter/ruby" -// treeSitterRust "github.com/smacker/go-tree-sitter/rust" -// treeSitterScala "github.com/smacker/go-tree-sitter/scala" -// treeSitterSql "github.com/smacker/go-tree-sitter/sql" -// treeSitterSwift "github.com/smacker/go-tree-sitter/swift" -// treeSitterTsx "github.com/smacker/go-tree-sitter/typescript/tsx" -// treeSitterTs "github.com/smacker/go-tree-sitter/typescript/typescript" -// ) - -// // ParseResult is the result of parsing a file. -// type ParseResult struct { -// // Ast is the root node of the tree-sitter parse-tree -// // representing this file -// Ast *sitter.Node -// // Source is the raw source code of the file -// Source []byte -// // FilePath is the path to the file that was parsed -// FilePath string -// // Language is the tree-sitter language used to parse the file -// TsLanguage *sitter.Language -// // Language is the language of the file -// Language Language -// // ScopeTree represents the scope hierarchy of the file. -// // Can be nil if scope support for this language has not been implemented yet. -// ScopeTree *ScopeTree -// } - -// type Language int - -// const ( -// LangUnknown Language = iota -// LangPy -// LangJs // vanilla JS and JSX -// LangTs // TypeScript (not TSX) -// LangTsx // TypeScript with JSX extension -// LangJava -// LangRuby -// LangRust -// LangYaml -// LangCss -// LangDockerfile -// LangMarkdown -// LangSql -// LangKotlin -// LangOCaml -// LangLua -// LangBash -// LangCsharp -// LangElixir -// LangElm -// LangGo -// LangGroovy -// LangHcl -// LangHtml -// LangPhp -// LangScala -// LangSwift -// ) - -// // tsGrammarForLang returns the tree-sitter grammar for the given language. -// // May return `nil` when `lang` is `LangUnkown`. -// func (lang Language) Grammar() *sitter.Language { -// switch lang { -// case LangPy: -// return treeSitterPy.GetLanguage() -// case LangJs: -// return treeSitterTsx.GetLanguage() // Use TypeScript's JSX grammar for JS/JSX -// case LangTs: -// return treeSitterTs.GetLanguage() -// case LangTsx: -// return treeSitterTsx.GetLanguage() -// case LangJava: -// return treeSitterJava.GetLanguage() -// case LangRuby: -// return treeSitterRuby.GetLanguage() -// case LangRust: -// return treeSitterRust.GetLanguage() -// case LangSql: -// return treeSitterSql.GetLanguage() -// case LangKotlin: -// return treeSitterKotlin.GetLanguage() -// case LangCss: -// return treeSitterCss.GetLanguage() -// case LangOCaml: -// return treeSitterOCaml.GetLanguage() -// case LangLua: -// return treeSitterLua.GetLanguage() -// case LangDockerfile: -// return treeSitterDockerfile.GetLanguage() -// case LangBash: -// return treeSitterBash.GetLanguage() -// case LangCsharp: -// return treeSitterCsharp.GetLanguage() -// case LangElixir: -// return treeSitterElixir.GetLanguage() -// case LangElm: -// return treeSitterElm.GetLanguage() -// case LangGo: -// return treeSitterGo.GetLanguage() -// case LangGroovy: -// return treeSitterGroovy.GetLanguage() -// case LangHcl: -// return treeSitterHcl.GetLanguage() -// case LangHtml: -// return treeSitterHtml.GetLanguage() -// case LangPhp: -// return treeSitterPhp.GetLanguage() -// case LangScala: -// return treeSitterScala.GetLanguage() -// case LangSwift: -// return treeSitterSwift.GetLanguage() -// default: -// return nil -// } -// } - -// // NOTE(@injuly): TypeScript and TSX have to parsed with DIFFERENT -// // grammars. Otherwise, because an expression like `bar` is -// // parsed as a (legacy) type-cast in TS, but a JSXElement in TSX. -// // See: https://facebook.github.io/jsx/#prod-JSXElement - -// // LanguageFromFilePath returns the Language of the file at the given path -// // returns `LangUnkown` if the language is not recognized (e.g: `.txt` files). -// func LanguageFromFilePath(path string) Language { -// ext := filepath.Ext(path) -// switch ext { -// case ".py": -// return LangPy -// // TODO: .jsx and .js can both have JSX syntax -_- -// case ".js", ".jsx": -// return LangJs -// case ".ts": -// return LangTs -// case ".tsx": -// return LangTs -// case ".java": -// return LangJava -// case ".rb": -// return LangRuby -// case ".rs": -// return LangRust -// case ".css": -// return LangCss -// case ".Dockerfile": -// return LangDockerfile -// case ".sql": -// return LangSql -// case ".kt": -// return LangKotlin -// case ".ml": -// return LangOCaml -// case ".lua": -// return LangLua -// case ".sh": -// return LangBash -// case ".cs": -// return LangCsharp -// case ".ex": -// return LangElixir -// case ".elm": -// return LangElm -// case ".go": -// return LangGo -// case ".groovy": -// return LangGroovy -// case ".tf": -// return LangHcl -// case ".html": -// return LangHtml -// case ".php": -// return LangPhp -// case ".scala": -// return LangScala -// case ".swift": -// return LangSwift -// default: -// return LangUnknown -// } -// } - -// func GetExtFromLanguage(lang Language) string { -// switch lang { -// case LangPy: -// return ".py" -// case LangJs: -// return ".js" -// case LangTs: -// return ".ts" -// case LangTsx: -// return ".tsx" -// case LangJava: -// return ".java" -// case LangRuby: -// return ".rb" -// case LangRust: -// return ".rs" -// case LangYaml: -// return ".yaml" -// case LangCss: -// return ".css" -// case LangDockerfile: -// return ".Dockerfile" -// case LangSql: -// return ".sql" -// case LangKotlin: -// return ".kt" -// case LangOCaml: -// return ".ml" -// case LangLua: -// return ".lua" -// case LangBash: -// return ".sh" -// case LangCsharp: -// return ".cs" -// case LangElixir: -// return ".ex" -// case LangElm: -// return ".elm" -// case LangGo: -// return ".go" -// case LangGroovy: -// return ".groovy" -// case LangHcl: -// return ".tf" -// case LangHtml: -// return ".html" -// case LangPhp: -// return ".php" -// case LangScala: -// return ".scala" -// case LangSwift: -// return ".swift" -// default: -// return "" -// } -// } - -// func Parse(filePath string, source []byte, language Language, grammar *sitter.Language) (*ParseResult, error) { -// ast, err := sitter.ParseCtx(context.Background(), source, grammar) -// if err != nil { -// return nil, fmt.Errorf("failed to parse %s", filePath) -// } - -// scopeTree := MakeScopeTree(language, ast, source) -// parseResult := &ParseResult{ -// Ast: ast, -// Source: source, -// FilePath: filePath, -// TsLanguage: grammar, -// Language: language, -// ScopeTree: scopeTree, -// } - -// return parseResult, nil -// } - -// // ParseFile parses the file at the given path using the appropriate -// // tree-sitter grammar. -// func ParseFile(filePath string) (*ParseResult, error) { -// lang := LanguageFromFilePath(filePath) -// grammar := lang.Grammar() -// if grammar == nil { -// return nil, fmt.Errorf("unsupported file type: %s", filePath) -// } - -// source, err := os.ReadFile(filePath) -// if err != nil { -// return nil, err -// } - -// return Parse(filePath, source, lang, grammar) -// } - -// func GetEscapedCommentIdentifierFromPath(path string) string { -// lang := LanguageFromFilePath(path) -// switch lang { -// case LangJs, LangTs, LangTsx, LangJava, LangRust, LangCss, LangMarkdown, LangKotlin, LangCsharp, LangGo, LangGroovy, LangPhp, LangScala, LangSwift: -// return "\\/\\/" -// case LangPy, LangLua, LangBash, LangRuby, LangYaml, LangDockerfile, LangElixir, LangHcl: -// return "#" -// case LangSql, LangElm: -// return "--" -// case LangHtml: -// return "<\\!--" -// case LangOCaml: -// return "\\(\\*" -// default: -// return "" -// } -// } diff --git a/pkg/analysis/pattern_rule.go b/pkg/analysis/pattern_rule.go deleted file mode 100644 index e342ed95..00000000 --- a/pkg/analysis/pattern_rule.go +++ /dev/null @@ -1,360 +0,0 @@ -package analysis - -// import ( -// "fmt" -// "os" -// "strings" - -// "github.com/gobwas/glob" -// sitter "github.com/smacker/go-tree-sitter" -// analysis "globstar.dev/analysis" -// "globstar.dev/pkg/config" -// "gopkg.in/yaml.v3" -// ) - -// // To get a node back from a tree-sitter query, it *must* have a capture name. -// // So: (call_expression) will match nothing, but (call_expression) @some_key -// // will match all call expressions. -// // For filtering patterns with clauses in the yaml file, like: -// // filters: -// // - pattern-inside: (call_expression) -// // - pattern-not-inside: (catch_block) -// // -// // We need a to append a key name at the end of the pattern written by the user. -// // This is the key that we will use. -// const filterPatternKey = "__filter__key__" - -// // A YamlChecker is a checker that matches a tree-sitter query pattern -// // and reports an issue when the pattern is found. -// // Unlike regular issues, PatternCheckers are not associated with a specific node type, rather -// // they are invoked for *every* node that matches the pattern. -// type YamlChecker interface { -// Name() string -// Patterns() []*sitter.Query -// Language() Language -// Category() config.Category -// Severity() config.Severity -// OnMatch( -// ana *Analyzer, // the analyzer instance -// matchedQuery *sitter.Query, // the query that found an AST node -// matchedNode *sitter.Node, // the AST node that matched the query -// captures []sitter.QueryCapture, // list of captures made inside the query -// ) -// PathFilter() *PathFilter -// NodeFilters() []NodeFilter -// } - -// // NodeFilter is a filter that can be applied to a PatternChecker to restrict -// // the the nodes that the checker is applied to. -// // The checker is only applied to nodes that have a parent matching (or not matching) the query. -// type NodeFilter struct { -// query *sitter.Query -// shouldMatch bool -// } - -// // PathFilter is a glob that can be applied to a PatternChecker to restrict -// // the files that the checker is applied to. -// type PathFilter struct { -// ExcludeGlobs []glob.Glob -// IncludeGlobs []glob.Glob -// } - -// type patternCheckerImpl struct { -// language Language -// patterns []*sitter.Query -// issueMessage string -// issueId string -// category config.Category -// severity config.Severity -// pathFilter *PathFilter -// filters []NodeFilter -// } - -// func (r *patternCheckerImpl) Language() Language { -// return r.language -// } - -// func (r *patternCheckerImpl) Patterns() []*sitter.Query { -// return r.patterns -// } - -// func (r *patternCheckerImpl) OnMatch( -// ana *Analyzer, -// matchedQuery *sitter.Query, -// matchedNode *sitter.Node, -// captures []sitter.QueryCapture, -// ) { - -// // replace all '@' with the corresponding capture value -// message := r.issueMessage -// // TODO: 1. escape '@' in the message, 2. use a more efficient way to replace -// for strings.ContainsRune(message, '@') { -// for _, capture := range captures { -// captureName := matchedQuery.CaptureNameForId(capture.Index) -// message = strings.ReplaceAll( -// message, -// "@"+captureName, -// capture.Node.Content(ana.ParseResult.Source), -// ) -// } -// } - -// raisedIssue := &analysis.Issue{ -// Message: message, -// Filepath: ana.ParseResult.FilePath, -// Category: analysis.Category(r.Category()), -// Severity: analysis.Severity(r.Severity()), -// Id: &r.issueId, -// Node: matchedNode, -// } - -// filepath := ana.ParseResult.FilePath -// skipComments := fileSkipComment[filepath] -// if !ana.ContainsSkipcq(skipComments, raisedIssue) { -// ana.Report(raisedIssue) -// } -// } - -// func (r *patternCheckerImpl) Name() string { -// return r.issueId -// } - -// func (r *patternCheckerImpl) PathFilter() *PathFilter { -// return r.pathFilter -// } - -// func (r *patternCheckerImpl) NodeFilters() []NodeFilter { -// return r.filters -// } - -// func (r *patternCheckerImpl) Category() config.Category { -// return r.category -// } - -// func (r *patternCheckerImpl) Severity() config.Severity { -// return r.severity -// } - -// func CreatePatternChecker( -// patterns []*sitter.Query, -// language Language, -// issueMessage string, -// issueId string, -// pathFilter *PathFilter, -// ) YamlChecker { -// return &patternCheckerImpl{ -// language: language, -// patterns: patterns, -// issueMessage: issueMessage, -// issueId: issueId, -// pathFilter: pathFilter, -// } -// } - -// type filterYAML struct { -// PatternInside string `yaml:"pattern-inside,omitempty"` -// PatternNotInside string `yaml:"pattern-not-inside,omitempty"` -// } - -// type PatternCheckerFile struct { -// Language string `yaml:"language"` -// Code string `yaml:"name"` -// Message string `yaml:"message"` -// Category config.Category `yaml:"category"` -// Severity config.Severity `yaml:"severity"` -// // Pattern is a single pattern in the form of: -// // pattern: (some_pattern) -// // in the YAML file -// Pattern string `yaml:"pattern,omitempty"` -// // Patterns are ultiple patterns in the form of: -// // pattern: (something) -// // in the YAML file -// Patterns []string `yaml:"patterns,omitempty"` -// Description string `yaml:"description,omitempty"` -// Filters []filterYAML `yaml:"filters,omitempty"` -// Exclude []string `yaml:"exclude,omitempty"` -// Include []string `yaml:"include,omitempty"` -// } - -// // DecodeLanguage converts a stringified language name to its corresponding -// // Language enum -// func DecodeLanguage(language string) Language { -// language = strings.ToLower(language) -// switch language { -// case "javascript", "js": -// return LangJs -// case "typescript", "ts": -// return LangTs -// case "jsx", "tsx": -// return LangTsx -// case "python", "py": -// return LangPy -// case "ocaml", "ml": -// return LangOCaml -// case "docker", "dockerfile": -// return LangDockerfile -// case "java": -// return LangJava -// case "kotlin", "kt": -// return LangKotlin -// case "rust", "rs": -// return LangRust -// case "ruby", "rb": -// return LangRuby -// case "lua": -// return LangLua -// case "yaml", "yml": -// return LangYaml -// case "sql": -// return LangSql -// case "css", "css3": -// return LangCss -// case "markdown", "md": -// return LangMarkdown -// case "sh", "bash": -// return LangBash -// case "csharp", "cs": -// return LangCsharp -// case "elixir", "ex": -// return LangElixir -// case "elm": -// return LangElm -// case "go": -// return LangGo -// case "groovy": -// return LangGroovy -// case "hcl", "tf": -// return LangHcl -// case "html": -// return LangHtml -// case "php": -// return LangPhp -// case "scala": -// return LangScala -// case "swift": -// return LangSwift -// default: -// return LangUnknown -// } -// } - -// // ReadFromFile reads a pattern checker definition from a YAML config file. -// func ReadFromFile(filePath string) (Analyzer, error) { -// fileContent, err := os.ReadFile(filePath) -// if err != nil { -// return Analyzer{}, err -// } - -// return ReadFromBytes(fileContent) -// } - -// // ReadFromBytes reads a pattern checker definition from bytes array -// func ReadFromBytes(fileContent []byte) (Analyzer, error) { -// var checker PatternCheckerFile -// if err := yaml.Unmarshal(fileContent, &checker); err != nil { -// return Analyzer{}, err -// } - -// lang := DecodeLanguage(checker.Language) -// if lang == LangUnknown { -// return nil, fmt.Errorf("unknown language code: '%s'", checker.Language) -// } - -// if checker.Code == "" { -// return nil, fmt.Errorf("no name provided in checker definition") -// } - -// if checker.Message == "" { -// return nil, fmt.Errorf("no message provided in checker '%s'", checker.Code) -// } - -// var patterns []*sitter.Query -// if checker.Pattern != "" { -// pattern, err := sitter.NewQuery([]byte(checker.Pattern), lang.Grammar()) -// if err != nil { -// return nil, err -// } -// patterns = append(patterns, pattern) -// } else if len(checker.Patterns) > 0 { -// for _, patternStr := range checker.Patterns { -// pattern, err := sitter.NewQuery([]byte(patternStr), lang.Grammar()) -// if err != nil { -// return nil, err -// } -// patterns = append(patterns, pattern) -// } -// } else { -// return nil, fmt.Errorf("no pattern provided in checker '%s'", checker.Code) -// } - -// if checker.Pattern != "" && len(checker.Patterns) > 0 { -// return nil, fmt.Errorf("only one of 'pattern' or 'patterns' can be provided in a checker definition") -// } - -// // include and exclude patterns -// var pathFilter *PathFilter -// if checker.Exclude != nil || checker.Include != nil { -// pathFilter = &PathFilter{ -// ExcludeGlobs: make([]glob.Glob, 0, len(checker.Exclude)), -// IncludeGlobs: make([]glob.Glob, 0, len(checker.Include)), -// } - -// for _, exclude := range checker.Exclude { -// g, err := glob.Compile(exclude) -// if err != nil { -// return nil, err -// } -// pathFilter.ExcludeGlobs = append(pathFilter.ExcludeGlobs, g) -// } - -// for _, include := range checker.Include { -// g, err := glob.Compile(include) -// if err != nil { -// return nil, err -// } -// pathFilter.IncludeGlobs = append(pathFilter.IncludeGlobs, g) -// } -// } - -// // node filters -// var filters []NodeFilter -// if checker.Filters != nil { -// for _, filter := range checker.Filters { -// if filter.PatternInside != "" { -// queryStr := filter.PatternInside + " @" + filterPatternKey -// query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) -// if err != nil { -// return nil, err -// } - -// filters = append(filters, NodeFilter{ -// query: query, -// shouldMatch: true, -// }) -// } - -// if filter.PatternNotInside != "" { -// queryStr := filter.PatternNotInside + " @" + filterPatternKey -// query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) -// if err != nil { -// return nil, err -// } - -// filters = append(filters, NodeFilter{ -// query: query, -// shouldMatch: false, -// }) -// } -// } -// } - -// patternChecker := &patternCheckerImpl{ -// language: lang, -// patterns: patterns, -// issueMessage: checker.Message, -// issueId: checker.Code, -// pathFilter: pathFilter, -// filters: filters, -// } -// return patternChecker, nil -// } diff --git a/pkg/analysis/rule.go b/pkg/analysis/rule.go deleted file mode 100644 index 02236394..00000000 --- a/pkg/analysis/rule.go +++ /dev/null @@ -1,33 +0,0 @@ -package analysis - -// import sitter "github.com/smacker/go-tree-sitter" - -// type VisitFn func(checker Checker, node *sitter.Node) - -// type Checker interface { -// NodeType() string -// GetLanguage() Language -// OnEnter() *VisitFn -// OnLeave() *VisitFn -// } - -// type checkerImpl struct { -// nodeType string -// language Language -// onEnter *VisitFn -// onLeave *VisitFn -// } - -// func (r *checkerImpl) NodeType() string { return r.nodeType } -// func (r *checkerImpl) GetLanguage() Language { return r.language } -// func (r *checkerImpl) OnEnter() *VisitFn { return r.onEnter } -// func (r *checkerImpl) OnLeave() *VisitFn { return r.onLeave } - -// func CreateChecker(nodeType string, language Language, onEnter, onLeave *VisitFn) Checker { -// return &checkerImpl{ -// nodeType: nodeType, -// language: language, -// onEnter: onEnter, -// onLeave: onLeave, -// } -// } diff --git a/pkg/analysis/scope.go b/pkg/analysis/scope.go deleted file mode 100644 index 9a2999f2..00000000 --- a/pkg/analysis/scope.go +++ /dev/null @@ -1,190 +0,0 @@ -// A language agnostic interface for scope handling which -// also handles forward declarations and references (e.g: hoisting). -// BUT, references aren't tracked across files in a language like Golang or C++ (macros/extern/using namespace) - -package analysis - -// import sitter "github.com/smacker/go-tree-sitter" - -// // Reference represents a variable reference inside a source file -// // Cross-file references like those in Golang and C++ (macros/extern) are NOT supported, -// // so this shouldn't be used for checkers like "unused-variable", but is safe to use for checkers like -// // "unused-import" -// type Reference struct { -// // IsWriteRef determines if this reference is a write reference. -// // For write refs, only the expression being assigned is stored. -// // i.e: for `a = 3`, this list will store the `3` node, not the assignment node -// IsWriteRef bool -// // Variable stores the variable being referenced -// Variable *Variable -// // Node stores the node that references the variable -// Node *sitter.Node -// } - -// type VarKind int32 - -// const ( -// VarKindError VarKind = iota -// VarKindImport -// VarKindFunction -// VarKindVariable -// VarKindParameter -// ) - -// type Variable struct { -// Kind VarKind -// // Stores the name of the variable -// Name string -// // DeclNode is the AST node that declares this variable -// DeclNode *sitter.Node -// // Refs is a list of references to this variable throughout the file -// Refs []*Reference -// } - -// // ScopeBuilder is an interface that has to be implemented -// // once for every supported language. -// // Languages that don't implement a `ScopeBuilder` can still have checkers, just -// // not any that require scope resolution. -// type ScopeBuilder interface { -// GetLanguage() Language -// // NodeCreatesScope returns true if the node introduces a new scope -// // into the scope tree -// NodeCreatesScope(node *sitter.Node) bool -// // DeclaresVariable determines if we can extract new variables out of this AST node -// DeclaresVariable(node *sitter.Node) bool -// // CollectVariables extracts variables from the node and adds them to the scope -// CollectVariables(node *sitter.Node) []*Variable -// // OnNodeEnter is called when the scope builder enters a node -// // for the first time, and hasn't scanned its children decls just yet -// // can be used to handle language specific scoping rules, if any -// // If `node` is smth like a block statement, `currentScope` corresponds -// // to the scope introduced by the block statement. -// OnNodeEnter(node *sitter.Node, currentScope *Scope) -// // OnNodeExit is called when the scope builder exits a node -// // can be used to handle language specific scoping rules, if any -// // If `node` is smth like a block statement, `currentScope` corresponds -// // to the scope introduced by the block statement. -// OnNodeExit(node *sitter.Node, currentScope *Scope) -// } - -// type Scope struct { -// // AstNode is the AST node that introduces this scope into the scope tree -// AstNode *sitter.Node -// // Variables is a map of variable name to an object representing it -// Variables map[string]*Variable -// // Upper is the parent scope of this scope -// Upper *Scope -// // Children is a list of scopes that are children of this scope -// Children []*Scope -// } - -// func NewScope(upper *Scope) *Scope { -// return &Scope{ -// Variables: map[string]*Variable{}, -// Upper: upper, -// } -// } - -// // Lookup searches for a variable in the current scope and its parents -// func (s *Scope) Lookup(name string) *Variable { -// if v, exists := s.Variables[name]; exists { -// return v -// } - -// if s.Upper != nil { -// return s.Upper.Lookup(name) -// } - -// return nil -// } - -// type ScopeTree struct { -// Language Language -// // ScopeOfNode maps every scope-having node to its corresponding scope. -// // E.g: a block statement is mapped to the scope it introduces. -// ScopeOfNode map[*sitter.Node]*Scope -// // Root is the top-level scope in the program, -// // usually associated with the `program` or `module` node -// Root *Scope -// } - -// // BuildScopeTree constructs a scope tree from the AST for a program -// func BuildScopeTree(builder ScopeBuilder, ast *sitter.Node, source []byte) *ScopeTree { -// root := NewScope(nil) -// root.AstNode = ast - -// scopeOfNode := make(map[*sitter.Node]*Scope) -// buildScopeTree(builder, source, ast, root, scopeOfNode) - -// return &ScopeTree{ -// Language: builder.GetLanguage(), -// ScopeOfNode: scopeOfNode, -// Root: root, -// } -// } - -// func buildScopeTree( -// builder ScopeBuilder, -// source []byte, -// node *sitter.Node, -// scope *Scope, -// scopeOfNode map[*sitter.Node]*Scope, -// ) *Scope { -// builder.OnNodeEnter(node, scope) -// defer builder.OnNodeExit(node, scope) - -// if builder.DeclaresVariable(node) { -// decls := builder.CollectVariables(node) -// for _, decl := range decls { -// scope.Variables[decl.Name] = decl -// } -// } - -// nextScope := scope -// if builder.NodeCreatesScope(node) { -// nextScope = NewScope(scope) -// nextScope.AstNode = node -// scopeOfNode[node] = nextScope - -// if scope != nil { -// scope.Children = append(scope.Children, nextScope) -// } else { -// scope = nextScope // root -// } -// } - -// for i := 0; i < int(node.NamedChildCount()); i++ { -// child := node.NamedChild(i) -// buildScopeTree(builder, source, child, nextScope, scopeOfNode) -// } - -// return scope -// } - -// // GetScope finds the nearest surrounding scope of an AST node -// func (st *ScopeTree) GetScope(node *sitter.Node) *Scope { -// if scope, exists := st.ScopeOfNode[node]; exists { -// return scope -// } - -// if parent := node.Parent(); parent != nil { -// return st.GetScope(parent) -// } - -// return nil -// } - -// func MakeScopeTree(lang Language, ast *sitter.Node, source []byte) *ScopeTree { -// switch lang { -// case LangPy: -// return nil -// case LangTs, LangJs, LangTsx: -// builder := &TsScopeBuilder{ -// ast: ast, -// source: source, -// } -// return BuildScopeTree(builder, ast, source) -// default: -// return nil -// } -// } diff --git a/pkg/analysis/scope_ts.go b/pkg/analysis/scope_ts.go deleted file mode 100644 index 62d65661..00000000 --- a/pkg/analysis/scope_ts.go +++ /dev/null @@ -1,295 +0,0 @@ -// scope resolution implementation for JS and TS files -package analysis - -// import ( -// "slices" - -// sitter "github.com/smacker/go-tree-sitter" -// ) - -// type UnresolvedRef struct { -// id *sitter.Node -// surroundingScope *Scope -// } - -// type TsScopeBuilder struct { -// ast *sitter.Node -// source []byte -// // unresolvedRefs is the list of references that could not be resolved thus far in the traversal -// unresolvedRefs []UnresolvedRef -// } - -// func (j *TsScopeBuilder) GetLanguage() Language { -// return LangJs -// } - -// var ScopeNodes = []string{ -// "statement_block", -// "function_declaration", -// "function_expression", -// "for_statement", -// "for_in_statement", -// "for_of_statement", -// "program", -// } - -// func (ts *TsScopeBuilder) NodeCreatesScope(node *sitter.Node) bool { -// return slices.Contains(ScopeNodes, node.Type()) -// } - -// func (ts *TsScopeBuilder) DeclaresVariable(node *sitter.Node) bool { -// typ := node.Type() -// // addition of function_declaration and formal_parameters necessary for functional scope handling. -// return typ == "variable_declarator" || typ == "import_clause" || typ == "import_specifier" || typ == "formal_parameters" || typ == "function_declaration" -// } - -// func (ts *TsScopeBuilder) scanDecl(idOrPattern, declarator *sitter.Node, decls []*Variable) []*Variable { -// switch idOrPattern.Type() { -// case "identifier": -// // = ... -// nameStr := idOrPattern.Content(ts.source) -// decls = append(decls, &Variable{ -// Kind: VarKindVariable, -// Name: nameStr, -// DeclNode: declarator, -// }) - -// case "object_pattern": -// // { } = ... -// props := ChildrenOfType(idOrPattern, "shorthand_property_identifier_pattern") -// for _, prop := range props { -// decls = append(decls, &Variable{ -// Kind: VarKindVariable, -// Name: prop.Content(ts.source), -// DeclNode: declarator, -// }) -// } - -// pairs := ChildrenOfType(idOrPattern, "pair_pattern") -// for _, pair := range pairs { -// decls = ts.scanDecl(pair, declarator, decls) -// } - -// // { realName : } = ... -// // alias can be an identifier or nested object pattern. -// case "pair_pattern": -// binding := idOrPattern.ChildByFieldName("value") -// decls = ts.scanDecl(binding, declarator, decls) - -// case "array_pattern": -// // [ ] = foo -// childrenIds := ChildrenOfType(idOrPattern, "identifier") -// childrenObjPatterns := ChildrenOfType(idOrPattern, "object_pattern") -// childrenArrayPatterns := ChildrenOfType(idOrPattern, "array_pattern") -// for _, id := range childrenIds { -// decls = append(decls, &Variable{ -// Kind: VarKindVariable, -// Name: id.Content(ts.source), -// DeclNode: declarator, -// }) -// } - -// for _, objPattern := range childrenObjPatterns { -// decls = ts.scanDecl(objPattern, declarator, decls) -// } - -// for _, arrayPattern := range childrenArrayPatterns { -// decls = ts.scanDecl(arrayPattern, declarator, decls) -// } - -// for _, objectPattern := range childrenObjPatterns { -// decls = ts.scanDecl(objectPattern, declarator, decls) -// } -// } - -// return decls -// } - -// func (ts *TsScopeBuilder) variableFromImportSpecifier(specifier *sitter.Node) *Variable { -// name := specifier.ChildByFieldName("name") -// if name == nil { -// // skipcq: TCV-001 -// return nil -// } - -// var Name string -// if specifier.Child(2) != nil { -// // alias ( as ) -// local := specifier.Child(2) -// Name = local.Content(ts.source) -// } else { -// // no alias -// Name = name.Content(ts.source) -// } - -// return &Variable{ -// Kind: VarKindImport, -// Name: Name, -// DeclNode: specifier, -// } -// } - -// func (ts *TsScopeBuilder) CollectVariables(node *sitter.Node) []*Variable { -// var declaredVars []*Variable -// switch node.Type() { -// case "variable_declarator": -// lhs := node.ChildByFieldName("name") -// return ts.scanDecl(lhs, node, declaredVars) - -// case "function_declaration": -// name := node.ChildByFieldName("name") -// // skipcq: TCV-001 -// if name == nil { -// break -// } - -// declaredVars = append(declaredVars, &Variable{ -// Kind: VarKindFunction, -// Name: name.Content(ts.source), -// DeclNode: node, -// }) - -// case "formal_parameters": -// // TODO - -// for i := 0; i < int(node.NamedChildCount()); i++ { -// param := node.NamedChild(i) -// if param == nil { -// continue -// } -// // Handle different parameter types (required, optional, rest, patterns) -// // Simple identifier parameter: function foo(x) -// // Required parameter often wraps identifier: function foo(x: number) -// var identifier *sitter.Node -// if param.Type() == "identifier" { -// identifier = param -// } else if param.Type() == "required_parameter" || param.Type() == "optional_parameter" { -// // Look for pattern which might be identifier or destructuring -// pattern := param.ChildByFieldName("pattern") -// if pattern != nil && pattern.Type() == "identifier" { -// identifier = pattern -// } -// // TODO: Handle destructuring patterns within parameters if needed by calling scanDecl -// } else if param.Type() == "assignment_pattern" { -// // Parameter with default value: function foo(x = 1) -// left := param.ChildByFieldName("left") -// if left != nil && left.Type() == "identifier" { -// identifier = left -// } -// // TODO: Handle destructuring patterns within parameters if needed by calling scanDecl -// } -// // TODO: Handle rest parameter (...)+ -// if identifier != nil { -// declaredVars = append(declaredVars, &Variable{ -// Kind: VarKindParameter, -// Name: identifier.Content(ts.source), -// DeclNode: param, // Use the parameter node itself (or identifier) as DeclNode -// }) -// } -// // Add handling for destructuring patterns here if necessary using scanDecl -// } - -// case "import_specifier": -// // import { } from ... -// variable := ts.variableFromImportSpecifier(node) -// declaredVars = append(declaredVars, variable) - -// case "import_clause": -// // import , { } from ... -// defaultImport := FirstChildOfType(node, "identifier") -// if defaultImport != nil { -// declaredVars = append(declaredVars, &Variable{ -// Kind: VarKindImport, -// Name: defaultImport.Content(ts.source), -// DeclNode: defaultImport, -// }) -// } -// } - -// return declaredVars -// } - -// func (ts *TsScopeBuilder) OnNodeEnter(node *sitter.Node, scope *Scope) { -// // collect identifier references if one is found -// if node.Type() == "identifier" { -// parent := node.Parent() -// if parent == nil { -// return -// } - -// parentType := parent.Type() - -// if parentType == "variable_declarator" && parent.ChildByFieldName("name") == node { -// return -// } - -// if parentType == "formal_parameters" { -// return -// } - -// // binding identifiers in array patterns are not references. -// // e.g. in `const [a, b] = foo;`, `a` and `b` are not references. -// if parentType == "array_pattern" { -// return -// } - -// if parentType == "assignment_pattern" && parent.ChildByFieldName("left") == node { -// return -// } - -// if parentType == "required_parameter" && parent.ChildByFieldName("pattern") == node { -// return -// } - -// // destructured property binding names are *not* references. -// // e.g. in `const { a: b } = foo;`, `a` is not a reference. -// if parentType == "pair_pattern" && parent.ChildByFieldName("key") == node { -// return -// } - -// if parentType == "import_clause" || parentType == "import_specifier" { -// return -// } - -// // try to resolve this reference to a target variable -// variable := scope.Lookup(node.Content(ts.source)) -// if variable == nil { -// unresolved := UnresolvedRef{ -// id: node, -// surroundingScope: scope, -// } - -// ts.unresolvedRefs = append(ts.unresolvedRefs, unresolved) -// return -// } - -// // If a variable is found, add a reference to it -// ref := &Reference{ -// Variable: variable, -// Node: node, -// } -// variable.Refs = append(variable.Refs, ref) -// } -// } - -// func (ts *TsScopeBuilder) OnNodeExit(node *sitter.Node, scope *Scope) { -// if node.Type() == "program" { -// // At the end, try to resolve all unresolved references -// for _, unresolved := range ts.unresolvedRefs { -// variable := unresolved.surroundingScope.Lookup( -// unresolved.id.Content(ts.source), -// ) - -// if variable == nil { -// continue -// } - -// ref := &Reference{ -// Variable: variable, -// Node: unresolved.id, -// } - -// variable.Refs = append(variable.Refs, ref) -// } -// } -// } diff --git a/pkg/analysis/scope_ts_test.go b/pkg/analysis/scope_ts_test.go deleted file mode 100644 index ca5d3da3..00000000 --- a/pkg/analysis/scope_ts_test.go +++ /dev/null @@ -1,133 +0,0 @@ -package analysis - -// import ( -// "testing" - -// "github.com/stretchr/testify/assert" -// "github.com/stretchr/testify/require" -// ) - -// func parseFile(t *testing.T, source string) *ParseResult { -// parsed, err := Parse("file.ts", []byte(source), LangJs, LangJs.Grammar()) -// require.NoError(t, err) -// require.NotNil(t, parsed) -// return parsed -// } - -// func Test_BuildScopeTree(t *testing.T) { -// t.Run("is able to resolve references", func(t *testing.T) { -// source := ` -// let x = 1 -// { -// let y = x -// }` -// parsed := parseFile(t, source) - -// scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) -// require.NotNil(t, scopeTree) -// globalScope := scopeTree.Root.Children[0] -// varX, exists := globalScope.Variables["x"] -// require.True(t, exists) -// require.NotNil(t, varX) - -// varY, exists := globalScope.Children[0].Variables["y"] -// require.True(t, exists) -// require.NotNil(t, varY) -// require.Equal(t, VarKindVariable, varY.Kind) - -// assert.Equal(t, 1, len(varX.Refs)) -// xRef := varX.Refs[0] -// assert.Equal(t, "x", xRef.Variable.Name) -// require.Equal(t, VarKindVariable, varY.Kind) -// }) - -// t.Run("supports import statements", func(t *testing.T) { -// source := ` -// import { extname } from 'path' -// { -// let { extname = 1 } = null // does NOT count as a reference -// } - -// let { x = extname } = null // counts as a reference - -// { -// extname('file.txt') // counts as a reference -// let { extname } = null // does NOT count as a reference -// } - -// import { readFile as r } from 'file' -// r('file.txt') -// function f(r = x) {} // NOT a reference -// ` -// parsed := parseFile(t, source) - -// scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) -// require.NotNil(t, scopeTree) -// globalScope := scopeTree.Root.Children[0] -// { -// varR, exists := globalScope.Variables["r"] -// require.True(t, exists) -// require.NotNil(t, varR) - -// assert.Equal(t, VarKindImport, varR.Kind) - -// rRefs := varR.Refs -// require.Equal(t, 1, len(rRefs)) -// assert.Equal(t, "call_expression", rRefs[0].Node.Parent().Type()) -// } - -// { -// varExtname, exists := globalScope.Variables["extname"] -// require.True(t, exists) -// require.NotNil(t, varExtname) - -// assert.Equal(t, VarKindImport, varExtname.Kind) - -// extnameRefs := varExtname.Refs -// require.Equal(t, 2, len(extnameRefs)) -// assert.Equal(t, "object_assignment_pattern", extnameRefs[0].Node.Parent().Type()) -// assert.Equal(t, "call_expression", extnameRefs[1].Node.Parent().Type()) -// } -// }) - -// t.Run("handles function declaration with parameters", func(t *testing.T) { -// source := ` -// function greet(name, age = 18) { -// let greeting = "Hello"; -// return greeting + " " + name; -// } -// greet("Alice") -// ` - -// parsed := parseFile(t, source) -// require.NotNil(t, parsed) -// scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) -// globalScope := scopeTree.Root.Children[0] -// // Checking function declaration -// funcVar := globalScope.Lookup("greet") -// require.NotNil(t, funcVar) -// funcVariable, exists := globalScope.Variables["greet"] // tagged as an Identifier -// require.True(t, exists) -// require.NotNil(t, funcVariable) - -// funcScope := scopeTree.GetScope(funcVar.DeclNode) -// require.NotNil(t, funcScope) - -// nameVar, exists := funcScope.Variables["name"] -// require.True(t, exists) -// require.Equal(t, VarKindParameter, nameVar.Kind) - -// ageVar, exists := funcScope.Variables["age"] -// require.True(t, exists) -// require.Equal(t, VarKindParameter, ageVar.Kind) - -// // existence of function body - -// bodyScope := funcScope.Children[0] -// require.NotNil(t, bodyScope) - -// greetingVar, exists := bodyScope.Variables["greeting"] -// require.True(t, exists) -// require.Equal(t, VarKindVariable, greetingVar.Kind) -// }) -// } diff --git a/pkg/analysis/walk.go b/pkg/analysis/walk.go deleted file mode 100644 index d04af646..00000000 --- a/pkg/analysis/walk.go +++ /dev/null @@ -1,96 +0,0 @@ -package analysis - -// import ( -// sitter "github.com/smacker/go-tree-sitter" -// ) - -// // Walker is an interface that dictates what to do when -// // entering and leaving each node during the pre-order traversal -// // of a tree. -// // To traverse post-order, use the `OnLeaveNode` callback. -// type Walker interface { -// // OnEnterNode is called when the walker enters a node. -// // The boolean return value indicates whether the walker should -// // continue walking the sub-tree of this node. -// OnEnterNode(node *sitter.Node) bool -// // OnLeaveNode is called when the walker leaves a node. -// // This is called after all the children of the node have been visited and explored. -// OnLeaveNode(node *sitter.Node) -// } - -// func WalkTree(node *sitter.Node, walker Walker) { -// goInside := walker.OnEnterNode(node) -// if goInside { -// for i := 0; i < int(node.NamedChildCount()); i++ { -// child := node.NamedChild(i) -// WalkTree(child, walker) -// } -// } - -// walker.OnLeaveNode(node) -// } - -// // ChildrenWithFieldName returns all the children of a node -// // with a specific field name. -// // Tree-sitter can have multiple children with the same field name. -// func ChildrenWithFieldName(node *sitter.Node, fieldName string) []*sitter.Node { -// var children []*sitter.Node -// for i := 0; i < int(node.ChildCount()); i++ { -// if node.FieldNameForChild(i) == fieldName { -// child := node.Child(i) -// children = append(children, child) -// } -// } - -// return children -// } - -// // FindMatchingChild iterates over all children of a node—both named and unnamed—and returns the -// // first child that matches the predicate function. -// func FindMatchingChild(node *sitter.Node, predicate func(*sitter.Node) bool) *sitter.Node { -// nChildren := int(node.ChildCount()) - -// for i := 0; i < nChildren; i++ { -// child := node.Child(i) -// if predicate(child) { -// return child -// } -// } - -// return nil -// } - -// func ChildrenOfType(node *sitter.Node, nodeType string) []*sitter.Node { -// nChildren := int(node.ChildCount()) -// var results []*sitter.Node -// for i := 0; i < nChildren; i++ { -// child := node.Child(i) -// if child.Type() == nodeType { -// results = append(results, child) -// } -// } -// return results -// } - -// func ChildWithFieldName(node *sitter.Node, fieldName string) *sitter.Node { -// nChildren := int(node.NamedChildCount()) -// for i := 0; i < nChildren; i++ { -// if node.FieldNameForChild(i) == fieldName { -// return node.Child(i) -// } -// } - -// return nil -// } - -// func FirstChildOfType(node *sitter.Node, nodeType string) *sitter.Node { -// nChildren := int(node.ChildCount()) -// for i := 0; i < nChildren; i++ { -// child := node.Child(i) -// if child.Type() == nodeType { -// return child -// } -// } - -// return nil -// } From 4bede167cc6b558ff50ade39b91be103be71a4e3 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Wed, 4 Jun 2025 23:16:05 +0530 Subject: [PATCH 06/12] chore: fix potential bugs --- analysis/testrunner.go | 43 ------------------------------------------ analysis/yaml.go | 4 ++-- 2 files changed, 2 insertions(+), 45 deletions(-) diff --git a/analysis/testrunner.go b/analysis/testrunner.go index e9be4a20..7842b07a 100644 --- a/analysis/testrunner.go +++ b/analysis/testrunner.go @@ -3,7 +3,6 @@ package analysis import ( "fmt" "io/fs" - "os" "path/filepath" "regexp" "sort" @@ -145,48 +144,6 @@ func getExpectedIssuesInDir(testDir string, fileFilter func(string) bool) (map[s return expectedIssues, nil } -func discoverYamlAnalyzers(testDir string) ([]*Analyzer, error) { - var yamlAnalyzers []*Analyzer - - err := filepath.Walk(testDir, func(path string, info fs.FileInfo, err error) error { - if err != nil { - return nil - } - - if info.IsDir() { - return nil - } - - fileExt := filepath.Ext(path) - isYamlFile := fileExt == ".yaml" || fileExt == ".yml" - if !isYamlFile { - return nil - } - - // Check if there's a corresponding test file - baseName := strings.TrimSuffix(path, fileExt) - - // Try to read the YAML checker - analyzer, _, err := ReadFromFile(path) - if err != nil { - // Skip files that aren't valid checkers - return nil - } - - // Check if corresponding test file exists - testFile := baseName + ".test" + GetExtFromLanguage(analyzer.Language) - if _, err := os.Stat(testFile); os.IsNotExist(err) { - // Skip if no test file exists - return nil - } - - yamlAnalyzers = append(yamlAnalyzers, &analyzer) - return nil - }) - - return yamlAnalyzers, err -} - func getExpectedIssuesInFile(file *ParseResult, query *sitter.Query) map[int][]string { commentIdentifier := GetEscapedCommentIdentifierFromPath(file.FilePath) diff --git a/analysis/yaml.go b/analysis/yaml.go index 2548b2e9..205655bf 100644 --- a/analysis/yaml.go +++ b/analysis/yaml.go @@ -250,7 +250,7 @@ func (ana *YamlAnalyzer) runParentFilters(source []byte, capture *sitter.Node) b nodeMatched := false for parent := capture.Parent(); parent != nil; parent = parent.Parent() { - if ana.filterMatchesParent(&filter, parent, source) { + if filterMatchesParent(&filter, parent, source) { nodeMatched = true if !shouldMatch { return false @@ -268,7 +268,7 @@ func (ana *YamlAnalyzer) runParentFilters(source []byte, capture *sitter.Node) b return true } -func (ana *YamlAnalyzer) filterMatchesParent(filter *NodeFilter, parent *sitter.Node, source []byte) bool { +func filterMatchesParent(filter *NodeFilter, parent *sitter.Node, source []byte) bool { qc := sitter.NewQueryCursor() defer qc.Close() From 36b7ef4d5fb698f13e7ef05541b4b03b76c24694 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Mon, 9 Jun 2025 21:02:39 +0530 Subject: [PATCH 07/12] feat: add mechanism to parse and configure custom functions in YAML analyzer --- analysis/directory.go | 27 ++++++++ analysis/directory_test.go | 37 +++++++++++ analysis/feature | 62 +++++++++++++++++ .../testdata/mock-analysis-function.test.js | 10 +++ analysis/testdata/mock-analysis-function.yml | 23 +++++++ analysis/yaml.go | 66 +++++++++++++------ analysis/yaml_test.go | 11 ++++ 7 files changed, 215 insertions(+), 21 deletions(-) create mode 100644 analysis/directory.go create mode 100644 analysis/directory_test.go create mode 100644 analysis/feature create mode 100644 analysis/testdata/mock-analysis-function.test.js create mode 100644 analysis/testdata/mock-analysis-function.yml diff --git a/analysis/directory.go b/analysis/directory.go new file mode 100644 index 00000000..ce40e57f --- /dev/null +++ b/analysis/directory.go @@ -0,0 +1,27 @@ +package analysis + +var RegisteredAnalysisFunctions = []AnalysisFunction{} + +func InitializeAnalysisFunction(fn AnalysisFunction) AnalysisFunction { + switch fn.Name { + case "taint": + fn.Run = TaintRun + } + + RegisteredAnalysisFunctions = append(RegisteredAnalysisFunctions, fn) + return fn +} + +func TaintRun(args ...interface{}) (Analyzer, error) { + sources := args[0].([]string) + sinks := args[1].([]string) + + analyzer := NewTaintAnalyzer(sources, sinks) + return analyzer, nil +} + +func NewTaintAnalyzer(sources, sinks []string) Analyzer { + return Analyzer{ + Name: "taint_analyzer", + } +} diff --git a/analysis/directory_test.go b/analysis/directory_test.go new file mode 100644 index 00000000..348de9f4 --- /dev/null +++ b/analysis/directory_test.go @@ -0,0 +1,37 @@ +package analysis + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestInitializeAnalysisFunction(t *testing.T) { + + fn := InitializeAnalysisFunction(AnalysisFunction{ + Name: "taint", + Parameters: map[string][]string{ + "sources": {"string"}, + "sinks": {"string"}, + }, + }) + + inbuiltTaintAnalyzer, err := fn.Run([]string{"source1", "source2"}, []string{"sink1", "sink2"}) + assert.NoError(t, err) + + assert.Equal(t, inbuiltTaintAnalyzer.Name, "taint_analyzer") + +} + +func TestPopulationOfAnalysisFuncitonRegistry(t *testing.T) { + _ = InitializeAnalysisFunction(AnalysisFunction{ + Name: "taint", + Parameters: map[string][]string{ + "sources": {"string"}, + "sinks": {"string"}, + }, + }) + + assert.Equal(t, len(RegisteredAnalysisFunctions), 1) + +} diff --git a/analysis/feature b/analysis/feature new file mode 100644 index 00000000..7812136f --- /dev/null +++ b/analysis/feature @@ -0,0 +1,62 @@ +type AnalysisFunction struct { + Name string + Parameters []reflect.Type + Run func(args ...interface{}) (Analyzer, error) +} + +--- +functions/run_taint_analysis.go +--- +TaintAnalysisFunction := AnalysisFunction{ + Name: "taint", + Parameters: []reflect.Type{ + reflect.TypeOf([]string{}), // sources + reflect.TypeOf([]string{}), // sinks + }, + Description: "Runs a taint analysis on the provided function and its parameters.", + Run: func(args ...interface{}) (Analyzer, error) { + sources := args[0].([]string) + sinks := args[1].([]string) + + analyzer := NewTaintAnalyzer(sources, sinks) + return analyzer, nil + } +} + +func NewTaintAnalyzer(sources, sinks []string) Analyzer { + return &TaintAnalyzer{ + Sources: sources, + Sinks: sinks, + } +} + +--- +directory.go +--- +functions := []AnalysisFunction{ + TaintAnalysisFunction, +} + +for _, function := range functions { + analyzer, err := function.Run(function.Parameters...) + analyzers = append(analyzers, analyzer) +} + + + +--- +name: "run_taint_analysis" +language: go +description: "Runs a taint analysis on the provided function and its parameters." +analysisFunction: + name: taint + parameters: + sources: + - (query) + sinks: + - ( + (callexpression method @methodname (parameterList)) + #match @methodname "get_user_input" + ) + - (function (parameterList)) + - (function (parameterList)) \ No newline at end of file diff --git a/analysis/testdata/mock-analysis-function.test.js b/analysis/testdata/mock-analysis-function.test.js new file mode 100644 index 00000000..2e9077c1 --- /dev/null +++ b/analysis/testdata/mock-analysis-function.test.js @@ -0,0 +1,10 @@ +function getUserInput(key) { + + return document.getElementById(key).value; + +} + +userInput = getUserInput('username') + +// A sink method, which performs some raw databse operation on the userInput +perform_db_operation(userInput) \ No newline at end of file diff --git a/analysis/testdata/mock-analysis-function.yml b/analysis/testdata/mock-analysis-function.yml new file mode 100644 index 00000000..400023c4 --- /dev/null +++ b/analysis/testdata/mock-analysis-function.yml @@ -0,0 +1,23 @@ +name: "run_taint_analysis" +language: javascript +category: security +severity: high +message: "This is just a mock checker" +analysisFunction: + name: taint + parameters: + sources: + - | + (call_expression + function: (identifier) @sourceName + (#eq? @sourceName "getUserInput")) + sinks: + - | + (call_expression + function: (identifier) @sinkName + (#eq? @sinkName "perform_db_operation")) + +pattern: | + (call_expression) + +description: "Runs a taint analysis on the provided function and its parameters." \ No newline at end of file diff --git a/analysis/yaml.go b/analysis/yaml.go index 205655bf..53be8e9c 100644 --- a/analysis/yaml.go +++ b/analysis/yaml.go @@ -47,27 +47,37 @@ type PathFilter struct { IncludeGlobs []glob.Glob } +type AnalysisFunction struct { + Name string `yaml:"name"` + Parameters map[string][]string `yaml:"parameters"` + Run func(args ...interface{}) (Analyzer, error) +} + type Yaml struct { - Language string `yaml:"language"` - Code string `yaml:"name"` - Message string `yaml:"message"` - Category Category `yaml:"category"` - Severity Severity `yaml:"severity"` - Pattern string `yaml:"pattern"` - Patterns []string `yaml:"patterns"` - Description string `yaml:"description"` - Exclude []string `yaml:"exclude,omitempty"` - Include []string `yaml:"include,omitempty"` - Filters []filterYaml `yaml:"filters,omitempty"` - PathFilter *pathFilterYaml `yaml:"path_filter,omitempty"` + Language string `yaml:"language"` + Code string `yaml:"name"` + Message string `yaml:"message"` + Category Category `yaml:"category"` + Severity Severity `yaml:"severity"` + Pattern string `yaml:"pattern"` + Patterns []string `yaml:"patterns"` + Description string `yaml:"description"` + Exclude []string `yaml:"exclude,omitempty"` + Include []string `yaml:"include,omitempty"` + Filters []filterYaml `yaml:"filters,omitempty"` + PathFilter *pathFilterYaml `yaml:"path_filter,omitempty"` + Sink []string `yaml:"sink,omitempty"` + Source []string `yaml:"source,omitempty"` + AnalysisFunction AnalysisFunction `yaml:"analysisFunction,omitempty"` } type YamlAnalyzer struct { - Analyzer Analyzer - Patterns []*sitter.Query - NodeFilter []NodeFilter - PathFilter *PathFilter - Message string + Analyzer Analyzer + Patterns []*sitter.Query + NodeFilter []NodeFilter + PathFilter *PathFilter + Message string + AnalysisFunction AnalysisFunction } // ReadFromFile reads a pattern checker definition from a YAML config file. @@ -87,6 +97,16 @@ func ReadFromBytes(fileContent []byte) (Analyzer, YamlAnalyzer, error) { return Analyzer{}, YamlAnalyzer{}, err } + analysisFunction := checker.AnalysisFunction + + if analysisFunction.Name != "" { + analysisFunction.Parameters = map[string][]string{ + "sources": checker.Source, + "sinks": checker.Sink, + } + InitializeAnalysisFunction(analysisFunction) + } + lang := DecodeLanguage(checker.Language) if lang == LangUnknown { return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("unknown language code: '%s'", checker.Language) @@ -196,10 +216,11 @@ func ReadFromBytes(fileContent []byte) (Analyzer, YamlAnalyzer, error) { Category: checker.Category, Severity: checker.Severity, }, - Patterns: patterns, - NodeFilter: filters, - PathFilter: pathFilter, - Message: checker.Message, + Patterns: patterns, + NodeFilter: filters, + PathFilter: pathFilter, + Message: checker.Message, + AnalysisFunction: checker.AnalysisFunction, } patternChecker.Run = RunYamlAnalyzer(yamlAnalyzer) @@ -291,3 +312,6 @@ func filterMatchesParent(filter *NodeFilter, parent *sitter.Node, source []byte) return false } + +// TODO: Add a new field inside the Yaml Strcut, that takes in Function field +// Introduction of a new Struct for storing analysisFunction diff --git a/analysis/yaml_test.go b/analysis/yaml_test.go index 1cb08677..c1898696 100644 --- a/analysis/yaml_test.go +++ b/analysis/yaml_test.go @@ -74,3 +74,14 @@ func TestNodeFilterWithTests(t *testing.T) { t.Logf("Log: %s", log) assert.True(t, passed) } + +func TestAnalysisFunction(t *testing.T) { + path := "./testdata/mock-analysis-function.yml" + ana, yamlAna, err := ReadFromFile(path) + require.NoError(t, err, "Failed to read YAML data") + assert.Equal(t, ana.Name, "run_taint_analysis") + assert.Equal(t, yamlAna.AnalysisFunction.Name, "taint") + assert.Equal(t, len(yamlAna.AnalysisFunction.Parameters["sources"]), 1) + assert.Equal(t, len(yamlAna.AnalysisFunction.Parameters["sinks"]), 1) + assert.Equal(t, yamlAna.AnalysisFunction.Name, "taint") +} From 869130a8357acb7780b78c5052f76aab9c62bdff Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Wed, 11 Jun 2025 01:04:39 +0530 Subject: [PATCH 08/12] checkers(javascript): initialize go checker to detect source/sink nodes based on tree-sitter queries --- analysis/analysis_functions.go | 17 ++++ analysis/directory.go | 15 ---- analysis/directory_test.go | 4 +- analysis/yaml.go | 2 +- analysis/yaml_test.go | 1 + checkers/javascript/taint_detector.go | 93 ++++++++++++++++++++++ checkers/javascript/taint_detector_test.go | 58 ++++++++++++++ 7 files changed, 172 insertions(+), 18 deletions(-) create mode 100644 analysis/analysis_functions.go create mode 100644 checkers/javascript/taint_detector.go create mode 100644 checkers/javascript/taint_detector_test.go diff --git a/analysis/analysis_functions.go b/analysis/analysis_functions.go new file mode 100644 index 00000000..5d076bd2 --- /dev/null +++ b/analysis/analysis_functions.go @@ -0,0 +1,17 @@ +package analysis + +func TaintRun(args ...interface{}) Analyzer { + sources := args[0].([]string) + sinks := args[1].([]string) + + analyzer := NewTaintAnalyzer(sources, sinks) + return analyzer +} + +func NewTaintAnalyzer(sources, sinks []string) Analyzer { + return Analyzer{ + Name: "taint_analyzer", + } +} + + diff --git a/analysis/directory.go b/analysis/directory.go index ce40e57f..44eb50cb 100644 --- a/analysis/directory.go +++ b/analysis/directory.go @@ -7,21 +7,6 @@ func InitializeAnalysisFunction(fn AnalysisFunction) AnalysisFunction { case "taint": fn.Run = TaintRun } - RegisteredAnalysisFunctions = append(RegisteredAnalysisFunctions, fn) return fn } - -func TaintRun(args ...interface{}) (Analyzer, error) { - sources := args[0].([]string) - sinks := args[1].([]string) - - analyzer := NewTaintAnalyzer(sources, sinks) - return analyzer, nil -} - -func NewTaintAnalyzer(sources, sinks []string) Analyzer { - return Analyzer{ - Name: "taint_analyzer", - } -} diff --git a/analysis/directory_test.go b/analysis/directory_test.go index 348de9f4..992a81aa 100644 --- a/analysis/directory_test.go +++ b/analysis/directory_test.go @@ -16,8 +16,8 @@ func TestInitializeAnalysisFunction(t *testing.T) { }, }) - inbuiltTaintAnalyzer, err := fn.Run([]string{"source1", "source2"}, []string{"sink1", "sink2"}) - assert.NoError(t, err) + inbuiltTaintAnalyzer := fn.Run([]string{"source1", "source2"}, []string{"sink1", "sink2"}) + // assert.NoError(t, err) assert.Equal(t, inbuiltTaintAnalyzer.Name, "taint_analyzer") diff --git a/analysis/yaml.go b/analysis/yaml.go index 53be8e9c..65aa4a99 100644 --- a/analysis/yaml.go +++ b/analysis/yaml.go @@ -50,7 +50,7 @@ type PathFilter struct { type AnalysisFunction struct { Name string `yaml:"name"` Parameters map[string][]string `yaml:"parameters"` - Run func(args ...interface{}) (Analyzer, error) + Run func(args ...interface{}) Analyzer } type Yaml struct { diff --git a/analysis/yaml_test.go b/analysis/yaml_test.go index c1898696..be77b873 100644 --- a/analysis/yaml_test.go +++ b/analysis/yaml_test.go @@ -82,6 +82,7 @@ func TestAnalysisFunction(t *testing.T) { assert.Equal(t, ana.Name, "run_taint_analysis") assert.Equal(t, yamlAna.AnalysisFunction.Name, "taint") assert.Equal(t, len(yamlAna.AnalysisFunction.Parameters["sources"]), 1) + t.Logf("%v", yamlAna.AnalysisFunction.Parameters["sources"]) assert.Equal(t, len(yamlAna.AnalysisFunction.Parameters["sinks"]), 1) assert.Equal(t, yamlAna.AnalysisFunction.Name, "taint") } diff --git a/checkers/javascript/taint_detector.go b/checkers/javascript/taint_detector.go new file mode 100644 index 00000000..cf297e5b --- /dev/null +++ b/checkers/javascript/taint_detector.go @@ -0,0 +1,93 @@ +package javascript + +import ( + "fmt" + + sitter "github.com/smacker/go-tree-sitter" + "globstar.dev/analysis" +) + +// var TaintDetector = &analysis.Analyzer{ +// Name: "taint_detector", +// Language: analysis.LangJs, +// Description: "Taint detector", +// Category: analysis.CategorySecurity, +// Severity: analysis.SeverityCritical, +// Requires: []*analysis.Analyzer{DataFlowAnalyzer}, +// Run: detectTaint([]string{"sink"}, []string{"source"}), +// } + +func detectTaint(sink []string, source []string) func(pass *analysis.Pass) (any, error) { + + return func(pass *analysis.Pass) (interface{}, error) { + var sinkPatterns []*sitter.Query + for _, sink := range sink { + sinkPattern, err := sitter.NewQuery([]byte(sink), analysis.LangJs.Grammar()) + if err != nil { + return nil, fmt.Errorf("failed to create sink pattern: %w", err) + } + sinkPatterns = append(sinkPatterns, sinkPattern) + } + + var sourcePatterns []*sitter.Query + for _, source := range source { + sourcePattern, err := sitter.NewQuery([]byte(source), analysis.LangJs.Grammar()) + if err != nil { + return nil, fmt.Errorf("failed to create source pattern: %w", err) + } + sourcePatterns = append(sourcePatterns, sourcePattern) + } + + if len(sinkPatterns) == 0 || len(sourcePatterns) == 0 { + return nil, fmt.Errorf("no patterns found") + } + + var sourceNodes []*sitter.Node + var sinkNodes []*sitter.Node + for _, query := range sourcePatterns { + qc := sitter.NewQueryCursor() + defer qc.Close() + qc.Exec(query, pass.FileContext.Ast) + for { + m, ok := qc.NextMatch() + if !ok { + break + } + m = qc.FilterPredicates(m, pass.FileContext.Source) + for _, capture := range m.Captures { + captureNode := capture.Node + sourceNodes = append(sourceNodes, captureNode) + } + + } + } + + for _, query := range sinkPatterns { + qc := sitter.NewQueryCursor() + defer qc.Close() + qc.Exec(query, pass.FileContext.Ast) + for { + m, ok := qc.NextMatch() + if !ok { + break + } + m = qc.FilterPredicates(m, pass.FileContext.Source) + for _, capture := range m.Captures { + captureNode := capture.Node + sinkNodes = append(sinkNodes, captureNode) + } + } + } + + if len(sinkNodes) == 0 || len(sourceNodes) == 0 { + return nil, fmt.Errorf("no sink or source pattern matched") + } + + return map[string]interface{}{ + "sinkNodes": sinkNodes, + "sourceNodes": sourceNodes, + "sinkPatterns": sinkPatterns, + "sourcePatterns": sourcePatterns, + }, nil + } +} diff --git a/checkers/javascript/taint_detector_test.go b/checkers/javascript/taint_detector_test.go new file mode 100644 index 00000000..da5198f5 --- /dev/null +++ b/checkers/javascript/taint_detector_test.go @@ -0,0 +1,58 @@ +package javascript + +import ( + "testing" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/stretchr/testify/assert" + ana "globstar.dev/analysis" +) + +func TestPatternDetection(t *testing.T) { + source := ` +function getUserInput(key) { + + return document.getElementById(key).value; + +} + +userInput = getUserInput('username') + +// A sink method, which performs some raw databse operation on the userInput +perform_db_operation(userInput) + +} +` + + parseRes := parseJsCode(t, []byte(source)) + var TaintDetectorMock = &ana.Analyzer{ + Name: "taint_detector", + Language: ana.LangJs, + Description: "Taint detector", + Category: ana.CategorySecurity, + Severity: ana.SeverityCritical, + Requires: []*ana.Analyzer{DataFlowAnalyzer}, + Run: detectTaint([]string{` + (call_expression + function: (identifier) @sourceName + (#eq? @sourceName "getUserInput"))`}, []string{` + (call_expression + function: (identifier) @sinkName + (#eq? @sinkName "perform_db_operation"))`}), + } + pass := &ana.Pass{ + FileContext: parseRes, + Analyzer: TaintDetectorMock, + } + patterns, err := TaintDetectorMock.Run(pass) + assert.NoError(t, err) + + assert.Len(t, patterns.(map[string]interface{})["sinkNodes"], 1) + assert.Len(t, patterns.(map[string]interface{})["sourceNodes"], 1) + assert.Len(t, patterns.(map[string]interface{})["sinkPatterns"], 1) + assert.Len(t, patterns.(map[string]interface{})["sourcePatterns"], 1) + + for _, node := range patterns.(map[string]interface{})["sinkNodes"].([]*sitter.Node) { + t.Log(node.Content(parseRes.Source)) + } +} From aae306a42e7f016d765b0e78cfad2ab8ac11135a Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Wed, 11 Jun 2025 23:20:08 +0530 Subject: [PATCH 09/12] checkers(javascript): add logic to detect taint on given patterns --- analysis/walk.go | 15 +++ checkers/javascript/taint_detector.go | 102 +++++++++++++++++- checkers/javascript/taint_detector_test.go | 55 +++++++--- .../testdata/taint_detector.test.js | 10 ++ 4 files changed, 169 insertions(+), 13 deletions(-) create mode 100644 checkers/javascript/testdata/taint_detector.test.js diff --git a/analysis/walk.go b/analysis/walk.go index 06273995..91a018e7 100644 --- a/analysis/walk.go +++ b/analysis/walk.go @@ -1,6 +1,8 @@ package analysis import ( + "fmt" + sitter "github.com/smacker/go-tree-sitter" ) @@ -94,3 +96,16 @@ func FirstChildOfType(node *sitter.Node, nodeType string) *sitter.Node { return nil } + +func GetRootNode(node *sitter.Node) (*sitter.Node, error) { + current := node + + if current.Parent() == nil { + return current, fmt.Errorf("at the top-most level for the node") + } + for current.Parent() != nil { + current = current.Parent() + } + + return current, nil +} diff --git a/checkers/javascript/taint_detector.go b/checkers/javascript/taint_detector.go index cf297e5b..3c9f73c8 100644 --- a/checkers/javascript/taint_detector.go +++ b/checkers/javascript/taint_detector.go @@ -17,9 +17,37 @@ import ( // Run: detectTaint([]string{"sink"}, []string{"source"}), // } -func detectTaint(sink []string, source []string) func(pass *analysis.Pass) (any, error) { +// var TaintDetector = &analysis.Analyzer{ +// Name: "taint_detector", +// Language: analysis.LangJs, +// Description: "Taint detector", +// Category: analysis.CategorySecurity, +// Severity: analysis.SeverityCritical, +// Requires: []*analysis.Analyzer{DataFlowAnalyzer}, +// Run: detectTaint([]string{` +// (expression_statement +// (assignment_expression +// right: (call_expression +// function: (identifier) @sourceName +// ))(#eq? @sourceName "getUserInput"))`}, []string{` +// (call_expression +// function: (identifier) @sinkName +// (#eq? @sinkName "perform_db_operation"))`}), +// } + +func detectTaint(source []string, sink []string) func(pass *analysis.Pass) (any, error) { return func(pass *analysis.Pass) (interface{}, error) { + dfg := pass.ResultOf[DataFlowAnalyzer].(*DataFlowGraph) + if dfg == nil { + return nil, fmt.Errorf("no data flow graph found") + } + scopeTree := dfg.ScopeTree + if scopeTree == nil { + fmt.Println("no scope tree found") + return nil, fmt.Errorf("no scope tree found") + } + var sinkPatterns []*sitter.Query for _, sink := range sink { sinkPattern, err := sitter.NewQuery([]byte(sink), analysis.LangJs.Grammar()) @@ -83,6 +111,78 @@ func detectTaint(sink []string, source []string) func(pass *analysis.Pass) (any, return nil, fmt.Errorf("no sink or source pattern matched") } + // Get the data flow graph to track variable relationships + + // Track source variables that flow into sinks + // var taintedFlows []struct { + // source *sitter.Node + // sink *sitter.Node + // } + + // For each source node, get its variable + // for _, sourceNode := range sourceNodes { + // // Get the assignment node (parent.parent.parent of source capture) + // assignNode := sourceNode.Parent().Parent().Parent() + // if assignNode == nil { + // continue + // } + + // // Get the identifier node and its scope + // idNode := assignNode.ChildByFieldName("left") + // if idNode == nil { + // continue + // } + + // idScope := scopeTree.GetScope(idNode) + // if idScope == nil { + // continue + // } + + // // Look up the variable for the identifier + // sourceVar := idScope.Lookup(idNode.Content(pass.FileContext.Source)) + // if sourceVar == nil { + // continue + // } + + // // For each sink, check if it uses the source variable + // for _, sinkNode := range sinkNodes { + // // Get the call expression node + // callNode, err := analysis.GetRootNode(sinkNode) + // if err != nil { + // continue + // } + + // // Get the argument node and its variable + // argsNode := callNode.ChildByFieldName("arguments") + // if argsNode == nil || argsNode.NamedChildCount() == 0 { + // continue + // } + // argNode := argsNode.NamedChild(0) + + // argScope := scopeTree.GetScope(callNode) + // if argScope == nil { + // continue + // } + + // argVar := argScope.Lookup(argNode.Content(pass.FileContext.Source)) + // if argVar == nil { + // continue + // } + + // // If the argument variable matches the source variable, we found a tainted flow + // if argVar == sourceVar { + // taintedFlows = append(taintedFlows, struct { + // source *sitter.Node + // sink *sitter.Node + // }{sourceNode, sinkNode}) + // } + // } + // } + + // for _, tainted := range taintedFlows { + // pass.Report(pass, tainted.sink, "") + // } + return map[string]interface{}{ "sinkNodes": sinkNodes, "sourceNodes": sourceNodes, diff --git a/checkers/javascript/taint_detector_test.go b/checkers/javascript/taint_detector_test.go index da5198f5..3a9b5580 100644 --- a/checkers/javascript/taint_detector_test.go +++ b/checkers/javascript/taint_detector_test.go @@ -5,6 +5,7 @@ import ( sitter "github.com/smacker/go-tree-sitter" "github.com/stretchr/testify/assert" + "globstar.dev/analysis" ana "globstar.dev/analysis" ) @@ -24,35 +25,65 @@ perform_db_operation(userInput) } ` - parseRes := parseJsCode(t, []byte(source)) - var TaintDetectorMock = &ana.Analyzer{ + var TaintDetector = &analysis.Analyzer{ Name: "taint_detector", - Language: ana.LangJs, + Language: analysis.LangJs, Description: "Taint detector", - Category: ana.CategorySecurity, - Severity: ana.SeverityCritical, - Requires: []*ana.Analyzer{DataFlowAnalyzer}, + Category: analysis.CategorySecurity, + Severity: analysis.SeverityCritical, + Requires: []*analysis.Analyzer{DataFlowAnalyzer}, Run: detectTaint([]string{` - (call_expression - function: (identifier) @sourceName - (#eq? @sourceName "getUserInput"))`}, []string{` + (expression_statement + (assignment_expression + right: (call_expression + function: (identifier) @sourceName + ))(#eq? @sourceName "getUserInput"))`}, []string{` (call_expression function: (identifier) @sinkName (#eq? @sinkName "perform_db_operation"))`}), } + parseRes := parseJsCode(t, []byte(source)) pass := &ana.Pass{ FileContext: parseRes, - Analyzer: TaintDetectorMock, + Analyzer: TaintDetector, } - patterns, err := TaintDetectorMock.Run(pass) + patterns, err := TaintDetector.Run(pass) + assert.NoError(t, err) + + dfg, err := createDataFlowGraph(pass) assert.NoError(t, err) + scopeTree := dfg.(*DataFlowGraph).ScopeTree + assert.NotNil(t, scopeTree) + assert.Len(t, patterns.(map[string]interface{})["sinkNodes"], 1) assert.Len(t, patterns.(map[string]interface{})["sourceNodes"], 1) assert.Len(t, patterns.(map[string]interface{})["sinkPatterns"], 1) assert.Len(t, patterns.(map[string]interface{})["sourcePatterns"], 1) + var sourceVar *ana.Variable + + for _, node := range patterns.(map[string]interface{})["sourceNodes"].([]*sitter.Node) { + parentNode := node.Parent().Parent().Parent() + + idScope := scopeTree.GetScope(parentNode.ChildByFieldName("left")) + assert.NotNil(t, idScope) + + idVar := idScope.Lookup(parentNode.ChildByFieldName("left").Content(parseRes.Source)) + assert.NotNil(t, idVar) + + sourceVar = idVar + } + for _, node := range patterns.(map[string]interface{})["sinkNodes"].([]*sitter.Node) { - t.Log(node.Content(parseRes.Source)) + parentNode, err := ana.GetRootNode(node) + assert.NoError(t, err) + arg := parentNode.ChildByFieldName("arguments").NamedChild(0) + + scope := scopeTree.GetScope(parentNode) + scopeVar := scope.Lookup(arg.Content(parseRes.Source)) + + assert.Equal(t, scopeVar, sourceVar) } + } diff --git a/checkers/javascript/testdata/taint_detector.test.js b/checkers/javascript/testdata/taint_detector.test.js new file mode 100644 index 00000000..50b4922f --- /dev/null +++ b/checkers/javascript/testdata/taint_detector.test.js @@ -0,0 +1,10 @@ +function getUserInput(key) { + + return document.getElementById(key).value; + +} + +userInput = getUserInput('username') + +// +perform_db_operation(userInput) \ No newline at end of file From bcd8d9ea9d91c36ad140e9f8b4a751ac0df1b0f6 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Thu, 12 Jun 2025 19:54:27 +0530 Subject: [PATCH 10/12] feat: accessing `TaintAnalyzer` instance from `AnalysisFunction` `Run` method --- analysis/analysis_functions.go | 12 +++++++----- analysis/directory.go | 2 ++ checkers/javascript/js_dataflow.go | 2 +- checkers/javascript/taint_detector.go | 22 ++++++++++++---------- checkers/javascript/taint_detector_test.go | 7 +++---- 5 files changed, 25 insertions(+), 20 deletions(-) diff --git a/analysis/analysis_functions.go b/analysis/analysis_functions.go index 5d076bd2..26b86e4f 100644 --- a/analysis/analysis_functions.go +++ b/analysis/analysis_functions.go @@ -1,5 +1,9 @@ package analysis +type TaintAnalyzer interface { + GetAnalyzer(sources, sinks []string) Analyzer +} + func TaintRun(args ...interface{}) Analyzer { sources := args[0].([]string) sinks := args[1].([]string) @@ -9,9 +13,7 @@ func TaintRun(args ...interface{}) Analyzer { } func NewTaintAnalyzer(sources, sinks []string) Analyzer { - return Analyzer{ - Name: "taint_analyzer", - } + var taintAnalyzer TaintAnalyzer + analyzer := taintAnalyzer.GetAnalyzer(sources, sinks) + return analyzer } - - diff --git a/analysis/directory.go b/analysis/directory.go index 44eb50cb..22988ad0 100644 --- a/analysis/directory.go +++ b/analysis/directory.go @@ -6,6 +6,8 @@ func InitializeAnalysisFunction(fn AnalysisFunction) AnalysisFunction { switch fn.Name { case "taint": fn.Run = TaintRun + default: + break } RegisteredAnalysisFunctions = append(RegisteredAnalysisFunctions, fn) return fn diff --git a/checkers/javascript/js_dataflow.go b/checkers/javascript/js_dataflow.go index bbd56b16..cc3d381a 100644 --- a/checkers/javascript/js_dataflow.go +++ b/checkers/javascript/js_dataflow.go @@ -60,7 +60,7 @@ func createDataFlowGraph(pass *analysis.Pass) (interface{}, error) { scopeResult, err := buildScopeTree(pass) if err != nil { - return nil, fmt.Errorf("failed to build the scope tree \n") + return nil, fmt.Errorf("failed to build the scope tree: %v", err) } scopeTree := scopeResult.(*analysis.ScopeTree) diff --git a/checkers/javascript/taint_detector.go b/checkers/javascript/taint_detector.go index 3c9f73c8..5c586fb2 100644 --- a/checkers/javascript/taint_detector.go +++ b/checkers/javascript/taint_detector.go @@ -7,15 +7,17 @@ import ( "globstar.dev/analysis" ) -// var TaintDetector = &analysis.Analyzer{ -// Name: "taint_detector", -// Language: analysis.LangJs, -// Description: "Taint detector", -// Category: analysis.CategorySecurity, -// Severity: analysis.SeverityCritical, -// Requires: []*analysis.Analyzer{DataFlowAnalyzer}, -// Run: detectTaint([]string{"sink"}, []string{"source"}), -// } +func GetAnalyzer(sources, sinks []string) analysis.Analyzer { + return analysis.Analyzer{ + Name: "taint_detector", + Language: analysis.LangJs, + Description: "Taint detector", + Category: analysis.CategorySecurity, + Severity: analysis.SeverityCritical, + Requires: []*analysis.Analyzer{DataFlowAnalyzer}, + Run: detectTaint(sources, sinks), + } +} // var TaintDetector = &analysis.Analyzer{ // Name: "taint_detector", @@ -28,7 +30,7 @@ import ( // (expression_statement // (assignment_expression // right: (call_expression -// function: (identifier) @sourceName +// function: (identifier) @sourceName // ))(#eq? @sourceName "getUserInput"))`}, []string{` // (call_expression // function: (identifier) @sinkName diff --git a/checkers/javascript/taint_detector_test.go b/checkers/javascript/taint_detector_test.go index 3a9b5580..e74f7388 100644 --- a/checkers/javascript/taint_detector_test.go +++ b/checkers/javascript/taint_detector_test.go @@ -6,7 +6,6 @@ import ( sitter "github.com/smacker/go-tree-sitter" "github.com/stretchr/testify/assert" "globstar.dev/analysis" - ana "globstar.dev/analysis" ) func TestPatternDetection(t *testing.T) { @@ -43,7 +42,7 @@ perform_db_operation(userInput) (#eq? @sinkName "perform_db_operation"))`}), } parseRes := parseJsCode(t, []byte(source)) - pass := &ana.Pass{ + pass := &analysis.Pass{ FileContext: parseRes, Analyzer: TaintDetector, } @@ -61,7 +60,7 @@ perform_db_operation(userInput) assert.Len(t, patterns.(map[string]interface{})["sinkPatterns"], 1) assert.Len(t, patterns.(map[string]interface{})["sourcePatterns"], 1) - var sourceVar *ana.Variable + var sourceVar *analysis.Variable for _, node := range patterns.(map[string]interface{})["sourceNodes"].([]*sitter.Node) { parentNode := node.Parent().Parent().Parent() @@ -76,7 +75,7 @@ perform_db_operation(userInput) } for _, node := range patterns.(map[string]interface{})["sinkNodes"].([]*sitter.Node) { - parentNode, err := ana.GetRootNode(node) + parentNode, err := analysis.GetRootNode(node) assert.NoError(t, err) arg := parentNode.ChildByFieldName("arguments").NamedChild(0) From 9372bb082d44a4ca86266fc05c9dc4646761cfb4 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Fri, 13 Jun 2025 12:41:22 +0530 Subject: [PATCH 11/12] chore: debugging attempt --- checkers/javascript/taint_detector.go | 34 +++++++++++++-------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/checkers/javascript/taint_detector.go b/checkers/javascript/taint_detector.go index 5c586fb2..4e4bbe3e 100644 --- a/checkers/javascript/taint_detector.go +++ b/checkers/javascript/taint_detector.go @@ -19,23 +19,23 @@ func GetAnalyzer(sources, sinks []string) analysis.Analyzer { } } -// var TaintDetector = &analysis.Analyzer{ -// Name: "taint_detector", -// Language: analysis.LangJs, -// Description: "Taint detector", -// Category: analysis.CategorySecurity, -// Severity: analysis.SeverityCritical, -// Requires: []*analysis.Analyzer{DataFlowAnalyzer}, -// Run: detectTaint([]string{` -// (expression_statement -// (assignment_expression -// right: (call_expression -// function: (identifier) @sourceName -// ))(#eq? @sourceName "getUserInput"))`}, []string{` -// (call_expression -// function: (identifier) @sinkName -// (#eq? @sinkName "perform_db_operation"))`}), -// } +var TaintDetector = &analysis.Analyzer{ + Name: "taint_detector", + Language: analysis.LangJs, + Description: "Taint detector", + Category: analysis.CategorySecurity, + Severity: analysis.SeverityCritical, + Requires: []*analysis.Analyzer{DataFlowAnalyzer}, + Run: detectTaint([]string{` + (expression_statement + (assignment_expression + right: (call_expression + function: (identifier) @sourceName + ))(#eq? @sourceName "getUserInput"))`}, []string{` + (call_expression + function: (identifier) @sinkName + (#eq? @sinkName "perform_db_operation"))`}), +} func detectTaint(source []string, sink []string) func(pass *analysis.Pass) (any, error) { From 2c68a3252be8416ab256a4d8d9481668b02cb3f3 Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Sat, 14 Jun 2025 21:59:49 +0530 Subject: [PATCH 12/12] feat: succesfull added running of custom analysis functions with yaml based checkers --- analysis/analysis_functions.go | 21 +++---- analysis/analyzer.go | 59 +++++++++++++++++++ analysis/directory.go | 14 ----- analysis/directory_test.go | 37 ------------ .../testdata/mock-analysis-function.test.js | 1 + analysis/yaml.go | 41 +++++-------- analysis/yaml_test.go | 2 +- checkers/javascript/taint_detector.go | 31 +++------- go.mod | 1 + go.sum | 2 + pkg/cli/test_runner.go | 32 +++++++--- pkg/cli/test_runner_test.go | 28 +++++++++ .../testdata/mock-analysis-function.test.js | 12 ++++ pkg/cli/testdata/mock-analysis-function.yml | 23 ++++++++ 14 files changed, 183 insertions(+), 121 deletions(-) delete mode 100644 analysis/directory.go delete mode 100644 analysis/directory_test.go create mode 100644 pkg/cli/test_runner_test.go create mode 100644 pkg/cli/testdata/mock-analysis-function.test.js create mode 100644 pkg/cli/testdata/mock-analysis-function.yml diff --git a/analysis/analysis_functions.go b/analysis/analysis_functions.go index 26b86e4f..1c0de88f 100644 --- a/analysis/analysis_functions.go +++ b/analysis/analysis_functions.go @@ -1,19 +1,16 @@ package analysis -type TaintAnalyzer interface { - GetAnalyzer(sources, sinks []string) Analyzer -} - -func TaintRun(args ...interface{}) Analyzer { - sources := args[0].([]string) - sinks := args[1].([]string) +func TaintRun(args ...interface{}) func(*Pass) (any, error) { + return func(pass *Pass) (any, error) { + sources := args[0].([]string) + sinks := args[1].([]string) - analyzer := NewTaintAnalyzer(sources, sinks) - return analyzer + return NewTaintAnalyzer(sources, sinks), nil + } } -func NewTaintAnalyzer(sources, sinks []string) Analyzer { - var taintAnalyzer TaintAnalyzer - analyzer := taintAnalyzer.GetAnalyzer(sources, sinks) +func NewTaintAnalyzer(sources, sinks []string) *Analyzer { + analyzer := &Analyzer{} + return analyzer } diff --git a/analysis/analyzer.go b/analysis/analyzer.go index 399c0c07..30b3cedd 100644 --- a/analysis/analyzer.go +++ b/analysis/analyzer.go @@ -208,6 +208,65 @@ func RunAnalyzers(path string, analyzers []*Analyzer, fileFilter func(string) bo return raisedIssues, nil } +func RunAnalysisFunction(path string, analyzers []*Analyzer, fileFilter func(string) bool) ([]*Issue, error) { + raisedIssues := []*Issue{} + langAnalyzerMap := make(map[Language][]*Analyzer) + + for _, analyzer := range analyzers { + langAnalyzerMap[analyzer.Language] = append(langAnalyzerMap[analyzer.Language], findAnalyzers(analyzer)...) + } + + file, err := ParseFile(path) + if err != nil { + if err != ErrUnsupportedLanguage { + fmt.Println(err) + } + return raisedIssues, err + } + + fileSkipInfo := GatherSkipInfo(file) + + reportFunc := func(pass *Pass, node *sitter.Node, message string) { + raisedIssue := &Issue{ + Id: &pass.Analyzer.Name, + Node: node, + Message: message, + Filepath: pass.FileContext.FilePath, + } + + skipLines := fileSkipInfo + if !ContainsSkipcq(skipLines, raisedIssue) { + raisedIssues = append(raisedIssues, raisedIssue) + } + } + + for _, analyzers := range langAnalyzerMap { + pass := &Pass{ + FileContext: file, + Report: reportFunc, + ResultOf: make(map[*Analyzer]any), + ResultCache: make(map[*Analyzer]map[*ParseResult]any), + } + + for _, analyzer := range analyzers { + pass.Analyzer = analyzer + + result, err := analyzer.Run(pass) + if err != nil { + return raisedIssues, err + } + + pass.ResultOf[analyzer] = result + if _, ok := pass.ResultCache[analyzer]; !ok { + pass.ResultCache[analyzer] = make(map[*ParseResult]any) + } + pass.ResultCache[analyzer][file] = result + } + } + + return raisedIssues, nil +} + func ReportIssues(issues []*Issue, format string) ([]byte, error) { switch format { case "json": diff --git a/analysis/directory.go b/analysis/directory.go deleted file mode 100644 index 22988ad0..00000000 --- a/analysis/directory.go +++ /dev/null @@ -1,14 +0,0 @@ -package analysis - -var RegisteredAnalysisFunctions = []AnalysisFunction{} - -func InitializeAnalysisFunction(fn AnalysisFunction) AnalysisFunction { - switch fn.Name { - case "taint": - fn.Run = TaintRun - default: - break - } - RegisteredAnalysisFunctions = append(RegisteredAnalysisFunctions, fn) - return fn -} diff --git a/analysis/directory_test.go b/analysis/directory_test.go deleted file mode 100644 index 992a81aa..00000000 --- a/analysis/directory_test.go +++ /dev/null @@ -1,37 +0,0 @@ -package analysis - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestInitializeAnalysisFunction(t *testing.T) { - - fn := InitializeAnalysisFunction(AnalysisFunction{ - Name: "taint", - Parameters: map[string][]string{ - "sources": {"string"}, - "sinks": {"string"}, - }, - }) - - inbuiltTaintAnalyzer := fn.Run([]string{"source1", "source2"}, []string{"sink1", "sink2"}) - // assert.NoError(t, err) - - assert.Equal(t, inbuiltTaintAnalyzer.Name, "taint_analyzer") - -} - -func TestPopulationOfAnalysisFuncitonRegistry(t *testing.T) { - _ = InitializeAnalysisFunction(AnalysisFunction{ - Name: "taint", - Parameters: map[string][]string{ - "sources": {"string"}, - "sinks": {"string"}, - }, - }) - - assert.Equal(t, len(RegisteredAnalysisFunctions), 1) - -} diff --git a/analysis/testdata/mock-analysis-function.test.js b/analysis/testdata/mock-analysis-function.test.js index 2e9077c1..b0ace2d9 100644 --- a/analysis/testdata/mock-analysis-function.test.js +++ b/analysis/testdata/mock-analysis-function.test.js @@ -7,4 +7,5 @@ function getUserInput(key) { userInput = getUserInput('username') // A sink method, which performs some raw databse operation on the userInput +// perform_db_operation(userInput) \ No newline at end of file diff --git a/analysis/yaml.go b/analysis/yaml.go index 65aa4a99..af253a07 100644 --- a/analysis/yaml.go +++ b/analysis/yaml.go @@ -50,25 +50,22 @@ type PathFilter struct { type AnalysisFunction struct { Name string `yaml:"name"` Parameters map[string][]string `yaml:"parameters"` - Run func(args ...interface{}) Analyzer } type Yaml struct { - Language string `yaml:"language"` - Code string `yaml:"name"` - Message string `yaml:"message"` - Category Category `yaml:"category"` - Severity Severity `yaml:"severity"` - Pattern string `yaml:"pattern"` - Patterns []string `yaml:"patterns"` - Description string `yaml:"description"` - Exclude []string `yaml:"exclude,omitempty"` - Include []string `yaml:"include,omitempty"` - Filters []filterYaml `yaml:"filters,omitempty"` - PathFilter *pathFilterYaml `yaml:"path_filter,omitempty"` - Sink []string `yaml:"sink,omitempty"` - Source []string `yaml:"source,omitempty"` - AnalysisFunction AnalysisFunction `yaml:"analysisFunction,omitempty"` + Language string `yaml:"language"` + Code string `yaml:"name"` + Message string `yaml:"message"` + Category Category `yaml:"category"` + Severity Severity `yaml:"severity"` + Pattern string `yaml:"pattern"` + Patterns []string `yaml:"patterns"` + Description string `yaml:"description"` + Exclude []string `yaml:"exclude,omitempty"` + Include []string `yaml:"include,omitempty"` + Filters []filterYaml `yaml:"filters,omitempty"` + PathFilter *pathFilterYaml `yaml:"path_filter,omitempty"` + AnalysisFunction *AnalysisFunction `yaml:"analysisFunction,omitempty"` } type YamlAnalyzer struct { @@ -77,7 +74,7 @@ type YamlAnalyzer struct { NodeFilter []NodeFilter PathFilter *PathFilter Message string - AnalysisFunction AnalysisFunction + AnalysisFunction *AnalysisFunction } // ReadFromFile reads a pattern checker definition from a YAML config file. @@ -99,14 +96,6 @@ func ReadFromBytes(fileContent []byte) (Analyzer, YamlAnalyzer, error) { analysisFunction := checker.AnalysisFunction - if analysisFunction.Name != "" { - analysisFunction.Parameters = map[string][]string{ - "sources": checker.Source, - "sinks": checker.Sink, - } - InitializeAnalysisFunction(analysisFunction) - } - lang := DecodeLanguage(checker.Language) if lang == LangUnknown { return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("unknown language code: '%s'", checker.Language) @@ -220,7 +209,7 @@ func ReadFromBytes(fileContent []byte) (Analyzer, YamlAnalyzer, error) { NodeFilter: filters, PathFilter: pathFilter, Message: checker.Message, - AnalysisFunction: checker.AnalysisFunction, + AnalysisFunction: analysisFunction, } patternChecker.Run = RunYamlAnalyzer(yamlAnalyzer) diff --git a/analysis/yaml_test.go b/analysis/yaml_test.go index be77b873..d5b31998 100644 --- a/analysis/yaml_test.go +++ b/analysis/yaml_test.go @@ -82,7 +82,7 @@ func TestAnalysisFunction(t *testing.T) { assert.Equal(t, ana.Name, "run_taint_analysis") assert.Equal(t, yamlAna.AnalysisFunction.Name, "taint") assert.Equal(t, len(yamlAna.AnalysisFunction.Parameters["sources"]), 1) - t.Logf("%v", yamlAna.AnalysisFunction.Parameters["sources"]) assert.Equal(t, len(yamlAna.AnalysisFunction.Parameters["sinks"]), 1) assert.Equal(t, yamlAna.AnalysisFunction.Name, "taint") + } diff --git a/checkers/javascript/taint_detector.go b/checkers/javascript/taint_detector.go index 4e4bbe3e..bbc57afe 100644 --- a/checkers/javascript/taint_detector.go +++ b/checkers/javascript/taint_detector.go @@ -7,36 +7,18 @@ import ( "globstar.dev/analysis" ) -func GetAnalyzer(sources, sinks []string) analysis.Analyzer { - return analysis.Analyzer{ +func GetTaintFunction(source, sink []string) *analysis.Analyzer { + return &analysis.Analyzer{ Name: "taint_detector", Language: analysis.LangJs, Description: "Taint detector", Category: analysis.CategorySecurity, Severity: analysis.SeverityCritical, Requires: []*analysis.Analyzer{DataFlowAnalyzer}, - Run: detectTaint(sources, sinks), + Run: detectTaint(source, sink), } } -var TaintDetector = &analysis.Analyzer{ - Name: "taint_detector", - Language: analysis.LangJs, - Description: "Taint detector", - Category: analysis.CategorySecurity, - Severity: analysis.SeverityCritical, - Requires: []*analysis.Analyzer{DataFlowAnalyzer}, - Run: detectTaint([]string{` - (expression_statement - (assignment_expression - right: (call_expression - function: (identifier) @sourceName - ))(#eq? @sourceName "getUserInput"))`}, []string{` - (call_expression - function: (identifier) @sinkName - (#eq? @sinkName "perform_db_operation"))`}), -} - func detectTaint(source []string, sink []string) func(pass *analysis.Pass) (any, error) { return func(pass *analysis.Pass) (interface{}, error) { @@ -113,15 +95,16 @@ func detectTaint(source []string, sink []string) func(pass *analysis.Pass) (any, return nil, fmt.Errorf("no sink or source pattern matched") } - // Get the data flow graph to track variable relationships + pass.Report(pass, sinkNodes[0], "sink node found") + // // Get the data flow graph to track variable relationships - // Track source variables that flow into sinks + // // Track source variables that flow into sinks // var taintedFlows []struct { // source *sitter.Node // sink *sitter.Node // } - // For each source node, get its variable + // // For each source node, get its variable // for _, sourceNode := range sourceNodes { // // Get the assignment node (parent.parent.parent of source capture) // assignNode := sourceNode.Parent().Parent().Parent() diff --git a/go.mod b/go.mod index ac5c5cdd..1fed016b 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 github.com/stretchr/testify v1.10.0 github.com/urfave/cli/v3 v3.0.0-beta1 + golang.org/x/tools v0.11.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index 5795f49a..2bd60a58 100644 --- a/go.sum +++ b/go.sum @@ -110,6 +110,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.11.0 h1:EMCa6U9S2LtZXLAMoWiR/R8dAQFRqbAitmbJ2UKhoi8= +golang.org/x/tools v0.11.0/go.mod h1:anzJrxPjNtfgiYQYirP2CPGzGLxrH2u2QBhn6Bf3qY8= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/pkg/cli/test_runner.go b/pkg/cli/test_runner.go index 62fb412d..3694ed1f 100644 --- a/pkg/cli/test_runner.go +++ b/pkg/cli/test_runner.go @@ -10,6 +10,7 @@ import ( "strings" ana "globstar.dev/analysis" + js "globstar.dev/checkers/javascript" ) func runTests(dir string) (bool, error) { @@ -91,17 +92,11 @@ func runTestCases(dir string) (passed bool, err error) { fmt.Fprintf(os.Stderr, "Running test case: %s\n", filepath.Base(tc.yamlCheckerPath)) // Read and parse the checker definition - checker, _, err := ana.ReadFromFile(tc.yamlCheckerPath) + checker, yamlAnalyzer, err := ana.ReadFromFile(tc.yamlCheckerPath) if err != nil { return false, err } - // Parse the test file - // analyzer, err := analysis.FromFile(tc.testFile, []analysis.Checker{}) - // if err != nil { - // return false, err - // } - want, err := findExpectedLines(tc.testFile) if err != nil { return false, err @@ -112,6 +107,17 @@ func runTestCases(dir string) (passed bool, err error) { return false, err } + var analysisFuncAnalyzer *ana.Analyzer + if yamlAnalyzer.AnalysisFunction != nil { + analysisFuncAnalyzer = GetAnalysisFunction(&yamlAnalyzer) + analysisFunctionIssues, err := ana.RunAnalysisFunction(tc.testFile, []*ana.Analyzer{analysisFuncAnalyzer}, nil) + if err != nil { + return false, err + } + + issues = append(issues, analysisFunctionIssues...) + } + var got []int for _, issue := range issues { got = append(got, int(issue.Node.Range().StartPoint.Row)+1) // 0-indexed to 1-indexed @@ -180,3 +186,15 @@ func findExpectedLines(filePath string) ([]int, error) { return expectedLines, nil } + +func GetAnalysisFunction(yamlAnalyzer *ana.YamlAnalyzer) *ana.Analyzer { + analysisFunction := yamlAnalyzer.AnalysisFunction + + switch analysisFunction.Name { + case "taint": + return js.GetTaintFunction(yamlAnalyzer.AnalysisFunction.Parameters["sources"], yamlAnalyzer.AnalysisFunction.Parameters["sinks"]) + default: + return nil + } + +} diff --git a/pkg/cli/test_runner_test.go b/pkg/cli/test_runner_test.go new file mode 100644 index 00000000..2f1f2726 --- /dev/null +++ b/pkg/cli/test_runner_test.go @@ -0,0 +1,28 @@ +package cli + +import ( + "testing" + + "github.com/stretchr/testify/assert" + ana "globstar.dev/analysis" +) + +func TestGetAnalysisFunction(t *testing.T) { + path := "testdata/mock-analysis-function.yml" + _, yamlAnalyzer, err := ana.ReadFromFile(path) + analysisFunction := yamlAnalyzer.AnalysisFunction + assert.NotNil(t, analysisFunction) + assert.Len(t, analysisFunction.Parameters, 2) + assert.Len(t, analysisFunction.Parameters["sources"], 1) + assert.NoError(t, err) + + analysisFuncAnalyzer := GetAnalysisFunction(&yamlAnalyzer) + assert.Equal(t, analysisFuncAnalyzer.Name, "taint_detector") +} + +func TestAnalysisFunction(t *testing.T) { + path := "testdata/mock-analysis-function.yml" + passed, err := runTests(path) + assert.NoError(t, err) + assert.True(t, passed) +} diff --git a/pkg/cli/testdata/mock-analysis-function.test.js b/pkg/cli/testdata/mock-analysis-function.test.js new file mode 100644 index 00000000..ab0bffb0 --- /dev/null +++ b/pkg/cli/testdata/mock-analysis-function.test.js @@ -0,0 +1,12 @@ +function getUserInput(key) { + + return document.getElementById(key).value; + +} + +userInput = getUserInput('username') + +// A sink method, which performs some raw databse operation on the userInput + +// +perform_db_operation(userInput) \ No newline at end of file diff --git a/pkg/cli/testdata/mock-analysis-function.yml b/pkg/cli/testdata/mock-analysis-function.yml new file mode 100644 index 00000000..400023c4 --- /dev/null +++ b/pkg/cli/testdata/mock-analysis-function.yml @@ -0,0 +1,23 @@ +name: "run_taint_analysis" +language: javascript +category: security +severity: high +message: "This is just a mock checker" +analysisFunction: + name: taint + parameters: + sources: + - | + (call_expression + function: (identifier) @sourceName + (#eq? @sourceName "getUserInput")) + sinks: + - | + (call_expression + function: (identifier) @sinkName + (#eq? @sinkName "perform_db_operation")) + +pattern: | + (call_expression) + +description: "Runs a taint analysis on the provided function and its parameters." \ No newline at end of file