diff --git a/analysis/analysis_functions.go b/analysis/analysis_functions.go new file mode 100644 index 00000000..1c0de88f --- /dev/null +++ b/analysis/analysis_functions.go @@ -0,0 +1,16 @@ +package analysis + +func TaintRun(args ...interface{}) func(*Pass) (any, error) { + return func(pass *Pass) (any, error) { + sources := args[0].([]string) + sinks := args[1].([]string) + + return NewTaintAnalyzer(sources, sinks), nil + } +} + +func NewTaintAnalyzer(sources, sinks []string) *Analyzer { + analyzer := &Analyzer{} + + return analyzer +} diff --git a/analysis/analyzer.go b/analysis/analyzer.go index 399c0c07..30b3cedd 100644 --- a/analysis/analyzer.go +++ b/analysis/analyzer.go @@ -208,6 +208,65 @@ func RunAnalyzers(path string, analyzers []*Analyzer, fileFilter func(string) bo return raisedIssues, nil } +func RunAnalysisFunction(path string, analyzers []*Analyzer, fileFilter func(string) bool) ([]*Issue, error) { + raisedIssues := []*Issue{} + langAnalyzerMap := make(map[Language][]*Analyzer) + + for _, analyzer := range analyzers { + langAnalyzerMap[analyzer.Language] = append(langAnalyzerMap[analyzer.Language], findAnalyzers(analyzer)...) + } + + file, err := ParseFile(path) + if err != nil { + if err != ErrUnsupportedLanguage { + fmt.Println(err) + } + return raisedIssues, err + } + + fileSkipInfo := GatherSkipInfo(file) + + reportFunc := func(pass *Pass, node *sitter.Node, message string) { + raisedIssue := &Issue{ + Id: &pass.Analyzer.Name, + Node: node, + Message: message, + Filepath: pass.FileContext.FilePath, + } + + skipLines := fileSkipInfo + if !ContainsSkipcq(skipLines, raisedIssue) { + raisedIssues = append(raisedIssues, raisedIssue) + } + } + + for _, analyzers := range langAnalyzerMap { + pass := &Pass{ + FileContext: file, + Report: reportFunc, + ResultOf: make(map[*Analyzer]any), + ResultCache: make(map[*Analyzer]map[*ParseResult]any), + } + + for _, analyzer := range analyzers { + pass.Analyzer = analyzer + + result, err := analyzer.Run(pass) + if err != nil { + return raisedIssues, err + } + + pass.ResultOf[analyzer] = result + if _, ok := pass.ResultCache[analyzer]; !ok { + pass.ResultCache[analyzer] = make(map[*ParseResult]any) + } + pass.ResultCache[analyzer][file] = result + } + } + + return raisedIssues, nil +} + func ReportIssues(issues []*Issue, format string) ([]byte, error) { switch format { case "json": diff --git a/analysis/feature b/analysis/feature new file mode 100644 index 00000000..7812136f --- /dev/null +++ b/analysis/feature @@ -0,0 +1,62 @@ +type AnalysisFunction struct { + Name string + Parameters []reflect.Type + Run func(args ...interface{}) (Analyzer, error) +} + +--- +functions/run_taint_analysis.go +--- +TaintAnalysisFunction := AnalysisFunction{ + Name: "taint", + Parameters: []reflect.Type{ + reflect.TypeOf([]string{}), // sources + reflect.TypeOf([]string{}), // sinks + }, + Description: "Runs a taint analysis on the provided function and its parameters.", + Run: func(args ...interface{}) (Analyzer, error) { + sources := args[0].([]string) + sinks := args[1].([]string) + + analyzer := NewTaintAnalyzer(sources, sinks) + return analyzer, nil + } +} + +func NewTaintAnalyzer(sources, sinks []string) Analyzer { + return &TaintAnalyzer{ + Sources: sources, + Sinks: sinks, + } +} + +--- +directory.go +--- +functions := []AnalysisFunction{ + TaintAnalysisFunction, +} + +for _, function := range functions { + analyzer, err := function.Run(function.Parameters...) + analyzers = append(analyzers, analyzer) +} + + + +--- +name: "run_taint_analysis" +language: go +description: "Runs a taint analysis on the provided function and its parameters." +analysisFunction: + name: taint + parameters: + sources: + - (query) + sinks: + - ( + (callexpression method @methodname (parameterList)) + #match @methodname "get_user_input" + ) + - (function (parameterList)) + - (function (parameterList)) \ No newline at end of file diff --git a/analysis/language.go b/analysis/language.go index 3061d108..8a8f051b 100644 --- a/analysis/language.go +++ b/analysis/language.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "strings" sitter "github.com/smacker/go-tree-sitter" @@ -83,6 +84,66 @@ const ( LangSwift ) +func DecodeLanguage(language string) Language { + language = strings.ToLower(language) + switch language { + case "javascript", "js": + return LangJs + case "typescript", "ts": + return LangTs + case "jsx", "tsx": + return LangTsx + case "python", "py": + return LangPy + case "ocaml", "ml": + return LangOCaml + case "docker", "dockerfile": + return LangDockerfile + case "java": + return LangJava + case "kotlin", "kt": + return LangKotlin + case "rust", "rs": + return LangRust + case "ruby", "rb": + return LangRuby + case "lua": + return LangLua + case "yaml", "yml": + return LangYaml + case "sql": + return LangSql + case "css", "css3": + return LangCss + case "markdown", "md": + return LangMarkdown + case "sh", "bash": + return LangBash + case "csharp", "cs": + return LangCsharp + case "elixir", "ex": + return LangElixir + case "elm": + return LangElm + case "go": + return LangGo + case "groovy": + return LangGroovy + case "hcl", "tf": + return LangHcl + case "html": + return LangHtml + case "php": + return LangPhp + case "scala": + return LangScala + case "swift": + return LangSwift + default: + return LangUnknown + } +} + // tsGrammarForLang returns the tree-sitter grammar for the given language. // May return `nil` when `lang` is `LangUnkown`. func (lang Language) Grammar() *sitter.Language { @@ -169,7 +230,7 @@ func LanguageFromFilePath(path string) Language { return LangYaml case ".css": return LangCss - case ".dockerfile": + case ".dockerfile", ".Dockerfile": return LangDockerfile case ".md": return LangMarkdown diff --git a/analysis/scope.go b/analysis/scope.go index 91c971a4..1a55c070 100644 --- a/analysis/scope.go +++ b/analysis/scope.go @@ -147,9 +147,9 @@ func buildScopeTree( if builder.NodeCreatesScope(node) { nextScope = NewScope(scope) scopeOfNode[node] = nextScope - scope.AstNode = node if scope != nil { scope.Children = append(scope.Children, nextScope) + scope.AstNode = node } else { scope = nextScope // root } diff --git a/analysis/testdata/mock-analysis-function.test.js b/analysis/testdata/mock-analysis-function.test.js new file mode 100644 index 00000000..b0ace2d9 --- /dev/null +++ b/analysis/testdata/mock-analysis-function.test.js @@ -0,0 +1,11 @@ +function getUserInput(key) { + + return document.getElementById(key).value; + +} + +userInput = getUserInput('username') + +// A sink method, which performs some raw databse operation on the userInput +// +perform_db_operation(userInput) \ No newline at end of file diff --git a/analysis/testdata/mock-analysis-function.yml b/analysis/testdata/mock-analysis-function.yml new file mode 100644 index 00000000..400023c4 --- /dev/null +++ b/analysis/testdata/mock-analysis-function.yml @@ -0,0 +1,23 @@ +name: "run_taint_analysis" +language: javascript +category: security +severity: high +message: "This is just a mock checker" +analysisFunction: + name: taint + parameters: + sources: + - | + (call_expression + function: (identifier) @sourceName + (#eq? @sourceName "getUserInput")) + sinks: + - | + (call_expression + function: (identifier) @sinkName + (#eq? @sinkName "perform_db_operation")) + +pattern: | + (call_expression) + +description: "Runs a taint analysis on the provided function and its parameters." \ No newline at end of file diff --git a/analysis/testdata/mock-checker.yml b/analysis/testdata/mock-checker.yml new file mode 100644 index 00000000..db1a5592 --- /dev/null +++ b/analysis/testdata/mock-checker.yml @@ -0,0 +1,11 @@ +language: javascript +name: mock-checker +message: "This is just a mock checker" +category: style +severity: info +pattern: + (call_expression) @mock-checker +description: | + This is a mock checker. + + diff --git a/analysis/testdata/node-filter-checker.yml b/analysis/testdata/node-filter-checker.yml new file mode 100644 index 00000000..0a5a9066 --- /dev/null +++ b/analysis/testdata/node-filter-checker.yml @@ -0,0 +1,9 @@ +language: javascript +name: node-filter-checker +message: "Variable @var found inside function" +category: style +severity: info +pattern: (variable_declarator) @var @node-filter-checker +filters: + - pattern-inside: (function_declaration) +description: "Check for variables declared inside functions" \ No newline at end of file diff --git a/analysis/testdata/node-filter-test-checker.test.js b/analysis/testdata/node-filter-test-checker.test.js new file mode 100644 index 00000000..c8431f79 --- /dev/null +++ b/analysis/testdata/node-filter-test-checker.test.js @@ -0,0 +1,10 @@ +console.log("Hello, world!"); + +function foo(){ + // + console.log("This should be detected"); + + /* + console.log("This Should not be detected"); + */ +} \ No newline at end of file diff --git a/analysis/testdata/node-filter-test-checker.yml b/analysis/testdata/node-filter-test-checker.yml new file mode 100644 index 00000000..dd963872 --- /dev/null +++ b/analysis/testdata/node-filter-test-checker.yml @@ -0,0 +1,15 @@ +language: javascript +name: node-filter-test-checker +message: "Variable @var found inside function" +category: style +severity: info +pattern: > + (call_expression + function: (member_expression + object: (identifier) @obj + property: (property_identifier) @method + (#eq? @obj "console"))) @node-filter-test-checker +filters: + - pattern-inside: (function_declaration) + - pattern-not-inside: (comment) +description: "Check for variables declared inside functions" diff --git a/analysis/testrunner.go b/analysis/testrunner.go index e0535ae4..7842b07a 100644 --- a/analysis/testrunner.go +++ b/analysis/testrunner.go @@ -210,6 +210,13 @@ func RunAnalyzerTests(testDir string, analyzers []*Analyzer) (string, string, bo // if there's a test file in the testDir for which there's no analyzer, // it's most likely a YAML checker test, so skip it + + // yamlAnalyzers, err := discoverYamlAnalyzers(testDir) + // if err != nil { + // return "", "", false, err + // } + // analyzers = append(analyzers, yamlAnalyzers...) + likelyTestFiles := []string{} for _, analyzer := range analyzers { likelyTestFiles = append(likelyTestFiles, fmt.Sprintf("%s.test%s", analyzer.Name, GetExtFromLanguage(analyzer.Language))) diff --git a/analysis/walk.go b/analysis/walk.go index 06273995..91a018e7 100644 --- a/analysis/walk.go +++ b/analysis/walk.go @@ -1,6 +1,8 @@ package analysis import ( + "fmt" + sitter "github.com/smacker/go-tree-sitter" ) @@ -94,3 +96,16 @@ func FirstChildOfType(node *sitter.Node, nodeType string) *sitter.Node { return nil } + +func GetRootNode(node *sitter.Node) (*sitter.Node, error) { + current := node + + if current.Parent() == nil { + return current, fmt.Errorf("at the top-most level for the node") + } + for current.Parent() != nil { + current = current.Parent() + } + + return current, nil +} diff --git a/analysis/yaml.go b/analysis/yaml.go new file mode 100644 index 00000000..af253a07 --- /dev/null +++ b/analysis/yaml.go @@ -0,0 +1,306 @@ +package analysis + +import ( + "fmt" + "os" + "strings" + + "github.com/gobwas/glob" + sitter "github.com/smacker/go-tree-sitter" + "gopkg.in/yaml.v3" +) + +// To get a node back from a tree-sitter query, it *must* have a capture name. +// So: (call_expression) will match nothing, but (call_expression) @some_key +// will match all call expressions. +// For filtering patterns with clauses in the yaml file, like: +// filters: +// - pattern-inside: (call_expression) +// - pattern-not-inside: (catch_block) +// +// We need a to append a key name at the end of the pattern written by the user. +// This is the key that we will use. +const filterPatternKey = "__filter__key__" + +type filterYaml struct { + PatternInside string `yaml:"pattern-inside,omitempty"` + PatternNotInside string `yaml:"pattern-not-inside,omitempty"` +} + +type pathFilterYaml struct { + Exclude []string `yaml:"exclude,omitempty"` + Include []string `yaml:"include,omitempty"` +} + +// NodeFilter is a filter that can be applied to a PatternChecker to restrict +// the the nodes that the checker is applied to. +// The checker is only applied to nodes that have a parent matching (or not matching) the query. +type NodeFilter struct { + query *sitter.Query + shouldMatch bool +} + +// PathFilter is a glob that can be applied to a PatternChecker to restrict +// the files that the checker is applied to. +type PathFilter struct { + ExcludeGlobs []glob.Glob + IncludeGlobs []glob.Glob +} + +type AnalysisFunction struct { + Name string `yaml:"name"` + Parameters map[string][]string `yaml:"parameters"` +} + +type Yaml struct { + Language string `yaml:"language"` + Code string `yaml:"name"` + Message string `yaml:"message"` + Category Category `yaml:"category"` + Severity Severity `yaml:"severity"` + Pattern string `yaml:"pattern"` + Patterns []string `yaml:"patterns"` + Description string `yaml:"description"` + Exclude []string `yaml:"exclude,omitempty"` + Include []string `yaml:"include,omitempty"` + Filters []filterYaml `yaml:"filters,omitempty"` + PathFilter *pathFilterYaml `yaml:"path_filter,omitempty"` + AnalysisFunction *AnalysisFunction `yaml:"analysisFunction,omitempty"` +} + +type YamlAnalyzer struct { + Analyzer Analyzer + Patterns []*sitter.Query + NodeFilter []NodeFilter + PathFilter *PathFilter + Message string + AnalysisFunction *AnalysisFunction +} + +// ReadFromFile reads a pattern checker definition from a YAML config file. +func ReadFromFile(filePath string) (Analyzer, YamlAnalyzer, error) { + fileContent, err := os.ReadFile(filePath) + if err != nil { + return Analyzer{}, YamlAnalyzer{}, err + } + + return ReadFromBytes(fileContent) +} + +// ReadFromBytes reads a pattern checker definition from bytes array +func ReadFromBytes(fileContent []byte) (Analyzer, YamlAnalyzer, error) { + var checker Yaml + if err := yaml.Unmarshal(fileContent, &checker); err != nil { + return Analyzer{}, YamlAnalyzer{}, err + } + + analysisFunction := checker.AnalysisFunction + + lang := DecodeLanguage(checker.Language) + if lang == LangUnknown { + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("unknown language code: '%s'", checker.Language) + } + + if checker.Code == "" { + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("no name provided in checker definition") + } + + if checker.Message == "" { + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("no message provided in checker '%s'", checker.Code) + } + + var patterns []*sitter.Query + if checker.Pattern != "" { + pattern, err := sitter.NewQuery([]byte(checker.Pattern), lang.Grammar()) + if err != nil { + return Analyzer{}, YamlAnalyzer{}, err + } + patterns = append(patterns, pattern) + } else if len(checker.Patterns) > 0 { + for _, patternStr := range checker.Patterns { + pattern, err := sitter.NewQuery([]byte(patternStr), lang.Grammar()) + if err != nil { + return Analyzer{}, YamlAnalyzer{}, err + } + patterns = append(patterns, pattern) + } + } else { + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("no pattern provided in checker '%s'", checker.Code) + } + + if checker.Pattern != "" && len(checker.Patterns) > 0 { + return Analyzer{}, YamlAnalyzer{}, fmt.Errorf("only one of 'pattern' or 'patterns' can be provided in a checker definition") + } + + // include and exclude patterns + var pathFilter *PathFilter + if checker.Exclude != nil || checker.Include != nil { + pathFilter = &PathFilter{ + ExcludeGlobs: make([]glob.Glob, 0, len(checker.Exclude)), + IncludeGlobs: make([]glob.Glob, 0, len(checker.Include)), + } + + for _, exclude := range checker.Exclude { + g, err := glob.Compile(exclude) + if err != nil { + return Analyzer{}, YamlAnalyzer{}, err + } + pathFilter.ExcludeGlobs = append(pathFilter.ExcludeGlobs, g) + } + + for _, include := range checker.Include { + g, err := glob.Compile(include) + if err != nil { + return Analyzer{}, YamlAnalyzer{}, err + } + pathFilter.IncludeGlobs = append(pathFilter.IncludeGlobs, g) + } + } + + // node filters + var filters []NodeFilter + if checker.Filters != nil { + for _, filter := range checker.Filters { + if filter.PatternInside != "" { + queryStr := filter.PatternInside + " @" + filterPatternKey + query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) + if err != nil { + return Analyzer{}, YamlAnalyzer{}, err + } + + filters = append(filters, NodeFilter{ + query: query, + shouldMatch: true, + }) + } + + if filter.PatternNotInside != "" { + queryStr := filter.PatternNotInside + " @" + filterPatternKey + query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) + if err != nil { + return Analyzer{}, YamlAnalyzer{}, err + } + + filters = append(filters, NodeFilter{ + query: query, + shouldMatch: false, + }) + } + } + } + + patternChecker := &Analyzer{ + Name: checker.Code, + Language: lang, + Description: checker.Description, + Category: checker.Category, + Severity: checker.Severity, + } + + yamlAnalyzer := &YamlAnalyzer{ + Analyzer: Analyzer{ + Name: checker.Code, + Language: lang, + Description: checker.Description, + Category: checker.Category, + Severity: checker.Severity, + }, + Patterns: patterns, + NodeFilter: filters, + PathFilter: pathFilter, + Message: checker.Message, + AnalysisFunction: analysisFunction, + } + + patternChecker.Run = RunYamlAnalyzer(yamlAnalyzer) + return *patternChecker, *yamlAnalyzer, nil +} + +func RunYamlAnalyzer(YamlAnalyzer *YamlAnalyzer) func(pass *Pass) (any, error) { + return func(pass *Pass) (any, error) { + queries := YamlAnalyzer.Patterns + for _, query := range queries { + qc := sitter.NewQueryCursor() + defer qc.Close() + qc.Exec(query, pass.FileContext.Ast) + for { + m, ok := qc.NextMatch() + if !ok { + break + } + m = qc.FilterPredicates(m, pass.FileContext.Source) + for _, capture := range m.Captures { + captureName := query.CaptureNameForId(capture.Index) + if captureName == pass.Analyzer.Name && YamlAnalyzer.runParentFilters(pass.FileContext.Source, capture.Node) { + message := YamlAnalyzer.Message + for _, capture := range m.Captures { + captureName := query.CaptureNameForId(capture.Index) + message = strings.ReplaceAll(message, "@"+captureName, capture.Node.Content(pass.FileContext.Source)) + } + + pass.Report(pass, capture.Node, message) + } + } + + } + } + return nil, nil + } + +} + +func (ana *YamlAnalyzer) runParentFilters(source []byte, capture *sitter.Node) bool { + filters := ana.NodeFilter + if len(filters) == 0 { + return true + } + + for _, filter := range filters { + shouldMatch := filter.shouldMatch + nodeMatched := false + + for parent := capture.Parent(); parent != nil; parent = parent.Parent() { + if filterMatchesParent(&filter, parent, source) { + nodeMatched = true + if !shouldMatch { + return false + } else { + break + } + } + } + + if !nodeMatched && shouldMatch { + return false + } + } + + return true +} + +func filterMatchesParent(filter *NodeFilter, parent *sitter.Node, source []byte) bool { + qc := sitter.NewQueryCursor() + defer qc.Close() + + qc.Exec(filter.query, parent) + + for { + m, ok := qc.NextMatch() + if !ok { + break + } + + m = qc.FilterPredicates(m, source) + for _, capture := range m.Captures { + captureName := filter.query.CaptureNameForId(capture.Index) + if captureName == filterPatternKey && capture.Node == parent { + return true + } + } + } + + return false +} + +// TODO: Add a new field inside the Yaml Strcut, that takes in Function field +// Introduction of a new Struct for storing analysisFunction diff --git a/analysis/yaml_test.go b/analysis/yaml_test.go new file mode 100644 index 00000000..d5b31998 --- /dev/null +++ b/analysis/yaml_test.go @@ -0,0 +1,88 @@ +package analysis + +import ( + "testing" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestReadFile(t *testing.T) { + path := "./testdata/mock-checker.yml" + ana, anaYaml, err := ReadFromFile(path) + + require.Nil(t, err) + name := ana.Name + assert.Equal(t, name, "mock-checker") + language := ana.Language + assert.Equal(t, language, LangJs) + category := ana.Category + assert.Equal(t, category, CategoryStyle) + severity := ana.Severity + assert.Equal(t, severity, SeverityInfo) + assert.Equal(t, anaYaml.Message, "This is just a mock checker") + assert.Equal(t, len(anaYaml.Patterns), 1) +} + +func TestNodeFilters(t *testing.T) { + jsData := ` + var globalVar = 1; // shouldn't match + function test() { + var localVar = 2; // Should match + let anotherVar = 3; // should match + } + ` + path := "./testdata/node-filter-checker.yml" + ana, _, err := ReadFromFile(path) + require.NoError(t, err, "Failed to read YAML data") + + parsedJs, err := Parse("", []byte(jsData), LangJs, LangJs.Grammar()) + require.NoError(t, err, "Failed to parse JS data") + + var matchCount int + var matches []string + + reportFunc := func(pass *Pass, node *sitter.Node, message string) { + matchCount++ + t.Log(node.Content(pass.FileContext.Source)) + matches = append(matches, message) + } + + pass := &Pass{ + Analyzer: &ana, + FileContext: parsedJs, + Report: reportFunc, + Files: []*ParseResult{parsedJs}, + } + + _, err = ana.Run(pass) + require.NoError(t, err, "Failed to run YAML analyzer") + assert.Equal(t, matchCount, 2, "Expected 2 matches") +} + +func TestNodeFilterWithTests(t *testing.T) { + path := "./testdata/node-filter-test-checker.yml" + ana, yamlAna, err := ReadFromFile(path) + require.NoError(t, err, "Failed to read YAML data") + assert.Equal(t, ana.Name, "node-filter-test-checker") + assert.Len(t, yamlAna.NodeFilter, 2) + + diff, log, passed, err := RunAnalyzerTests("./testdata", []*Analyzer{&ana}) + require.NoError(t, err, "Failed to run analyzer tests") + t.Logf("Diff: %s", diff) + t.Logf("Log: %s", log) + assert.True(t, passed) +} + +func TestAnalysisFunction(t *testing.T) { + path := "./testdata/mock-analysis-function.yml" + ana, yamlAna, err := ReadFromFile(path) + require.NoError(t, err, "Failed to read YAML data") + assert.Equal(t, ana.Name, "run_taint_analysis") + assert.Equal(t, yamlAna.AnalysisFunction.Name, "taint") + assert.Equal(t, len(yamlAna.AnalysisFunction.Parameters["sources"]), 1) + assert.Equal(t, len(yamlAna.AnalysisFunction.Parameters["sinks"]), 1) + assert.Equal(t, yamlAna.AnalysisFunction.Name, "taint") + +} diff --git a/checkers/checker.go b/checkers/checker.go index 9ed39107..627840ac 100644 --- a/checkers/checker.go +++ b/checkers/checker.go @@ -8,13 +8,12 @@ import ( "path/filepath" goAnalysis "globstar.dev/analysis" - "globstar.dev/pkg/analysis" ) //go:embed **/*.y*ml var builtinCheckers embed.FS -func findYamlCheckers(checkersMap map[analysis.Language][]analysis.YamlChecker) func(path string, d fs.DirEntry, err error) error { +func findYamlCheckers(checkersMap map[goAnalysis.Language][]goAnalysis.Analyzer) func(path string, d fs.DirEntry, err error) error { return func(path string, d fs.DirEntry, err error) error { if err != nil { return nil @@ -35,25 +34,25 @@ func findYamlCheckers(checkersMap map[analysis.Language][]analysis.YamlChecker) return nil } - patternChecker, err := analysis.ReadFromBytes(fileContent) + patternChecker, _, err := goAnalysis.ReadFromBytes(fileContent) if err != nil { return fmt.Errorf("invalid checker '%s': %s", d.Name(), err.Error()) } - lang := patternChecker.Language() + lang := patternChecker.Language checkersMap[lang] = append(checkersMap[lang], patternChecker) return nil } } -func LoadBuiltinYamlCheckers() (map[analysis.Language][]analysis.YamlChecker, error) { - checkersMap := make(map[analysis.Language][]analysis.YamlChecker) +func LoadBuiltinYamlCheckers() (map[goAnalysis.Language][]goAnalysis.Analyzer, error) { + checkersMap := make(map[goAnalysis.Language][]goAnalysis.Analyzer) err := fs.WalkDir(builtinCheckers, ".", findYamlCheckers(checkersMap)) return checkersMap, err } -func LoadCustomYamlCheckers(dir string) (map[analysis.Language][]analysis.YamlChecker, error) { - checkersMap := make(map[analysis.Language][]analysis.YamlChecker) +func LoadCustomYamlCheckers(dir string) (map[goAnalysis.Language][]goAnalysis.Analyzer, error) { + checkersMap := make(map[goAnalysis.Language][]goAnalysis.Analyzer) err := fs.WalkDir(os.DirFS(dir), ".", findYamlCheckers(checkersMap)) return checkersMap, err } diff --git a/checkers/javascript/js_dataflow.go b/checkers/javascript/js_dataflow.go index bbd56b16..cc3d381a 100644 --- a/checkers/javascript/js_dataflow.go +++ b/checkers/javascript/js_dataflow.go @@ -60,7 +60,7 @@ func createDataFlowGraph(pass *analysis.Pass) (interface{}, error) { scopeResult, err := buildScopeTree(pass) if err != nil { - return nil, fmt.Errorf("failed to build the scope tree \n") + return nil, fmt.Errorf("failed to build the scope tree: %v", err) } scopeTree := scopeResult.(*analysis.ScopeTree) diff --git a/checkers/javascript/taint_detector.go b/checkers/javascript/taint_detector.go new file mode 100644 index 00000000..bbc57afe --- /dev/null +++ b/checkers/javascript/taint_detector.go @@ -0,0 +1,178 @@ +package javascript + +import ( + "fmt" + + sitter "github.com/smacker/go-tree-sitter" + "globstar.dev/analysis" +) + +func GetTaintFunction(source, sink []string) *analysis.Analyzer { + return &analysis.Analyzer{ + Name: "taint_detector", + Language: analysis.LangJs, + Description: "Taint detector", + Category: analysis.CategorySecurity, + Severity: analysis.SeverityCritical, + Requires: []*analysis.Analyzer{DataFlowAnalyzer}, + Run: detectTaint(source, sink), + } +} + +func detectTaint(source []string, sink []string) func(pass *analysis.Pass) (any, error) { + + return func(pass *analysis.Pass) (interface{}, error) { + dfg := pass.ResultOf[DataFlowAnalyzer].(*DataFlowGraph) + if dfg == nil { + return nil, fmt.Errorf("no data flow graph found") + } + scopeTree := dfg.ScopeTree + if scopeTree == nil { + fmt.Println("no scope tree found") + return nil, fmt.Errorf("no scope tree found") + } + + var sinkPatterns []*sitter.Query + for _, sink := range sink { + sinkPattern, err := sitter.NewQuery([]byte(sink), analysis.LangJs.Grammar()) + if err != nil { + return nil, fmt.Errorf("failed to create sink pattern: %w", err) + } + sinkPatterns = append(sinkPatterns, sinkPattern) + } + + var sourcePatterns []*sitter.Query + for _, source := range source { + sourcePattern, err := sitter.NewQuery([]byte(source), analysis.LangJs.Grammar()) + if err != nil { + return nil, fmt.Errorf("failed to create source pattern: %w", err) + } + sourcePatterns = append(sourcePatterns, sourcePattern) + } + + if len(sinkPatterns) == 0 || len(sourcePatterns) == 0 { + return nil, fmt.Errorf("no patterns found") + } + + var sourceNodes []*sitter.Node + var sinkNodes []*sitter.Node + for _, query := range sourcePatterns { + qc := sitter.NewQueryCursor() + defer qc.Close() + qc.Exec(query, pass.FileContext.Ast) + for { + m, ok := qc.NextMatch() + if !ok { + break + } + m = qc.FilterPredicates(m, pass.FileContext.Source) + for _, capture := range m.Captures { + captureNode := capture.Node + sourceNodes = append(sourceNodes, captureNode) + } + + } + } + + for _, query := range sinkPatterns { + qc := sitter.NewQueryCursor() + defer qc.Close() + qc.Exec(query, pass.FileContext.Ast) + for { + m, ok := qc.NextMatch() + if !ok { + break + } + m = qc.FilterPredicates(m, pass.FileContext.Source) + for _, capture := range m.Captures { + captureNode := capture.Node + sinkNodes = append(sinkNodes, captureNode) + } + } + } + + if len(sinkNodes) == 0 || len(sourceNodes) == 0 { + return nil, fmt.Errorf("no sink or source pattern matched") + } + + pass.Report(pass, sinkNodes[0], "sink node found") + // // Get the data flow graph to track variable relationships + + // // Track source variables that flow into sinks + // var taintedFlows []struct { + // source *sitter.Node + // sink *sitter.Node + // } + + // // For each source node, get its variable + // for _, sourceNode := range sourceNodes { + // // Get the assignment node (parent.parent.parent of source capture) + // assignNode := sourceNode.Parent().Parent().Parent() + // if assignNode == nil { + // continue + // } + + // // Get the identifier node and its scope + // idNode := assignNode.ChildByFieldName("left") + // if idNode == nil { + // continue + // } + + // idScope := scopeTree.GetScope(idNode) + // if idScope == nil { + // continue + // } + + // // Look up the variable for the identifier + // sourceVar := idScope.Lookup(idNode.Content(pass.FileContext.Source)) + // if sourceVar == nil { + // continue + // } + + // // For each sink, check if it uses the source variable + // for _, sinkNode := range sinkNodes { + // // Get the call expression node + // callNode, err := analysis.GetRootNode(sinkNode) + // if err != nil { + // continue + // } + + // // Get the argument node and its variable + // argsNode := callNode.ChildByFieldName("arguments") + // if argsNode == nil || argsNode.NamedChildCount() == 0 { + // continue + // } + // argNode := argsNode.NamedChild(0) + + // argScope := scopeTree.GetScope(callNode) + // if argScope == nil { + // continue + // } + + // argVar := argScope.Lookup(argNode.Content(pass.FileContext.Source)) + // if argVar == nil { + // continue + // } + + // // If the argument variable matches the source variable, we found a tainted flow + // if argVar == sourceVar { + // taintedFlows = append(taintedFlows, struct { + // source *sitter.Node + // sink *sitter.Node + // }{sourceNode, sinkNode}) + // } + // } + // } + + // for _, tainted := range taintedFlows { + // pass.Report(pass, tainted.sink, "") + // } + + return map[string]interface{}{ + "sinkNodes": sinkNodes, + "sourceNodes": sourceNodes, + "sinkPatterns": sinkPatterns, + "sourcePatterns": sourcePatterns, + }, nil + } +} diff --git a/checkers/javascript/taint_detector_test.go b/checkers/javascript/taint_detector_test.go new file mode 100644 index 00000000..e74f7388 --- /dev/null +++ b/checkers/javascript/taint_detector_test.go @@ -0,0 +1,88 @@ +package javascript + +import ( + "testing" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/stretchr/testify/assert" + "globstar.dev/analysis" +) + +func TestPatternDetection(t *testing.T) { + source := ` +function getUserInput(key) { + + return document.getElementById(key).value; + +} + +userInput = getUserInput('username') + +// A sink method, which performs some raw databse operation on the userInput +perform_db_operation(userInput) + +} +` + + var TaintDetector = &analysis.Analyzer{ + Name: "taint_detector", + Language: analysis.LangJs, + Description: "Taint detector", + Category: analysis.CategorySecurity, + Severity: analysis.SeverityCritical, + Requires: []*analysis.Analyzer{DataFlowAnalyzer}, + Run: detectTaint([]string{` + (expression_statement + (assignment_expression + right: (call_expression + function: (identifier) @sourceName + ))(#eq? @sourceName "getUserInput"))`}, []string{` + (call_expression + function: (identifier) @sinkName + (#eq? @sinkName "perform_db_operation"))`}), + } + parseRes := parseJsCode(t, []byte(source)) + pass := &analysis.Pass{ + FileContext: parseRes, + Analyzer: TaintDetector, + } + patterns, err := TaintDetector.Run(pass) + assert.NoError(t, err) + + dfg, err := createDataFlowGraph(pass) + assert.NoError(t, err) + + scopeTree := dfg.(*DataFlowGraph).ScopeTree + assert.NotNil(t, scopeTree) + + assert.Len(t, patterns.(map[string]interface{})["sinkNodes"], 1) + assert.Len(t, patterns.(map[string]interface{})["sourceNodes"], 1) + assert.Len(t, patterns.(map[string]interface{})["sinkPatterns"], 1) + assert.Len(t, patterns.(map[string]interface{})["sourcePatterns"], 1) + + var sourceVar *analysis.Variable + + for _, node := range patterns.(map[string]interface{})["sourceNodes"].([]*sitter.Node) { + parentNode := node.Parent().Parent().Parent() + + idScope := scopeTree.GetScope(parentNode.ChildByFieldName("left")) + assert.NotNil(t, idScope) + + idVar := idScope.Lookup(parentNode.ChildByFieldName("left").Content(parseRes.Source)) + assert.NotNil(t, idVar) + + sourceVar = idVar + } + + for _, node := range patterns.(map[string]interface{})["sinkNodes"].([]*sitter.Node) { + parentNode, err := analysis.GetRootNode(node) + assert.NoError(t, err) + arg := parentNode.ChildByFieldName("arguments").NamedChild(0) + + scope := scopeTree.GetScope(parentNode) + scopeVar := scope.Lookup(arg.Content(parseRes.Source)) + + assert.Equal(t, scopeVar, sourceVar) + } + +} diff --git a/checkers/javascript/testdata/taint_detector.test.js b/checkers/javascript/testdata/taint_detector.test.js new file mode 100644 index 00000000..50b4922f --- /dev/null +++ b/checkers/javascript/testdata/taint_detector.test.js @@ -0,0 +1,10 @@ +function getUserInput(key) { + + return document.getElementById(key).value; + +} + +userInput = getUserInput('username') + +// +perform_db_operation(userInput) \ No newline at end of file diff --git a/cmd/globstar/main.go b/cmd/globstar/main.go index 073f0902..9e1d20d5 100644 --- a/cmd/globstar/main.go +++ b/cmd/globstar/main.go @@ -16,7 +16,7 @@ func main() { cli := cli.Cli{ RootDirectory: cwd, - Checkers: nil, // no custom checker set + // Checkers: nil, // no custom checker set } err = cli.Run() diff --git a/go.mod b/go.mod index ac5c5cdd..1fed016b 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 github.com/stretchr/testify v1.10.0 github.com/urfave/cli/v3 v3.0.0-beta1 + golang.org/x/tools v0.11.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index 5795f49a..2bd60a58 100644 --- a/go.sum +++ b/go.sum @@ -110,6 +110,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.11.0 h1:EMCa6U9S2LtZXLAMoWiR/R8dAQFRqbAitmbJ2UKhoi8= +golang.org/x/tools v0.11.0/go.mod h1:anzJrxPjNtfgiYQYirP2CPGzGLxrH2u2QBhn6Bf3qY8= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/pkg/analysis/analyze.go b/pkg/analysis/analyze.go deleted file mode 100644 index 53891bcf..00000000 --- a/pkg/analysis/analyze.go +++ /dev/null @@ -1,439 +0,0 @@ -package analysis - -import ( - "encoding/json" - "fmt" - "path/filepath" - "regexp" - "strings" - - sitter "github.com/smacker/go-tree-sitter" - "globstar.dev/pkg/config" -) - -type Issue struct { - // The category of the issue - Category config.Category - // The severity of the issue - Severity config.Severity - // The message to display to the user - Message string - // The file path of the file that the issue was found in - Filepath string - // The range of the issue in the source code - Range sitter.Range - // (optional) The AST node that caused the issue - Node *sitter.Node - // Id is a unique ID for the issue. - // Issue that have 'Id's can be explained using the `globstar desc` command. - Id *string -} - -func (i *Issue) AsJson() ([]byte, error) { - type location struct { - Row int `json:"row"` - Column int `json:"column"` - } - - type position struct { - Filename string `json:"filename"` - Start location `json:"start"` - End location `json:"end"` - } - - type issueJson struct { - Category config.Category `json:"category"` - Severity config.Severity `json:"severity"` - Message string `json:"message"` - Range position `json:"range"` - Id string `json:"id"` - } - issue := issueJson{ - Category: i.Category, - Severity: i.Severity, - Message: i.Message, - Range: position{ - Filename: i.Filepath, - Start: location{ - Row: int(i.Range.StartPoint.Row), - Column: int(i.Range.StartPoint.Column), - }, - End: location{ - Row: int(i.Range.EndPoint.Row), - Column: int(i.Range.EndPoint.Column), - }, - }, - Id: *i.Id, - } - - return json.Marshal(issue) -} - -func (i *Issue) AsText() ([]byte, error) { - return []byte(fmt.Sprintf("%s:%d:%d:%s", i.Filepath, i.Range.StartPoint.Row, i.Range.StartPoint.Column, i.Message)), nil -} - -type Analyzer struct { - Language Language - // WorkDir is the directory in which the analysis is being run. - WorkDir string - // ParseResult is the result of parsing a file with a tree-sitter parser, - // along with some extra appendages (e.g: scope information). - ParseResult *ParseResult - // checkers is a list of all checkers that should be applied to the AST - // for this language. - checkers []Checker - // patternCheckers is a list of all checkers that run after a query is run on the AST. - // Usually, these are written in a DSL (which, for now, is the tree-sitter S-Expression query language) - YamlCheckers []YamlChecker - // entryCheckers maps node types to the checkers that should be applied - // when entering that node. - entryCheckersForNode map[string][]Checker - // exitCheckers maps node types to the checkers that should be applied - // when leaving that node. - exitCheckersForNode map[string][]Checker - issuesRaised []*Issue -} - -type SkipComment struct { - // the line number for the skipcq comment - CommentLine int - // the entire text of the skipcq comment - CommentText string - // (optional) name of the checker for targetted skip - CheckerIds []string -} - -// package level cache to store comments for each file -var fileSkipComment = make(map[string][]*SkipComment) - -func InitializeSkipComments(analyzers []*Analyzer) { - fileSkipComments := make(map[string][]*SkipComment) - - processedPaths := make(map[string]bool) - - for _, analyzer := range analyzers { - filepath := analyzer.ParseResult.FilePath - if processedPaths[filepath] { - continue - } - - processedPaths[filepath] = true - fileSkipComments[filepath] = GatherSkipInfo(analyzer.ParseResult) - } -} - -func FromFile(filePath string, baseCheckers []Checker) (*Analyzer, error) { - res, err := ParseFile(filePath) - if err != nil { - return nil, err - } - - return NewAnalyzer(res, baseCheckers), nil -} - -func NewAnalyzer(file *ParseResult, checkers []Checker) *Analyzer { - ana := &Analyzer{ - ParseResult: file, - Language: file.Language, - entryCheckersForNode: map[string][]Checker{}, - exitCheckersForNode: map[string][]Checker{}, - } - - for _, checker := range checkers { - ana.AddChecker(checker) - } - - return ana -} - -func (ana *Analyzer) Analyze() []*Issue { - WalkTree(ana.ParseResult.Ast, ana) - ana.runPatternCheckers() - return ana.issuesRaised -} - -func (ana *Analyzer) AddChecker(checker Checker) { - ana.checkers = append(ana.checkers, checker) - typ := checker.NodeType() - - if checker.OnEnter() != nil { - ana.entryCheckersForNode[typ] = append(ana.entryCheckersForNode[typ], checker) - } - - if checker.OnLeave() != nil { - ana.exitCheckersForNode[typ] = append(ana.exitCheckersForNode[typ], checker) - } -} - -func (ana *Analyzer) OnEnterNode(node *sitter.Node) bool { - nodeType := node.Type() - checkers := ana.entryCheckersForNode[nodeType] - for _, checker := range checkers { - visitFn := checker.OnEnter() - if visitFn != nil { - (*visitFn)(checker, ana, node) - } - } - return true -} - -func (ana *Analyzer) OnLeaveNode(node *sitter.Node) { - nodeType := node.Type() - checkers := ana.exitCheckersForNode[nodeType] - for _, checker := range checkers { - visitFn := checker.OnLeave() - if visitFn != nil { - (*visitFn)(checker, ana, node) - } - } -} - -func (ana *Analyzer) shouldSkipChecker(checker YamlChecker) bool { - pathFilter := checker.PathFilter() - if pathFilter == nil { - // no filter is set, so we should not skip this checker - return false - } - - relPath := ana.ParseResult.FilePath - if ana.WorkDir != "" { - rel, err := filepath.Rel(ana.WorkDir, ana.ParseResult.FilePath) - if err == nil { - relPath = rel - } - } - - if len(pathFilter.ExcludeGlobs) > 0 { - for _, excludeGlob := range pathFilter.ExcludeGlobs { - if excludeGlob.Match(relPath) { - return true - } - } - - // no exclude globs matched, so we should not skip this checker - return false - } - - if len(pathFilter.IncludeGlobs) > 0 { - for _, includeGlob := range pathFilter.IncludeGlobs { - if includeGlob.Match(relPath) { - return false - } - } - - // no include globs matched, so we should skip this checker - return true - } - - return false -} - -func (ana *Analyzer) filterMatchesParent(filter *NodeFilter, parent *sitter.Node) bool { - qc := sitter.NewQueryCursor() - defer qc.Close() - - qc.Exec(filter.query, parent) - - // check if the filter matches the `parent` node - for { - m, ok := qc.NextMatch() - if !ok { - break - } - - m = qc.FilterPredicates(m, ana.ParseResult.Source) - for _, capture := range m.Captures { - captureName := filter.query.CaptureNameForId(capture.Index) - if captureName == filterPatternKey && capture.Node == parent { - return true - } - } - } - - return false -} - -// runParentFilters checks if the parent filters for a checker match the given node. -func (ana *Analyzer) runParentFilters(checker YamlChecker, node *sitter.Node) bool { - filters := checker.NodeFilters() - if len(filters) == 0 { - return true - } - - for _, filter := range filters { - shouldMatch := filter.shouldMatch - nodeMatched := false - - // The matched node is expected to be a child of some other - // node, but it has no parents (is a top-level node) - if node.Parent() == nil && filter.shouldMatch { - return false - } - - for parent := node.Parent(); parent != nil; parent = parent.Parent() { - if ana.filterMatchesParent(&filter, parent) { - nodeMatched = true - if !shouldMatch { - // pattern-not-inside matched, so this checker should be skipped - return false - } else { - // pattern-inside matched, so we can break out of the loop - break - } - } - } - - if !nodeMatched && shouldMatch { - return false - } - } - - return true -} - -func (ana *Analyzer) executeCheckerQuery(checker YamlChecker, query *sitter.Query) { - qc := sitter.NewQueryCursor() - defer qc.Close() - - qc.Exec(query, ana.ParseResult.Ast) - for { - m, ok := qc.NextMatch() - - if !ok { - break - } - - m = qc.FilterPredicates(m, ana.ParseResult.Source) - for _, capture := range m.Captures { - captureName := query.CaptureNameForId(capture.Index) - // TODO: explain why captureName == checker.Name() - if captureName == checker.Name() && ana.runParentFilters(checker, capture.Node) { - checker.OnMatch(ana, query, capture.Node, m.Captures) - } - } - } -} - -// runPatternCheckers executes all checkers that are written as AST queries. -func (ana *Analyzer) runPatternCheckers() { - for _, checker := range ana.YamlCheckers { - if ana.shouldSkipChecker(checker) { - continue - } - - queries := checker.Patterns() - for _, q := range queries { - ana.executeCheckerQuery(checker, q) - } - } -} - -func (ana *Analyzer) Report(issue *Issue) { - ana.issuesRaised = append(ana.issuesRaised, issue) -} - -func RunYamlCheckers(path string, analyzers []*Analyzer) ([]*Issue, error) { - InitializeSkipComments(analyzers) - - issues := []*Issue{} - for _, analyzer := range analyzers { - issues = append(issues, analyzer.Analyze()...) - } - return issues, nil -} - -func GatherSkipInfo(fileContext *ParseResult) []*SkipComment { - var skipLines []*SkipComment - - commentIdentifier := GetEscapedCommentIdentifierFromPath(fileContext.FilePath) - pattern := fmt.Sprintf(`%s(?i).*?\bskipcq\b(?::(?:\s*(?P([A-Za-z\-_0-9]*(?:,\s*)?)+))?)?`, commentIdentifier) - skipRegexp := regexp.MustCompile(pattern) - - query, err := sitter.NewQuery([]byte("(comment) @skipcq"), fileContext.Language.Grammar()) - - if err != nil { - return skipLines - } - - cursor := sitter.NewQueryCursor() - cursor.Exec(query, fileContext.Ast) - - // gather all skipcq comment lines in a single pass - for { - m, ok := cursor.NextMatch() - if !ok { - break - } - - for _, capture := range m.Captures { - captureName := query.CaptureNameForId(capture.Index) - if captureName != "skipcq" { - continue - } - - commentNode := capture.Node - commentLine := int(commentNode.StartPoint().Row) - commentText := commentNode.Content(fileContext.Source) - - matches := skipRegexp.FindStringSubmatch(commentText) - if matches != nil { - issueIdsIdx := skipRegexp.SubexpIndex("issue_ids") - var checkerIds []string - - if issueIdsIdx != -1 && issueIdsIdx < len(matches) && matches[issueIdsIdx] != "" { - issueIdsIdx := matches[issueIdsIdx] - idSlice := strings.Split(issueIdsIdx, ",") - for _, id := range idSlice { - trimmedId := strings.TrimSpace(id) - if trimmedId != "" { - checkerIds = append(checkerIds, trimmedId) - } - } - } - - skipLines = append(skipLines, &SkipComment{ - CommentLine: commentLine, - CommentText: commentText, - CheckerIds: checkerIds, // will be empty for generic skipcq - }) - } - - } - } - - return skipLines -} - -func (ana *Analyzer) ContainsSkipcq(skipLines []*SkipComment, issue *Issue) bool { - if len(skipLines) == 0 { - return false - } - - issueNode := issue.Node - nodeLine := int(issueNode.StartPoint().Row) - prevLine := nodeLine - 1 - - var checkerId string - if issue.Id != nil { - checkerId = *issue.Id - } - - for _, comment := range skipLines { - if comment.CommentLine != nodeLine && comment.CommentLine != prevLine { - continue - } - - if len(comment.CheckerIds) > 0 { - for _, id := range comment.CheckerIds { - if checkerId == id { - return true - } - } - } else { - return true - } - } - - return false -} diff --git a/pkg/analysis/analyze_test.go b/pkg/analysis/analyze_test.go deleted file mode 100644 index 696a8fa5..00000000 --- a/pkg/analysis/analyze_test.go +++ /dev/null @@ -1,165 +0,0 @@ -package analysis - -import ( - "testing" - - sitter "github.com/smacker/go-tree-sitter" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func parseTestFile(t *testing.T, filename string, source string, language Language) *ParseResult { - parsed, err := Parse(filename, []byte(source), language, language.Grammar()) - require.NoError(t, err) - require.NotNil(t, parsed) - return parsed -} - -func TestSkipCq(t *testing.T) { - tests := []struct { - name string - checkerId string - source string - language Language - want bool - }{ - { - name: "skipcq comment on same line", - checkerId: "no-assert", - language: LangPy, - source: ` - def someFunc(a, b): - assert a == b # skipcq - `, - want: true, - }, - { - name: "skipcq comment on previous line", - checkerId: "no-assert", - language: LangPy, - source: ` - if True: - # skipcq - assert 1 == 2 - `, - want: true, - }, - { - name: "skipcq comment with target checker", - checkerId: "no-assert", - language: LangPy, - source: ` - if a > 20: - # skipcq: no-assert - assert 5 == 0 - `, - want: true, - }, - { - name: "skipcq comment with mismatches target checker", - checkerId: "no-assert", - language: LangPy, - source: ` - assert a >= float('inf') # skipcq: csv-writer - `, - want: false, - }, - { - name: "skipcq comment not present", - checkerId: "no-assert", - language: LangPy, - source: ` - assert a == b - `, - want: false, - }, - { - name: "skipcq with multiple targets matching", - checkerId: "no-assert", - language: LangPy, - source: ` - # skipcq: csv-writer, no-assert - assert 1 == 10 - `, - want: true, - }, - { - name: "skipcq with multiple targets mismatching", - checkerId: "no-assert", - language: LangPy, - source: ` - assert 2==1 # skipcq: csv-writer, flask-error - `, - want: false, - }, - { - name: "skipcq with extra comments target match", - checkerId: "no-assert", - language: LangPy, - source: ` - def aFunc(): - assert a == b # some comment skipcq: no-assert, sql-inject # nosec, - `, - want: true, - }, - { - name: "skipcq with extra comments target unmatched", - checkerId: "no-assert", - language: LangPy, - source: ` - assert a is b # should be true skipcq: sql-inject, django-taint # more - `, - want: false, - }, - { - name: "skipcq with extra comments no target", - checkerId: "no-assert", - language: LangPy, - source: ` - if True: - assert 1 == 2 # must be false skipcq # nosec, - `, - want: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - parsed := parseTestFile(t, "no-assert.test.py", tt.source, tt.language) - analyzer := &Analyzer{ - Language: tt.language, - ParseResult: parsed, - } - - query, err := sitter.NewQuery([]byte("(assert_statement) @assert"), tt.language.Grammar()) - require.NoError(t, err) - - cursor := sitter.NewQueryCursor() - cursor.Exec(query, parsed.Ast) - - match, ok := cursor.NextMatch() - require.True(t, ok, "failed to find assert statements") - - var assertNode *sitter.Node - for _, captureNode := range match.Captures { - if query.CaptureNameForId(captureNode.Index) == "assert" { - assertNode = captureNode.Node - break - } - } - - require.NotNil(t, assertNode, "failed to capture assert node") - - issue := &Issue{ - Filepath: "no-assert.test.py", - Node: assertNode, - Id: &tt.checkerId, - } - - skipComments := GatherSkipInfo(parsed) - - res := analyzer.ContainsSkipcq(skipComments, issue) - assert.Equal(t, tt.want, res) - }) - } -} diff --git a/pkg/analysis/language.go b/pkg/analysis/language.go deleted file mode 100644 index 41d7974a..00000000 --- a/pkg/analysis/language.go +++ /dev/null @@ -1,316 +0,0 @@ -package analysis - -import ( - "context" - "fmt" - "os" - "path/filepath" - - sitter "github.com/smacker/go-tree-sitter" - - treeSitterBash "github.com/smacker/go-tree-sitter/bash" - treeSitterCsharp "github.com/smacker/go-tree-sitter/csharp" - treeSitterCss "github.com/smacker/go-tree-sitter/css" - treeSitterDockerfile "github.com/smacker/go-tree-sitter/dockerfile" - treeSitterElixir "github.com/smacker/go-tree-sitter/elixir" - treeSitterElm "github.com/smacker/go-tree-sitter/elm" - treeSitterGo "github.com/smacker/go-tree-sitter/golang" - treeSitterGroovy "github.com/smacker/go-tree-sitter/groovy" - treeSitterHcl "github.com/smacker/go-tree-sitter/hcl" - treeSitterHtml "github.com/smacker/go-tree-sitter/html" - treeSitterJava "github.com/smacker/go-tree-sitter/java" - treeSitterKotlin "github.com/smacker/go-tree-sitter/kotlin" - treeSitterLua "github.com/smacker/go-tree-sitter/lua" - treeSitterOCaml "github.com/smacker/go-tree-sitter/ocaml" - treeSitterPhp "github.com/smacker/go-tree-sitter/php" - treeSitterPy "github.com/smacker/go-tree-sitter/python" - treeSitterRuby "github.com/smacker/go-tree-sitter/ruby" - treeSitterRust "github.com/smacker/go-tree-sitter/rust" - treeSitterScala "github.com/smacker/go-tree-sitter/scala" - treeSitterSql "github.com/smacker/go-tree-sitter/sql" - treeSitterSwift "github.com/smacker/go-tree-sitter/swift" - treeSitterTsx "github.com/smacker/go-tree-sitter/typescript/tsx" - treeSitterTs "github.com/smacker/go-tree-sitter/typescript/typescript" -) - -// ParseResult is the result of parsing a file. -type ParseResult struct { - // Ast is the root node of the tree-sitter parse-tree - // representing this file - Ast *sitter.Node - // Source is the raw source code of the file - Source []byte - // FilePath is the path to the file that was parsed - FilePath string - // Language is the tree-sitter language used to parse the file - TsLanguage *sitter.Language - // Language is the language of the file - Language Language - // ScopeTree represents the scope hierarchy of the file. - // Can be nil if scope support for this language has not been implemented yet. - ScopeTree *ScopeTree -} - -type Language int - -const ( - LangUnknown Language = iota - LangPy - LangJs // vanilla JS and JSX - LangTs // TypeScript (not TSX) - LangTsx // TypeScript with JSX extension - LangJava - LangRuby - LangRust - LangYaml - LangCss - LangDockerfile - LangMarkdown - LangSql - LangKotlin - LangOCaml - LangLua - LangBash - LangCsharp - LangElixir - LangElm - LangGo - LangGroovy - LangHcl - LangHtml - LangPhp - LangScala - LangSwift -) - -// tsGrammarForLang returns the tree-sitter grammar for the given language. -// May return `nil` when `lang` is `LangUnkown`. -func (lang Language) Grammar() *sitter.Language { - switch lang { - case LangPy: - return treeSitterPy.GetLanguage() - case LangJs: - return treeSitterTsx.GetLanguage() // Use TypeScript's JSX grammar for JS/JSX - case LangTs: - return treeSitterTs.GetLanguage() - case LangTsx: - return treeSitterTsx.GetLanguage() - case LangJava: - return treeSitterJava.GetLanguage() - case LangRuby: - return treeSitterRuby.GetLanguage() - case LangRust: - return treeSitterRust.GetLanguage() - case LangSql: - return treeSitterSql.GetLanguage() - case LangKotlin: - return treeSitterKotlin.GetLanguage() - case LangCss: - return treeSitterCss.GetLanguage() - case LangOCaml: - return treeSitterOCaml.GetLanguage() - case LangLua: - return treeSitterLua.GetLanguage() - case LangDockerfile: - return treeSitterDockerfile.GetLanguage() - case LangBash: - return treeSitterBash.GetLanguage() - case LangCsharp: - return treeSitterCsharp.GetLanguage() - case LangElixir: - return treeSitterElixir.GetLanguage() - case LangElm: - return treeSitterElm.GetLanguage() - case LangGo: - return treeSitterGo.GetLanguage() - case LangGroovy: - return treeSitterGroovy.GetLanguage() - case LangHcl: - return treeSitterHcl.GetLanguage() - case LangHtml: - return treeSitterHtml.GetLanguage() - case LangPhp: - return treeSitterPhp.GetLanguage() - case LangScala: - return treeSitterScala.GetLanguage() - case LangSwift: - return treeSitterSwift.GetLanguage() - default: - return nil - } -} - -// NOTE(@injuly): TypeScript and TSX have to parsed with DIFFERENT -// grammars. Otherwise, because an expression like `bar` is -// parsed as a (legacy) type-cast in TS, but a JSXElement in TSX. -// See: https://facebook.github.io/jsx/#prod-JSXElement - -// LanguageFromFilePath returns the Language of the file at the given path -// returns `LangUnkown` if the language is not recognized (e.g: `.txt` files). -func LanguageFromFilePath(path string) Language { - ext := filepath.Ext(path) - switch ext { - case ".py": - return LangPy - // TODO: .jsx and .js can both have JSX syntax -_- - case ".js", ".jsx": - return LangJs - case ".ts": - return LangTs - case ".tsx": - return LangTs - case ".java": - return LangJava - case ".rb": - return LangRuby - case ".rs": - return LangRust - case ".css": - return LangCss - case ".Dockerfile": - return LangDockerfile - case ".sql": - return LangSql - case ".kt": - return LangKotlin - case ".ml": - return LangOCaml - case ".lua": - return LangLua - case ".sh": - return LangBash - case ".cs": - return LangCsharp - case ".ex": - return LangElixir - case ".elm": - return LangElm - case ".go": - return LangGo - case ".groovy": - return LangGroovy - case ".tf": - return LangHcl - case ".html": - return LangHtml - case ".php": - return LangPhp - case ".scala": - return LangScala - case ".swift": - return LangSwift - default: - return LangUnknown - } -} - -func GetExtFromLanguage(lang Language) string { - switch lang { - case LangPy: - return ".py" - case LangJs: - return ".js" - case LangTs: - return ".ts" - case LangTsx: - return ".tsx" - case LangJava: - return ".java" - case LangRuby: - return ".rb" - case LangRust: - return ".rs" - case LangYaml: - return ".yaml" - case LangCss: - return ".css" - case LangDockerfile: - return ".Dockerfile" - case LangSql: - return ".sql" - case LangKotlin: - return ".kt" - case LangOCaml: - return ".ml" - case LangLua: - return ".lua" - case LangBash: - return ".sh" - case LangCsharp: - return ".cs" - case LangElixir: - return ".ex" - case LangElm: - return ".elm" - case LangGo: - return ".go" - case LangGroovy: - return ".groovy" - case LangHcl: - return ".tf" - case LangHtml: - return ".html" - case LangPhp: - return ".php" - case LangScala: - return ".scala" - case LangSwift: - return ".swift" - default: - return "" - } -} - -func Parse(filePath string, source []byte, language Language, grammar *sitter.Language) (*ParseResult, error) { - ast, err := sitter.ParseCtx(context.Background(), source, grammar) - if err != nil { - return nil, fmt.Errorf("failed to parse %s", filePath) - } - - scopeTree := MakeScopeTree(language, ast, source) - parseResult := &ParseResult{ - Ast: ast, - Source: source, - FilePath: filePath, - TsLanguage: grammar, - Language: language, - ScopeTree: scopeTree, - } - - return parseResult, nil -} - -// ParseFile parses the file at the given path using the appropriate -// tree-sitter grammar. -func ParseFile(filePath string) (*ParseResult, error) { - lang := LanguageFromFilePath(filePath) - grammar := lang.Grammar() - if grammar == nil { - return nil, fmt.Errorf("unsupported file type: %s", filePath) - } - - source, err := os.ReadFile(filePath) - if err != nil { - return nil, err - } - - return Parse(filePath, source, lang, grammar) -} - -func GetEscapedCommentIdentifierFromPath(path string) string { - lang := LanguageFromFilePath(path) - switch lang { - case LangJs, LangTs, LangTsx, LangJava, LangRust, LangCss, LangMarkdown, LangKotlin, LangCsharp, LangGo, LangGroovy, LangPhp, LangScala, LangSwift: - return "\\/\\/" - case LangPy, LangLua, LangBash, LangRuby, LangYaml, LangDockerfile, LangElixir, LangHcl: - return "#" - case LangSql, LangElm: - return "--" - case LangHtml: - return "<\\!--" - case LangOCaml: - return "\\(\\*" - default: - return "" - } -} diff --git a/pkg/analysis/pattern_rule.go b/pkg/analysis/pattern_rule.go deleted file mode 100644 index 074a1644..00000000 --- a/pkg/analysis/pattern_rule.go +++ /dev/null @@ -1,360 +0,0 @@ -package analysis - -import ( - "fmt" - "os" - "strings" - - "github.com/gobwas/glob" - sitter "github.com/smacker/go-tree-sitter" - "globstar.dev/pkg/config" - "gopkg.in/yaml.v3" -) - -// To get a node back from a tree-sitter query, it *must* have a capture name. -// So: (call_expression) will match nothing, but (call_expression) @some_key -// will match all call expressions. -// For filtering patterns with clauses in the yaml file, like: -// filters: -// - pattern-inside: (call_expression) -// - pattern-not-inside: (catch_block) -// -// We need a to append a key name at the end of the pattern written by the user. -// This is the key that we will use. -const filterPatternKey = "__filter__key__" - -// A YamlChecker is a checker that matches a tree-sitter query pattern -// and reports an issue when the pattern is found. -// Unlike regular issues, PatternCheckers are not associated with a specific node type, rather -// they are invoked for *every* node that matches the pattern. -type YamlChecker interface { - Name() string - Patterns() []*sitter.Query - Language() Language - Category() config.Category - Severity() config.Severity - OnMatch( - ana *Analyzer, // the analyzer instance - matchedQuery *sitter.Query, // the query that found an AST node - matchedNode *sitter.Node, // the AST node that matched the query - captures []sitter.QueryCapture, // list of captures made inside the query - ) - PathFilter() *PathFilter - NodeFilters() []NodeFilter -} - -// NodeFilter is a filter that can be applied to a PatternChecker to restrict -// the the nodes that the checker is applied to. -// The checker is only applied to nodes that have a parent matching (or not matching) the query. -type NodeFilter struct { - query *sitter.Query - shouldMatch bool -} - -// PathFilter is a glob that can be applied to a PatternChecker to restrict -// the files that the checker is applied to. -type PathFilter struct { - ExcludeGlobs []glob.Glob - IncludeGlobs []glob.Glob -} - -type patternCheckerImpl struct { - language Language - patterns []*sitter.Query - issueMessage string - issueId string - category config.Category - severity config.Severity - pathFilter *PathFilter - filters []NodeFilter -} - -func (r *patternCheckerImpl) Language() Language { - return r.language -} - -func (r *patternCheckerImpl) Patterns() []*sitter.Query { - return r.patterns -} - -func (r *patternCheckerImpl) OnMatch( - ana *Analyzer, - matchedQuery *sitter.Query, - matchedNode *sitter.Node, - captures []sitter.QueryCapture, -) { - - // replace all '@' with the corresponding capture value - message := r.issueMessage - // TODO: 1. escape '@' in the message, 2. use a more efficient way to replace - for strings.ContainsRune(message, '@') { - for _, capture := range captures { - captureName := matchedQuery.CaptureNameForId(capture.Index) - message = strings.ReplaceAll( - message, - "@"+captureName, - capture.Node.Content(ana.ParseResult.Source), - ) - } - } - raisedIssue := &Issue{ - Range: matchedNode.Range(), - Node: matchedNode, - Message: message, - Filepath: ana.ParseResult.FilePath, - Category: r.Category(), - Severity: r.Severity(), - Id: &r.issueId, - } - - filepath := ana.ParseResult.FilePath - skipComments := fileSkipComment[filepath] - if !ana.ContainsSkipcq(skipComments, raisedIssue) { - ana.Report(raisedIssue) - } -} - -func (r *patternCheckerImpl) Name() string { - return r.issueId -} - -func (r *patternCheckerImpl) PathFilter() *PathFilter { - return r.pathFilter -} - -func (r *patternCheckerImpl) NodeFilters() []NodeFilter { - return r.filters -} - -func (r *patternCheckerImpl) Category() config.Category { - return r.category -} - -func (r *patternCheckerImpl) Severity() config.Severity { - return r.severity -} - -func CreatePatternChecker( - patterns []*sitter.Query, - language Language, - issueMessage string, - issueId string, - pathFilter *PathFilter, -) YamlChecker { - return &patternCheckerImpl{ - language: language, - patterns: patterns, - issueMessage: issueMessage, - issueId: issueId, - pathFilter: pathFilter, - } -} - -type filterYAML struct { - PatternInside string `yaml:"pattern-inside,omitempty"` - PatternNotInside string `yaml:"pattern-not-inside,omitempty"` -} - -type PatternCheckerFile struct { - Language string `yaml:"language"` - Code string `yaml:"name"` - Message string `yaml:"message"` - Category config.Category `yaml:"category"` - Severity config.Severity `yaml:"severity"` - // Pattern is a single pattern in the form of: - // pattern: (some_pattern) - // in the YAML file - Pattern string `yaml:"pattern,omitempty"` - // Patterns are ultiple patterns in the form of: - // pattern: (something) - // in the YAML file - Patterns []string `yaml:"patterns,omitempty"` - Description string `yaml:"description,omitempty"` - Filters []filterYAML `yaml:"filters,omitempty"` - Exclude []string `yaml:"exclude,omitempty"` - Include []string `yaml:"include,omitempty"` -} - -// DecodeLanguage converts a stringified language name to its corresponding -// Language enum -func DecodeLanguage(language string) Language { - language = strings.ToLower(language) - switch language { - case "javascript", "js": - return LangJs - case "typescript", "ts": - return LangTs - case "jsx", "tsx": - return LangTsx - case "python", "py": - return LangPy - case "ocaml", "ml": - return LangOCaml - case "docker", "dockerfile": - return LangDockerfile - case "java": - return LangJava - case "kotlin", "kt": - return LangKotlin - case "rust", "rs": - return LangRust - case "ruby", "rb": - return LangRuby - case "lua": - return LangLua - case "yaml", "yml": - return LangYaml - case "sql": - return LangSql - case "css", "css3": - return LangCss - case "markdown", "md": - return LangMarkdown - case "sh", "bash": - return LangBash - case "csharp", "cs": - return LangCsharp - case "elixir", "ex": - return LangElixir - case "elm": - return LangElm - case "go": - return LangGo - case "groovy": - return LangGroovy - case "hcl", "tf": - return LangHcl - case "html": - return LangHtml - case "php": - return LangPhp - case "scala": - return LangScala - case "swift": - return LangSwift - default: - return LangUnknown - } -} - -// ReadFromFile reads a pattern checker definition from a YAML config file. -func ReadFromFile(filePath string) (YamlChecker, error) { - fileContent, err := os.ReadFile(filePath) - if err != nil { - return nil, err - } - - return ReadFromBytes(fileContent) -} - -// ReadFromBytes reads a pattern checker definition from bytes array -func ReadFromBytes(fileContent []byte) (YamlChecker, error) { - var checker PatternCheckerFile - if err := yaml.Unmarshal(fileContent, &checker); err != nil { - return nil, err - } - - lang := DecodeLanguage(checker.Language) - if lang == LangUnknown { - return nil, fmt.Errorf("unknown language code: '%s'", checker.Language) - } - - if checker.Code == "" { - return nil, fmt.Errorf("no name provided in checker definition") - } - - if checker.Message == "" { - return nil, fmt.Errorf("no message provided in checker '%s'", checker.Code) - } - - var patterns []*sitter.Query - if checker.Pattern != "" { - pattern, err := sitter.NewQuery([]byte(checker.Pattern), lang.Grammar()) - if err != nil { - return nil, err - } - patterns = append(patterns, pattern) - } else if len(checker.Patterns) > 0 { - for _, patternStr := range checker.Patterns { - pattern, err := sitter.NewQuery([]byte(patternStr), lang.Grammar()) - if err != nil { - return nil, err - } - patterns = append(patterns, pattern) - } - } else { - return nil, fmt.Errorf("no pattern provided in checker '%s'", checker.Code) - } - - if checker.Pattern != "" && len(checker.Patterns) > 0 { - return nil, fmt.Errorf("only one of 'pattern' or 'patterns' can be provided in a checker definition") - } - - // include and exclude patterns - var pathFilter *PathFilter - if checker.Exclude != nil || checker.Include != nil { - pathFilter = &PathFilter{ - ExcludeGlobs: make([]glob.Glob, 0, len(checker.Exclude)), - IncludeGlobs: make([]glob.Glob, 0, len(checker.Include)), - } - - for _, exclude := range checker.Exclude { - g, err := glob.Compile(exclude) - if err != nil { - return nil, err - } - pathFilter.ExcludeGlobs = append(pathFilter.ExcludeGlobs, g) - } - - for _, include := range checker.Include { - g, err := glob.Compile(include) - if err != nil { - return nil, err - } - pathFilter.IncludeGlobs = append(pathFilter.IncludeGlobs, g) - } - } - - // node filters - var filters []NodeFilter - if checker.Filters != nil { - for _, filter := range checker.Filters { - if filter.PatternInside != "" { - queryStr := filter.PatternInside + " @" + filterPatternKey - query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) - if err != nil { - return nil, err - } - - filters = append(filters, NodeFilter{ - query: query, - shouldMatch: true, - }) - } - - if filter.PatternNotInside != "" { - queryStr := filter.PatternNotInside + " @" + filterPatternKey - query, err := sitter.NewQuery([]byte(queryStr), lang.Grammar()) - if err != nil { - return nil, err - } - - filters = append(filters, NodeFilter{ - query: query, - shouldMatch: false, - }) - } - } - } - - patternChecker := &patternCheckerImpl{ - language: lang, - patterns: patterns, - issueMessage: checker.Message, - issueId: checker.Code, - pathFilter: pathFilter, - filters: filters, - } - - return patternChecker, nil -} diff --git a/pkg/analysis/rule.go b/pkg/analysis/rule.go deleted file mode 100644 index bbb76443..00000000 --- a/pkg/analysis/rule.go +++ /dev/null @@ -1,33 +0,0 @@ -package analysis - -import sitter "github.com/smacker/go-tree-sitter" - -type VisitFn func(checker Checker, analyzer *Analyzer, node *sitter.Node) - -type Checker interface { - NodeType() string - GetLanguage() Language - OnEnter() *VisitFn - OnLeave() *VisitFn -} - -type checkerImpl struct { - nodeType string - language Language - onEnter *VisitFn - onLeave *VisitFn -} - -func (r *checkerImpl) NodeType() string { return r.nodeType } -func (r *checkerImpl) GetLanguage() Language { return r.language } -func (r *checkerImpl) OnEnter() *VisitFn { return r.onEnter } -func (r *checkerImpl) OnLeave() *VisitFn { return r.onLeave } - -func CreateChecker(nodeType string, language Language, onEnter, onLeave *VisitFn) Checker { - return &checkerImpl{ - nodeType: nodeType, - language: language, - onEnter: onEnter, - onLeave: onLeave, - } -} diff --git a/pkg/analysis/scope.go b/pkg/analysis/scope.go deleted file mode 100644 index 2aefe4ad..00000000 --- a/pkg/analysis/scope.go +++ /dev/null @@ -1,190 +0,0 @@ -// A language agnostic interface for scope handling which -// also handles forward declarations and references (e.g: hoisting). -// BUT, references aren't tracked across files in a language like Golang or C++ (macros/extern/using namespace) - -package analysis - -import sitter "github.com/smacker/go-tree-sitter" - -// Reference represents a variable reference inside a source file -// Cross-file references like those in Golang and C++ (macros/extern) are NOT supported, -// so this shouldn't be used for checkers like "unused-variable", but is safe to use for checkers like -// "unused-import" -type Reference struct { - // IsWriteRef determines if this reference is a write reference. - // For write refs, only the expression being assigned is stored. - // i.e: for `a = 3`, this list will store the `3` node, not the assignment node - IsWriteRef bool - // Variable stores the variable being referenced - Variable *Variable - // Node stores the node that references the variable - Node *sitter.Node -} - -type VarKind int32 - -const ( - VarKindError VarKind = iota - VarKindImport - VarKindFunction - VarKindVariable - VarKindParameter -) - -type Variable struct { - Kind VarKind - // Stores the name of the variable - Name string - // DeclNode is the AST node that declares this variable - DeclNode *sitter.Node - // Refs is a list of references to this variable throughout the file - Refs []*Reference -} - -// ScopeBuilder is an interface that has to be implemented -// once for every supported language. -// Languages that don't implement a `ScopeBuilder` can still have checkers, just -// not any that require scope resolution. -type ScopeBuilder interface { - GetLanguage() Language - // NodeCreatesScope returns true if the node introduces a new scope - // into the scope tree - NodeCreatesScope(node *sitter.Node) bool - // DeclaresVariable determines if we can extract new variables out of this AST node - DeclaresVariable(node *sitter.Node) bool - // CollectVariables extracts variables from the node and adds them to the scope - CollectVariables(node *sitter.Node) []*Variable - // OnNodeEnter is called when the scope builder enters a node - // for the first time, and hasn't scanned its children decls just yet - // can be used to handle language specific scoping rules, if any - // If `node` is smth like a block statement, `currentScope` corresponds - // to the scope introduced by the block statement. - OnNodeEnter(node *sitter.Node, currentScope *Scope) - // OnNodeExit is called when the scope builder exits a node - // can be used to handle language specific scoping rules, if any - // If `node` is smth like a block statement, `currentScope` corresponds - // to the scope introduced by the block statement. - OnNodeExit(node *sitter.Node, currentScope *Scope) -} - -type Scope struct { - // AstNode is the AST node that introduces this scope into the scope tree - AstNode *sitter.Node - // Variables is a map of variable name to an object representing it - Variables map[string]*Variable - // Upper is the parent scope of this scope - Upper *Scope - // Children is a list of scopes that are children of this scope - Children []*Scope -} - -func NewScope(upper *Scope) *Scope { - return &Scope{ - Variables: map[string]*Variable{}, - Upper: upper, - } -} - -// Lookup searches for a variable in the current scope and its parents -func (s *Scope) Lookup(name string) *Variable { - if v, exists := s.Variables[name]; exists { - return v - } - - if s.Upper != nil { - return s.Upper.Lookup(name) - } - - return nil -} - -type ScopeTree struct { - Language Language - // ScopeOfNode maps every scope-having node to its corresponding scope. - // E.g: a block statement is mapped to the scope it introduces. - ScopeOfNode map[*sitter.Node]*Scope - // Root is the top-level scope in the program, - // usually associated with the `program` or `module` node - Root *Scope -} - -// BuildScopeTree constructs a scope tree from the AST for a program -func BuildScopeTree(builder ScopeBuilder, ast *sitter.Node, source []byte) *ScopeTree { - root := NewScope(nil) - root.AstNode = ast - - scopeOfNode := make(map[*sitter.Node]*Scope) - buildScopeTree(builder, source, ast, root, scopeOfNode) - - return &ScopeTree{ - Language: builder.GetLanguage(), - ScopeOfNode: scopeOfNode, - Root: root, - } -} - -func buildScopeTree( - builder ScopeBuilder, - source []byte, - node *sitter.Node, - scope *Scope, - scopeOfNode map[*sitter.Node]*Scope, -) *Scope { - builder.OnNodeEnter(node, scope) - defer builder.OnNodeExit(node, scope) - - if builder.DeclaresVariable(node) { - decls := builder.CollectVariables(node) - for _, decl := range decls { - scope.Variables[decl.Name] = decl - } - } - - nextScope := scope - if builder.NodeCreatesScope(node) { - nextScope = NewScope(scope) - nextScope.AstNode = node - scopeOfNode[node] = nextScope - - if scope != nil { - scope.Children = append(scope.Children, nextScope) - } else { - scope = nextScope // root - } - } - - for i := 0; i < int(node.NamedChildCount()); i++ { - child := node.NamedChild(i) - buildScopeTree(builder, source, child, nextScope, scopeOfNode) - } - - return scope -} - -// GetScope finds the nearest surrounding scope of an AST node -func (st *ScopeTree) GetScope(node *sitter.Node) *Scope { - if scope, exists := st.ScopeOfNode[node]; exists { - return scope - } - - if parent := node.Parent(); parent != nil { - return st.GetScope(parent) - } - - return nil -} - -func MakeScopeTree(lang Language, ast *sitter.Node, source []byte) *ScopeTree { - switch lang { - case LangPy: - return nil - case LangTs, LangJs, LangTsx: - builder := &TsScopeBuilder{ - ast: ast, - source: source, - } - return BuildScopeTree(builder, ast, source) - default: - return nil - } -} diff --git a/pkg/analysis/scope_ts.go b/pkg/analysis/scope_ts.go deleted file mode 100644 index f69b50cd..00000000 --- a/pkg/analysis/scope_ts.go +++ /dev/null @@ -1,295 +0,0 @@ -// scope resolution implementation for JS and TS files -package analysis - -import ( - "slices" - - sitter "github.com/smacker/go-tree-sitter" -) - -type UnresolvedRef struct { - id *sitter.Node - surroundingScope *Scope -} - -type TsScopeBuilder struct { - ast *sitter.Node - source []byte - // unresolvedRefs is the list of references that could not be resolved thus far in the traversal - unresolvedRefs []UnresolvedRef -} - -func (j *TsScopeBuilder) GetLanguage() Language { - return LangJs -} - -var ScopeNodes = []string{ - "statement_block", - "function_declaration", - "function_expression", - "for_statement", - "for_in_statement", - "for_of_statement", - "program", -} - -func (ts *TsScopeBuilder) NodeCreatesScope(node *sitter.Node) bool { - return slices.Contains(ScopeNodes, node.Type()) -} - -func (ts *TsScopeBuilder) DeclaresVariable(node *sitter.Node) bool { - typ := node.Type() - // addition of function_declaration and formal_parameters necessary for functional scope handling. - return typ == "variable_declarator" || typ == "import_clause" || typ == "import_specifier" || typ == "formal_parameters" || typ == "function_declaration" -} - -func (ts *TsScopeBuilder) scanDecl(idOrPattern, declarator *sitter.Node, decls []*Variable) []*Variable { - switch idOrPattern.Type() { - case "identifier": - // = ... - nameStr := idOrPattern.Content(ts.source) - decls = append(decls, &Variable{ - Kind: VarKindVariable, - Name: nameStr, - DeclNode: declarator, - }) - - case "object_pattern": - // { } = ... - props := ChildrenOfType(idOrPattern, "shorthand_property_identifier_pattern") - for _, prop := range props { - decls = append(decls, &Variable{ - Kind: VarKindVariable, - Name: prop.Content(ts.source), - DeclNode: declarator, - }) - } - - pairs := ChildrenOfType(idOrPattern, "pair_pattern") - for _, pair := range pairs { - decls = ts.scanDecl(pair, declarator, decls) - } - - // { realName : } = ... - // alias can be an identifier or nested object pattern. - case "pair_pattern": - binding := idOrPattern.ChildByFieldName("value") - decls = ts.scanDecl(binding, declarator, decls) - - case "array_pattern": - // [ ] = foo - childrenIds := ChildrenOfType(idOrPattern, "identifier") - childrenObjPatterns := ChildrenOfType(idOrPattern, "object_pattern") - childrenArrayPatterns := ChildrenOfType(idOrPattern, "array_pattern") - for _, id := range childrenIds { - decls = append(decls, &Variable{ - Kind: VarKindVariable, - Name: id.Content(ts.source), - DeclNode: declarator, - }) - } - - for _, objPattern := range childrenObjPatterns { - decls = ts.scanDecl(objPattern, declarator, decls) - } - - for _, arrayPattern := range childrenArrayPatterns { - decls = ts.scanDecl(arrayPattern, declarator, decls) - } - - for _, objectPattern := range childrenObjPatterns { - decls = ts.scanDecl(objectPattern, declarator, decls) - } - } - - return decls -} - -func (ts *TsScopeBuilder) variableFromImportSpecifier(specifier *sitter.Node) *Variable { - name := specifier.ChildByFieldName("name") - if name == nil { - // skipcq: TCV-001 - return nil - } - - var Name string - if specifier.Child(2) != nil { - // alias ( as ) - local := specifier.Child(2) - Name = local.Content(ts.source) - } else { - // no alias - Name = name.Content(ts.source) - } - - return &Variable{ - Kind: VarKindImport, - Name: Name, - DeclNode: specifier, - } -} - -func (ts *TsScopeBuilder) CollectVariables(node *sitter.Node) []*Variable { - var declaredVars []*Variable - switch node.Type() { - case "variable_declarator": - lhs := node.ChildByFieldName("name") - return ts.scanDecl(lhs, node, declaredVars) - - case "function_declaration": - name := node.ChildByFieldName("name") - // skipcq: TCV-001 - if name == nil { - break - } - - declaredVars = append(declaredVars, &Variable{ - Kind: VarKindFunction, - Name: name.Content(ts.source), - DeclNode: node, - }) - - case "formal_parameters": - // TODO - - for i := 0; i < int(node.NamedChildCount()); i++ { - param := node.NamedChild(i) - if param == nil { - continue - } - // Handle different parameter types (required, optional, rest, patterns) - // Simple identifier parameter: function foo(x) - // Required parameter often wraps identifier: function foo(x: number) - var identifier *sitter.Node - if param.Type() == "identifier" { - identifier = param - } else if param.Type() == "required_parameter" || param.Type() == "optional_parameter" { - // Look for pattern which might be identifier or destructuring - pattern := param.ChildByFieldName("pattern") - if pattern != nil && pattern.Type() == "identifier" { - identifier = pattern - } - // TODO: Handle destructuring patterns within parameters if needed by calling scanDecl - } else if param.Type() == "assignment_pattern" { - // Parameter with default value: function foo(x = 1) - left := param.ChildByFieldName("left") - if left != nil && left.Type() == "identifier" { - identifier = left - } - // TODO: Handle destructuring patterns within parameters if needed by calling scanDecl - } - // TODO: Handle rest parameter (...)+ - if identifier != nil { - declaredVars = append(declaredVars, &Variable{ - Kind: VarKindParameter, - Name: identifier.Content(ts.source), - DeclNode: param, // Use the parameter node itself (or identifier) as DeclNode - }) - } - // Add handling for destructuring patterns here if necessary using scanDecl - } - - case "import_specifier": - // import { } from ... - variable := ts.variableFromImportSpecifier(node) - declaredVars = append(declaredVars, variable) - - case "import_clause": - // import , { } from ... - defaultImport := FirstChildOfType(node, "identifier") - if defaultImport != nil { - declaredVars = append(declaredVars, &Variable{ - Kind: VarKindImport, - Name: defaultImport.Content(ts.source), - DeclNode: defaultImport, - }) - } - } - - return declaredVars -} - -func (ts *TsScopeBuilder) OnNodeEnter(node *sitter.Node, scope *Scope) { - // collect identifier references if one is found - if node.Type() == "identifier" { - parent := node.Parent() - if parent == nil { - return - } - - parentType := parent.Type() - - if parentType == "variable_declarator" && parent.ChildByFieldName("name") == node { - return - } - - if parentType == "formal_parameters" { - return - } - - // binding identifiers in array patterns are not references. - // e.g. in `const [a, b] = foo;`, `a` and `b` are not references. - if parentType == "array_pattern" { - return - } - - if parentType == "assignment_pattern" && parent.ChildByFieldName("left") == node { - return - } - - if parentType == "required_parameter" && parent.ChildByFieldName("pattern") == node { - return - } - - // destructured property binding names are *not* references. - // e.g. in `const { a: b } = foo;`, `a` is not a reference. - if parentType == "pair_pattern" && parent.ChildByFieldName("key") == node { - return - } - - if parentType == "import_clause" || parentType == "import_specifier" { - return - } - - // try to resolve this reference to a target variable - variable := scope.Lookup(node.Content(ts.source)) - if variable == nil { - unresolved := UnresolvedRef{ - id: node, - surroundingScope: scope, - } - - ts.unresolvedRefs = append(ts.unresolvedRefs, unresolved) - return - } - - // If a variable is found, add a reference to it - ref := &Reference{ - Variable: variable, - Node: node, - } - variable.Refs = append(variable.Refs, ref) - } -} - -func (ts *TsScopeBuilder) OnNodeExit(node *sitter.Node, scope *Scope) { - if node.Type() == "program" { - // At the end, try to resolve all unresolved references - for _, unresolved := range ts.unresolvedRefs { - variable := unresolved.surroundingScope.Lookup( - unresolved.id.Content(ts.source), - ) - - if variable == nil { - continue - } - - ref := &Reference{ - Variable: variable, - Node: unresolved.id, - } - - variable.Refs = append(variable.Refs, ref) - } - } -} diff --git a/pkg/analysis/scope_ts_test.go b/pkg/analysis/scope_ts_test.go deleted file mode 100644 index 389a8de2..00000000 --- a/pkg/analysis/scope_ts_test.go +++ /dev/null @@ -1,133 +0,0 @@ -package analysis - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func parseFile(t *testing.T, source string) *ParseResult { - parsed, err := Parse("file.ts", []byte(source), LangJs, LangJs.Grammar()) - require.NoError(t, err) - require.NotNil(t, parsed) - return parsed -} - -func Test_BuildScopeTree(t *testing.T) { - t.Run("is able to resolve references", func(t *testing.T) { - source := ` - let x = 1 - { - let y = x - }` - parsed := parseFile(t, source) - - scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) - require.NotNil(t, scopeTree) - globalScope := scopeTree.Root.Children[0] - varX, exists := globalScope.Variables["x"] - require.True(t, exists) - require.NotNil(t, varX) - - varY, exists := globalScope.Children[0].Variables["y"] - require.True(t, exists) - require.NotNil(t, varY) - require.Equal(t, VarKindVariable, varY.Kind) - - assert.Equal(t, 1, len(varX.Refs)) - xRef := varX.Refs[0] - assert.Equal(t, "x", xRef.Variable.Name) - require.Equal(t, VarKindVariable, varY.Kind) - }) - - t.Run("supports import statements", func(t *testing.T) { - source := ` - import { extname } from 'path' - { - let { extname = 1 } = null // does NOT count as a reference - } - - let { x = extname } = null // counts as a reference - - { - extname('file.txt') // counts as a reference - let { extname } = null // does NOT count as a reference - } - - import { readFile as r } from 'file' - r('file.txt') - function f(r = x) {} // NOT a reference - ` - parsed := parseFile(t, source) - - scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) - require.NotNil(t, scopeTree) - globalScope := scopeTree.Root.Children[0] - { - varR, exists := globalScope.Variables["r"] - require.True(t, exists) - require.NotNil(t, varR) - - assert.Equal(t, VarKindImport, varR.Kind) - - rRefs := varR.Refs - require.Equal(t, 1, len(rRefs)) - assert.Equal(t, "call_expression", rRefs[0].Node.Parent().Type()) - } - - { - varExtname, exists := globalScope.Variables["extname"] - require.True(t, exists) - require.NotNil(t, varExtname) - - assert.Equal(t, VarKindImport, varExtname.Kind) - - extnameRefs := varExtname.Refs - require.Equal(t, 2, len(extnameRefs)) - assert.Equal(t, "object_assignment_pattern", extnameRefs[0].Node.Parent().Type()) - assert.Equal(t, "call_expression", extnameRefs[1].Node.Parent().Type()) - } - }) - - t.Run("handles function declaration with parameters", func(t *testing.T) { - source := ` - function greet(name, age = 18) { - let greeting = "Hello"; - return greeting + " " + name; - } - greet("Alice") - ` - - parsed := parseFile(t, source) - require.NotNil(t, parsed) - scopeTree := MakeScopeTree(parsed.Language, parsed.Ast, parsed.Source) - globalScope := scopeTree.Root.Children[0] - // Checking function declaration - funcVar := globalScope.Lookup("greet") - require.NotNil(t, funcVar) - funcVariable, exists := globalScope.Variables["greet"] // tagged as an Identifier - require.True(t, exists) - require.NotNil(t, funcVariable) - - funcScope := scopeTree.GetScope(funcVar.DeclNode) - require.NotNil(t, funcScope) - - nameVar, exists := funcScope.Variables["name"] - require.True(t, exists) - require.Equal(t, VarKindParameter, nameVar.Kind) - - ageVar, exists := funcScope.Variables["age"] - require.True(t, exists) - require.Equal(t, VarKindParameter, ageVar.Kind) - - // existence of function body - - bodyScope := funcScope.Children[0] - require.NotNil(t, bodyScope) - - greetingVar, exists := bodyScope.Variables["greeting"] - require.True(t, exists) - require.Equal(t, VarKindVariable, greetingVar.Kind) - }) -} diff --git a/pkg/analysis/walk.go b/pkg/analysis/walk.go deleted file mode 100644 index 06273995..00000000 --- a/pkg/analysis/walk.go +++ /dev/null @@ -1,96 +0,0 @@ -package analysis - -import ( - sitter "github.com/smacker/go-tree-sitter" -) - -// Walker is an interface that dictates what to do when -// entering and leaving each node during the pre-order traversal -// of a tree. -// To traverse post-order, use the `OnLeaveNode` callback. -type Walker interface { - // OnEnterNode is called when the walker enters a node. - // The boolean return value indicates whether the walker should - // continue walking the sub-tree of this node. - OnEnterNode(node *sitter.Node) bool - // OnLeaveNode is called when the walker leaves a node. - // This is called after all the children of the node have been visited and explored. - OnLeaveNode(node *sitter.Node) -} - -func WalkTree(node *sitter.Node, walker Walker) { - goInside := walker.OnEnterNode(node) - if goInside { - for i := 0; i < int(node.NamedChildCount()); i++ { - child := node.NamedChild(i) - WalkTree(child, walker) - } - } - - walker.OnLeaveNode(node) -} - -// ChildrenWithFieldName returns all the children of a node -// with a specific field name. -// Tree-sitter can have multiple children with the same field name. -func ChildrenWithFieldName(node *sitter.Node, fieldName string) []*sitter.Node { - var children []*sitter.Node - for i := 0; i < int(node.ChildCount()); i++ { - if node.FieldNameForChild(i) == fieldName { - child := node.Child(i) - children = append(children, child) - } - } - - return children -} - -// FindMatchingChild iterates over all children of a node—both named and unnamed—and returns the -// first child that matches the predicate function. -func FindMatchingChild(node *sitter.Node, predicate func(*sitter.Node) bool) *sitter.Node { - nChildren := int(node.ChildCount()) - - for i := 0; i < nChildren; i++ { - child := node.Child(i) - if predicate(child) { - return child - } - } - - return nil -} - -func ChildrenOfType(node *sitter.Node, nodeType string) []*sitter.Node { - nChildren := int(node.ChildCount()) - var results []*sitter.Node - for i := 0; i < nChildren; i++ { - child := node.Child(i) - if child.Type() == nodeType { - results = append(results, child) - } - } - return results -} - -func ChildWithFieldName(node *sitter.Node, fieldName string) *sitter.Node { - nChildren := int(node.NamedChildCount()) - for i := 0; i < nChildren; i++ { - if node.FieldNameForChild(i) == fieldName { - return node.Child(i) - } - } - - return nil -} - -func FirstChildOfType(node *sitter.Node, nodeType string) *sitter.Node { - nChildren := int(node.ChildCount()) - for i := 0; i < nChildren; i++ { - child := node.Child(i) - if child.Type() == nodeType { - return child - } - } - - return nil -} diff --git a/pkg/cli/cli.go b/pkg/cli/cli.go index f6183aca..cadecb89 100644 --- a/pkg/cli/cli.go +++ b/pkg/cli/cli.go @@ -17,7 +17,8 @@ import ( goAnalysis "globstar.dev/analysis" "globstar.dev/checkers" "globstar.dev/checkers/discover" - "globstar.dev/pkg/analysis" + + // "globstar.dev/pkg/analysis" "globstar.dev/pkg/config" "globstar.dev/util" ) @@ -26,9 +27,9 @@ type Cli struct { // RootDirectory is the target directory to analyze RootDirectory string // Checkers is a list of checkers that are applied to the files in `RootDirectory` - Checkers []analysis.Checker - Config *config.Config - CmpHash string + // Checkers []analysis.Checker + Config *config.Config + CmpHash string } func (c *Cli) loadConfig() error { @@ -193,7 +194,7 @@ to run only the built-in checkers, and --checkers=all to run both.`, // Track test failures but continue running all tests var testsFailed bool - yamlPassed, err := runTests(analysisDir) + yamlPassed, err := runTestCases(analysisDir) if err != nil { err = fmt.Errorf("error running YAML tests: %w", err) fmt.Fprintln(os.Stderr, err.Error()) @@ -201,6 +202,7 @@ to run only the built-in checkers, and --checkers=all to run both.`, } if !yamlPassed { testsFailed = true + return fmt.Errorf("YAML tests failed ") } goPassed := true @@ -294,46 +296,46 @@ func (c *Cli) buildCustomGoCheckers() error { return nil } -func (c *Cli) CheckFile( - checkersMap map[analysis.Language][]analysis.Checker, - patternCheckers map[analysis.Language][]analysis.YamlChecker, - path string, -) ([]*analysis.Issue, error) { - lang := analysis.LanguageFromFilePath(path) - checkers := checkersMap[lang] - if checkers == nil && patternCheckers == nil { - // no checkers are registered for this language - return nil, nil - } - - analyzer, err := analysis.FromFile(path, checkers) - if err != nil { - return nil, err - } - analyzer.WorkDir = c.RootDirectory - - if patternCheckers != nil { - analyzer.YamlCheckers = patternCheckers[lang] - } - - return analyzer.Analyze(), nil -} +// func (c *Cli) CheckFile( +// checkersMap map[goAnalysis.Language][]goAnalysis.Analyzer, +// patternCheckers map[goAnalysis.Language][]goAnalysis.Analyzer, +// path string, +// ) ([]*goAnalysis.Issue, error) { +// lang := goAnalysis.LanguageFromFilePath(path) +// checkers := checkersMap[lang] +// if checkers == nil && patternCheckers == nil { +// // no checkers are registered for this language +// return nil, nil +// } + +// analyzer, err := analysis.FromFile(path, checkers) +// if err != nil { +// return nil, err +// } +// analyzer.WorkDir = c.RootDirectory + +// if patternCheckers != nil { +// analyzer.YamlCheckers = patternCheckers[lang] +// } + +// return analyzer.Analyze(), nil +// } type checkResult struct { - issues []*analysis.Issue + issues []*goAnalysis.Issue numFilesChecked int } func (lr *checkResult) GetExitStatus(conf *config.Config) int { for _, issue := range lr.issues { for _, failCategory := range conf.FailWhen.CategoryIn { - if issue.Category == failCategory { + if issue.Category == goAnalysis.Category(failCategory) { return conf.FailWhen.ExitCode } } for _, failSeverity := range conf.FailWhen.SeverityIn { - if issue.Severity == failSeverity { + if issue.Severity == goAnalysis.Severity(failSeverity) { return conf.FailWhen.ExitCode } } @@ -360,7 +362,7 @@ var defaultIgnoreDirs = []string{ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) - patternCheckers := make(map[analysis.Language][]analysis.YamlChecker) + patternCheckers := make(map[goAnalysis.Language][]goAnalysis.Analyzer) var goAnalyzers []*goAnalysis.Analyzer if runBuiltinCheckers { @@ -443,8 +445,8 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { } } - language := analysis.LanguageFromFilePath(path) - if language == analysis.LangUnknown { + language := goAnalysis.LanguageFromFilePath(path) + if language == goAnalysis.LangUnknown { return nil } @@ -453,20 +455,35 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { // run checker // the first arg is empty, since the format for inbuilt Go-based checkers has changed // TODO: factor it in later - issues, err := c.CheckFile(map[analysis.Language][]analysis.Checker{}, patternCheckers, path) - if err != nil { - // parse error on a single file should not exit the entire analysis process - // TODO: logging the below error message is not helpful, as it logs unsupported file types as well - // fmt.Fprintf(os.Stderr, "Error parsing file %s: %s\n", path, err) - return nil - } - - for _, issue := range issues { - txt, _ := issue.AsText() - log.Error().Msg(string(txt)) - - result.issues = append(result.issues, issue) - } + // nonYamlAnalyzers := []*goAnalysis.Analyzer{} + // issues, err := goAnalysis.RunAnalyzers(c.RootDirectory, nonYamlAnalyzers, func(filename string) bool { + // if c.CmpHash != "" { + // _, isChanged := changedFileMap[filename] + // return isChanged + // } + // return true + // }) + + // if err != nil { + // // parse error on a single file should not exit the entire analysis process + // // TODO: logging the below error message is not helpful, as it logs unsupported file types as well + // // fmt.Fprintf(os.Stderr, "Error parsing file %s: %s\n", path, err) + // return nil + // } + + // for _, issue := range issues { + // txt, _ := issue.AsText() + // log.Error().Msg(string(txt)) + + // result.issues = append(result.issues, &goAnalysis.Issue{ + // Filepath: issue.Filepath, + // Message: issue.Message, + // Severity: goAnalysis.Severity(issue.Severity), + // Category: goAnalysis.Category(issue.Category), + // Node: issue.Node, + // Id: issue.Id, + // }) + // } return nil }) @@ -494,11 +511,11 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { txt, _ := issue.AsText() log.Error().Msg(string(txt)) - result.issues = append(result.issues, &analysis.Issue{ + result.issues = append(result.issues, &goAnalysis.Issue{ Filepath: issue.Filepath, Message: issue.Message, - Severity: config.Severity(issue.Severity), - Category: config.Category(issue.Category), + Severity: goAnalysis.Severity(issue.Severity), + Category: goAnalysis.Category(issue.Category), Node: issue.Node, Id: issue.Id, }) @@ -516,11 +533,11 @@ func (c *Cli) RunCheckers(runBuiltinCheckers, runCustomCheckers bool) error { } for _, issue := range customGoIssues { - result.issues = append(result.issues, &analysis.Issue{ + result.issues = append(result.issues, &goAnalysis.Issue{ Filepath: issue.Filepath, Message: issue.Message, - Severity: config.Severity(issue.Severity), - Category: config.Category(issue.Category), + Severity: goAnalysis.Severity(issue.Severity), + Category: goAnalysis.Category(issue.Category), Node: issue.Node, Id: issue.Id, }) diff --git a/pkg/cli/test_runner.go b/pkg/cli/test_runner.go index e213ad3e..3694ed1f 100644 --- a/pkg/cli/test_runner.go +++ b/pkg/cli/test_runner.go @@ -9,7 +9,8 @@ import ( "slices" "strings" - "globstar.dev/pkg/analysis" + ana "globstar.dev/analysis" + js "globstar.dev/checkers/javascript" ) func runTests(dir string) (bool, error) { @@ -49,13 +50,13 @@ func findTestCases(dir string) ([]testCase, error) { return nil } - patternChecker, err := analysis.ReadFromFile(path) + patternChecker, _, err := ana.ReadFromFile(path) if err != nil { fmt.Fprintf(os.Stderr, "invalid checker '%s': %s\n", d.Name(), err.Error()) return nil } - testFile := strings.TrimSuffix(path, fileExt) + ".test" + analysis.GetExtFromLanguage(patternChecker.Language()) + testFile := strings.TrimSuffix(path, fileExt) + ".test" + ana.GetExtFromLanguage(patternChecker.Language) if _, err := os.Stat(testFile); os.IsNotExist(err) { testFile = "" @@ -91,28 +92,35 @@ func runTestCases(dir string) (passed bool, err error) { fmt.Fprintf(os.Stderr, "Running test case: %s\n", filepath.Base(tc.yamlCheckerPath)) // Read and parse the checker definition - checker, err := analysis.ReadFromFile(tc.yamlCheckerPath) + checker, yamlAnalyzer, err := ana.ReadFromFile(tc.yamlCheckerPath) if err != nil { return false, err } - // Parse the test file - analyzer, err := analysis.FromFile(tc.testFile, []analysis.Checker{}) + want, err := findExpectedLines(tc.testFile) if err != nil { return false, err } - analyzer.WorkDir = dir - analyzer.YamlCheckers = append(analyzer.YamlCheckers, checker) - issues := analyzer.Analyze() - want, err := findExpectedLines(tc.testFile) + issues, err := ana.RunAnalyzers(tc.testFile, []*ana.Analyzer{&checker}, nil) if err != nil { return false, err } + var analysisFuncAnalyzer *ana.Analyzer + if yamlAnalyzer.AnalysisFunction != nil { + analysisFuncAnalyzer = GetAnalysisFunction(&yamlAnalyzer) + analysisFunctionIssues, err := ana.RunAnalysisFunction(tc.testFile, []*ana.Analyzer{analysisFuncAnalyzer}, nil) + if err != nil { + return false, err + } + + issues = append(issues, analysisFunctionIssues...) + } + var got []int for _, issue := range issues { - got = append(got, int(issue.Range.StartPoint.Row)+1) // 0-indexed to 1-indexed + got = append(got, int(issue.Node.Range().StartPoint.Row)+1) // 0-indexed to 1-indexed } slices.Sort(got) @@ -178,3 +186,15 @@ func findExpectedLines(filePath string) ([]int, error) { return expectedLines, nil } + +func GetAnalysisFunction(yamlAnalyzer *ana.YamlAnalyzer) *ana.Analyzer { + analysisFunction := yamlAnalyzer.AnalysisFunction + + switch analysisFunction.Name { + case "taint": + return js.GetTaintFunction(yamlAnalyzer.AnalysisFunction.Parameters["sources"], yamlAnalyzer.AnalysisFunction.Parameters["sinks"]) + default: + return nil + } + +} diff --git a/pkg/cli/test_runner_test.go b/pkg/cli/test_runner_test.go new file mode 100644 index 00000000..2f1f2726 --- /dev/null +++ b/pkg/cli/test_runner_test.go @@ -0,0 +1,28 @@ +package cli + +import ( + "testing" + + "github.com/stretchr/testify/assert" + ana "globstar.dev/analysis" +) + +func TestGetAnalysisFunction(t *testing.T) { + path := "testdata/mock-analysis-function.yml" + _, yamlAnalyzer, err := ana.ReadFromFile(path) + analysisFunction := yamlAnalyzer.AnalysisFunction + assert.NotNil(t, analysisFunction) + assert.Len(t, analysisFunction.Parameters, 2) + assert.Len(t, analysisFunction.Parameters["sources"], 1) + assert.NoError(t, err) + + analysisFuncAnalyzer := GetAnalysisFunction(&yamlAnalyzer) + assert.Equal(t, analysisFuncAnalyzer.Name, "taint_detector") +} + +func TestAnalysisFunction(t *testing.T) { + path := "testdata/mock-analysis-function.yml" + passed, err := runTests(path) + assert.NoError(t, err) + assert.True(t, passed) +} diff --git a/pkg/cli/testdata/mock-analysis-function.test.js b/pkg/cli/testdata/mock-analysis-function.test.js new file mode 100644 index 00000000..ab0bffb0 --- /dev/null +++ b/pkg/cli/testdata/mock-analysis-function.test.js @@ -0,0 +1,12 @@ +function getUserInput(key) { + + return document.getElementById(key).value; + +} + +userInput = getUserInput('username') + +// A sink method, which performs some raw databse operation on the userInput + +// +perform_db_operation(userInput) \ No newline at end of file diff --git a/pkg/cli/testdata/mock-analysis-function.yml b/pkg/cli/testdata/mock-analysis-function.yml new file mode 100644 index 00000000..400023c4 --- /dev/null +++ b/pkg/cli/testdata/mock-analysis-function.yml @@ -0,0 +1,23 @@ +name: "run_taint_analysis" +language: javascript +category: security +severity: high +message: "This is just a mock checker" +analysisFunction: + name: taint + parameters: + sources: + - | + (call_expression + function: (identifier) @sourceName + (#eq? @sourceName "getUserInput")) + sinks: + - | + (call_expression + function: (identifier) @sinkName + (#eq? @sinkName "perform_db_operation")) + +pattern: | + (call_expression) + +description: "Runs a taint analysis on the provided function and its parameters." \ No newline at end of file