Skip to content

Commit 71b96ac

Browse files
authored
Integrate tree-sitter queries into the search (#1944)
* feat(wip): include parse data in query and search
* Revert "feat(wip): include parse data in query and search" This reverts commit 5cf95d9.
* feat(wip): reintroduce tree-sitter-specific wip
* feat(wip): some more tree-sitter framework
* feat: attach tree-sitter node ids to our nodes for conversion
* feat: initial tree-sitter query impl
* test: assignment test yay
* feat: allow specifying what capture names to match
* refactor: use proper logging for search generators
* feat: allow capture names to contain the @
* refactor: fix up stuff based on the review
* refactor: combine ParserInformation and ParserMetadata
* refactor: remove unnecessary as
* test: some additional tests
* refactor: rename fromTreeSitterQuery to syntax
* feat-fix: fixed wiki compile
1 parent 73d6403 commit 71b96ac

File tree

16 files changed

+257
-92
lines changed

16 files changed

+257
-92
lines changed

src/cli/repl/commands/repl-execute.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import type { ReplCommand, ReplCommandInformation, ReplOutput } from './repl-main';
22
import { ColorEffect, Colors, FontStyles, italic } from '../../../util/text/ansi';
33
import type { ReadonlyFlowrAnalysisProvider } from '../../../project/flowr-analyzer';
4+
import type { RShellInformation } from '../../../r-bridge/parser';
45

56
export async function tryExecuteRShellCommand({ output, analyzer, allowRSessionAccess, remainingLine }: ReplCommandInformation) {
6-
const parserInfo = await analyzer.parserInformation();
7+
const parserInfo = analyzer.parserInformation();
78
if(!allowRSessionAccess){
89
output.stderr(`${output.formatter.format('You are not allowed to execute arbitrary R code.', { style: FontStyles.Bold, color: Colors.Red, effect: ColorEffect.Foreground })}
910
If you want to do so, please restart flowR with the ${output.formatter.format('--r-session-access', { style: FontStyles.Bold })} flag${ parserInfo.name !== 'r-shell' ? '. Additionally, please enable the r-shell engine, e.g., with ' + output.formatter.format('--default-engine r-shell', { style: FontStyles.Bold }) : ''}. Please be careful of the security implications of this action. When running flowR with npm, you have to use an extra ${output.formatter.format('--', { style: FontStyles.Bold })} to separate flowR from npm arguments.`);
@@ -16,7 +17,7 @@ If you want to do so, please restart flowR with the ${output.formatter.format('-
1617

1718
async function executeRShellCommand(output: ReplOutput, analyzer: ReadonlyFlowrAnalysisProvider, statement: string) {
1819
try {
19-
const result = await analyzer.sendCommandWithOutput(statement, {
20+
const result = await (analyzer.parserInformation() as RShellInformation).sendCommandWithOutput(statement, {
2021
from: 'both',
2122
automaticallyTrimOutput: true
2223
});

src/cli/repl/commands/repl-parse.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ export const parseCommand: ReplCodeCommand = {
167167
},
168168
fn: async({ output, analyzer }) => {
169169
const result = await analyzer.parse();
170-
const parserInfo = await analyzer.parserInformation();
170+
const parserInfo = analyzer.parserInformation();
171171

172172
if(parserInfo.name === 'r-shell') {
173173
const object = convertPreparedParsedData(prepareParsedData(result.parsed as unknown as string));

src/core/steps/all/core/01-parse-tree-sitter.ts

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,6 @@ import type { Tree } from 'web-tree-sitter';
55
import type { ParseRequiredInput } from '../../../../r-bridge/parser';
66
import { parseRequests } from '../../../../r-bridge/parser';
77

8-
export interface ParseStepOutputTS {
9-
readonly parsed: Tree
10-
}
11-
12-
export interface TreeSitterParseJson {
13-
readonly '.meta': {
14-
readonly tokenCount: number,
15-
readonly tokenCountNoComments: number
16-
},
17-
readonly str: string
18-
}
19-
208
export const PARSE_WITH_TREE_SITTER_STEP = {
219
name: 'parse',
2210
humanReadableName: 'parse with tree-sitter',

src/core/steps/all/core/11-normalize-tree-sitter.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ import type { DeepReadonly } from 'ts-essentials';
1111
import { normalizeTreeSitter } from '../../../../r-bridge/lang-4.x/ast/parser/json/parser';
1212
import type { NormalizeRequiredInput } from './10-normalize';
1313
import { getCurrentRequestFile } from './10-normalize';
14-
import type { ParseStepOutputTS } from './01-parse-tree-sitter';
1514
import type { FlowrConfigOptions } from '../../../../config';
15+
import type { ParseStepOutput } from '../../../../r-bridge/parser';
16+
import type { Tree } from 'web-tree-sitter';
1617

17-
function processor(results: { 'parse'?: ParseStepOutputTS }, input: Partial<NormalizeRequiredInput>, config: FlowrConfigOptions) {
18-
return normalizeTreeSitter(results['parse'] as ParseStepOutputTS, input.getId, config, input.overwriteFilePath ?? getCurrentRequestFile(input.request));
18+
/**
 * Step processor that normalizes a tree-sitter parse result by delegating to `normalizeTreeSitter`.
 *
 * NOTE(review): `parse` is optional in the `results` type but is cast to `ParseStepOutput<Tree>` unconditionally;
 * presumably the pipeline guarantees that the parse step ran first — confirm.
 *
 * @param results - Outputs of earlier steps; only the `parse` entry is read.
 * @param input   - Supplies `getId` and the file path (`overwriteFilePath`, falling back to `getCurrentRequestFile(input.request)`).
 * @param config  - Configuration object passed through to `normalizeTreeSitter`.
 * @returns Whatever `normalizeTreeSitter` returns for these arguments.
 */
function processor(results: { 'parse'?: ParseStepOutput<Tree> }, input: Partial<NormalizeRequiredInput>, config: FlowrConfigOptions) {
19+
return normalizeTreeSitter(results['parse'] as ParseStepOutput<Tree>, input.getId, config, input.overwriteFilePath ?? getCurrentRequestFile(input.request));
1920
}
2021

2122
export const NORMALIZE_TREE_SITTER = {

src/project/flowr-analyzer.ts

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import type { FlowrConfigOptions } from '../config';
22

3-
import type { KnownParser, ParseStepOutput, RShellInformation, TreeSitterInformation } from '../r-bridge/parser';
3+
import type {
4+
KnownParser, KnownParserInformation,
5+
ParseStepOutput
6+
} from '../r-bridge/parser';
47
import type { Queries, QueryResults, SupportedQueryTypes } from '../queries/query';
58
import { executeQueries } from '../queries/query';
69
import type { ControlFlowInformation } from '../control-flow/control-flow-graph';
@@ -14,9 +17,6 @@ import type { GetSearchElements } from '../search/flowr-search-executor';
1417
import { runSearch } from '../search/flowr-search-executor';
1518
import type { FlowrAnalyzerContext, ReadOnlyFlowrAnalyzerContext } from './context/flowr-analyzer-context';
1619
import { CfgKind } from './cfg-kind';
17-
import type { OutputCollectorConfiguration } from '../r-bridge/shell';
18-
import { RShell } from '../r-bridge/shell';
19-
import { guard } from '../util/assert';
2020
import type { RAnalysisRequest } from './context/flowr-analyzer-files-context';
2121

2222
/**
@@ -48,16 +48,11 @@ export interface FlowrAnalysisProvider extends ReadonlyFlowrAnalysisProvider {
4848
* This allows us to exchange the underlying implementation of the analyzer without affecting the APIs.
4949
*/
5050
export interface ReadonlyFlowrAnalysisProvider {
51-
/**
52-
* Get the name of the parser used by the analyzer.
53-
*/
54-
parserInformation(): Promise<TreeSitterInformation | RShellInformation>;
55-
/**
56-
* Sends a command to the underlying R engine and collects the output.
57-
* @param command - The command to send to the R engine.
58-
* @param addonConfig - Additional configuration for the output collector.
59-
*/
60-
sendCommandWithOutput(command: string, addonConfig?: Partial<OutputCollectorConfiguration>): Promise<string[]>;
51+
/**
52+
* Returns a set of additional data and helper functions exposed by the underlying {@link KnownParser},
53+
* including the parser's {@link BaseParserInformation.name} and corresponding version information.
54+
*/
55+
parserInformation(): KnownParserInformation
6156
/**
6257
* Returns a read-only version of the project context information.
6358
* This is the preferred method for users that want to inspect the context.
@@ -120,6 +115,7 @@ export class FlowrAnalyzer<Parser extends KnownParser = KnownParser> implements
120115
/** The cache used for storing analysis results */
121116
private readonly cache: FlowrAnalyzerCache<Parser>;
122117
private readonly ctx: FlowrAnalyzerContext;
118+
private parserInfo: KnownParserInformation | undefined;
123119

124120
/**
125121
* Create a new analyzer instance.
@@ -141,15 +137,9 @@ export class FlowrAnalyzer<Parser extends KnownParser = KnownParser> implements
141137
return this.ctx;
142138
}
143139

144-
public async parserInformation(): Promise<TreeSitterInformation | RShellInformation> {
145-
return this.parser.name === 'r-shell' ?
146-
{ name: 'r-shell', rVersion: await (this.parser as RShell).rVersion() }
147-
: { name: 'tree-sitter' };
148-
}
149-
150-
public async sendCommandWithOutput(command: string, addonConfig?: Partial<OutputCollectorConfiguration>): Promise<string[]> {
151-
guard(this.parser instanceof RShell, 'sendCommandWithOutput can only be used with RShell parsers!');
152-
return this.parser.sendCommandWithOutput(command, addonConfig);
140+
/**
 * Returns the information object of the underlying parser.
 *
 * The value is requested from `this.parser.information(this)` only while `parserInfo` is still
 * `null`/`undefined` (via `??=`) and is stored in that field; later calls return the stored object.
 */
public parserInformation(): KnownParserInformation {
141+
this.parserInfo ??= this.parser.information(this);
142+
return this.parserInfo;
153143
}
154144

155145
public inspectContext(): ReadOnlyFlowrAnalyzerContext {
@@ -208,4 +198,4 @@ export class FlowrAnalyzer<Parser extends KnownParser = KnownParser> implements
208198
/**
 * Closes the underlying parser, if one is set, and returns the result of its `close()` call
 * (`undefined` when there is no parser, due to the optional chaining).
 */
public close() {
209199
return this.parser?.close();
210200
}
211-
}
201+
}

src/r-bridge/lang-4.x/ast/parser/json/parser.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ import { decorateAst, deterministicCountingIdGenerator } from '../../model/proce
55
import type { NoInfo, RNode } from '../../model/model';
66
import { normalizeRootObjToAst } from '../main/internal/structure/normalize-root';
77
import type { NormalizerData } from '../main/normalizer-data';
8-
import type { ParseStepOutputTS } from '../../../../../core/steps/all/core/01-parse-tree-sitter';
98
import { normalizeTreeSitterTreeToAst } from '../../../tree-sitter/tree-sitter-normalize';
109
import type { ParseStepOutput } from '../../../../parser';
1110
import type { FlowrConfigOptions } from '../../../../../config';
1211
import { getEngineConfig } from '../../../../../config';
12+
import type { Tree } from 'web-tree-sitter';
1313

1414
export const parseLog = log.getSubLogger({ name: 'ast-parser' });
1515

@@ -44,7 +44,7 @@ export function normalizeButNotDecorated(
4444
* Tree-Sitter pendant to {@link normalize}.
4545
*/
4646
export function normalizeTreeSitter(
47-
{ parsed }: ParseStepOutputTS,
47+
{ parsed }: ParseStepOutput<Tree>,
4848
getId: IdGenerator<NoInfo> = deterministicCountingIdGenerator(0),
4949
config: FlowrConfigOptions,
5050
file?: string

src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
import type { Query, QueryCapture, Tree } from 'web-tree-sitter';
12
import Parser from 'web-tree-sitter';
23

34
import type { RParseRequest } from '../../retriever';
4-
import type { SyncParser } from '../../parser';
5+
import type { SyncParser, TreeSitterInformation } from '../../parser';
56
import type { TreeSitterEngineConfig } from '../../../config';
67
import { log } from '../../../util/log';
78
import fs from 'fs';
9+
import type { FlowrAnalysisProvider } from '../../../project/flowr-analyzer';
810

911
export const DEFAULT_TREE_SITTER_R_WASM_PATH = './node_modules/@eagleoutice/tree-sitter-r/tree-sitter-r.wasm';
1012
export const DEFAULT_TREE_SITTER_WASM_PATH = './node_modules/web-tree-sitter/tree-sitter.wasm';
@@ -17,7 +19,7 @@ const wasmLog = log.getSubLogger({ name: 'tree-sitter-wasm' });
1719
export class TreeSitterExecutor implements SyncParser<Parser.Tree> {
1820

1921
public readonly name = 'tree-sitter';
20-
public readonly parser: Parser;
22+
private readonly parser: Parser;
2123
private static language: Parser.Language;
2224

2325
/**
@@ -54,6 +56,16 @@ export class TreeSitterExecutor implements SyncParser<Parser.Tree> {
5456
return Promise.resolve('none');
5557
}
5658

59+
/**
 * Builds the `TreeSitterInformation` object for this executor.
 *
 * The result carries the fixed name `'tree-sitter'`, the value of `treeSitterVersion()` as `grammarVersion`,
 * and a `treeSitterQuery` helper. On every call the helper awaits `analyzer.parse(force)` and runs
 * `query` on the `parsed` field of the result, which is asserted (not checked) to be a `Tree`.
 *
 * @param analyzer - The analyzer whose parse result the `treeSitterQuery` helper queries.
 * @returns The information object described above.
 */
public information(analyzer: FlowrAnalysisProvider): TreeSitterInformation {
60+
return {
61+
name: 'tree-sitter',
62+
grammarVersion: this.treeSitterVersion(),
63+
treeSitterQuery: async(source: Query | string, force?: boolean) => {
64+
return this.query(source, (await analyzer.parse(force)).parsed as Tree);
65+
}
66+
};
67+
}
68+
5769
/**
 * Returns the `version` property of the language currently set on the wrapped parser.
 */
public treeSitterVersion(): number {
5870
return this.parser.getLanguage().version;
5971
}
@@ -68,6 +80,16 @@ export class TreeSitterExecutor implements SyncParser<Parser.Tree> {
6880
return this.parser.parse(sourceCode);
6981
}
7082

83+
/**
 * Creates a tree-sitter `Query` from the given source text, using the language currently set on the wrapped parser.
 *
 * NOTE(review): the returned query is handed to the caller and never released here; web-tree-sitter
 * queries expose `delete()` — confirm who is expected to call it.
 *
 * @param source - The query source text.
 * @returns The query object produced by the language's `query(source)`.
 */
public createQuery(source: string): Query {
84+
return this.parser.getLanguage().query(source);
85+
}
86+
87+
/**
 * Runs a tree-sitter query on the root node of the given tree and returns the captures of all matches
 * as one flat array, in match order.
 *
 * NOTE(review): when `source` is a string, the `Query` created through `createQuery` is not deleted after use;
 * web-tree-sitter queries expose `delete()` — confirm whether it should be released once the captures are collected.
 *
 * @param source - Either an existing `Query`, which is used as-is, or query source text, which is turned into a query via `createQuery`.
 * @param tree   - The tree whose `rootNode` is matched against.
 * @returns All captures of all matches, flattened.
 */
public query(source: Query | string, tree: Parser.Tree): QueryCapture[] {
88+
const query = typeof source === 'string' ? this.createQuery(source) : source;
89+
const matches = query.matches(tree.rootNode);
90+
return matches.flatMap(m => m.captures);
91+
}
92+
7193
/**
 * Releases the wrapped tree-sitter parser by calling its `delete()`.
 */
public close(): void {
7294
this.parser.delete();
7395
}

0 commit comments

Comments
 (0)