From 2fe1f71974938fd3daae240b87b4335f876f5934 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 25 Jul 2025 15:38:40 +0800 Subject: [PATCH 01/13] Custom pylsp is based on [python-lsp-server](https://github.com/python-lsp/python-lsp-server), and plus the following pull requests: 1. semanticTokens/full: https://github.com/python-lsp/python-lsp-server/pull/645 2. typeDefinition: https://github.com/python-lsp/python-lsp-server/pull/533 Maybe also 3. implementation: https://github.com/python-lsp/python-lsp-server/pull/644 --- lang/collect/collect.go | 3 ++ lang/parse.go | 5 +++ lang/python/lib.go | 42 ++++++++++++++++++++ lang/python/spec.go | 88 +++++++++++++++++++++++++++++++++++++++++ lang/uniast/ast.go | 5 +++ 5 files changed, 143 insertions(+) create mode 100644 lang/python/lib.go create mode 100644 lang/python/spec.go diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 3b713a1..e16c2f6 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -25,6 +25,7 @@ import ( "unicode" "github.com/cloudwego/abcoder/lang/cxx" + "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/log" . 
"github.com/cloudwego/abcoder/lang/lsp" "github.com/cloudwego/abcoder/lang/rust" @@ -88,6 +89,8 @@ func switchSpec(l uniast.Language) LanguageSpec { return &rust.RustSpec{} case uniast.Cxx: return &cxx.CxxSpec{} + case uniast.Python: + return &python.PythonSpec{} default: panic(fmt.Sprintf("unsupported language %s", l)) } diff --git a/lang/parse.go b/lang/parse.go index 172d12f..9ef9530 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -30,6 +30,7 @@ import ( "github.com/cloudwego/abcoder/lang/golang/parser" "github.com/cloudwego/abcoder/lang/log" "github.com/cloudwego/abcoder/lang/lsp" + "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/rust" "github.com/cloudwego/abcoder/lang/uniast" ) @@ -106,6 +107,8 @@ func checkRepoPath(repoPath string, language uniast.Language) (openfile string, openfile, wait = rust.CheckRepo(repoPath) case uniast.Cxx: openfile, wait = cxx.CheckRepo(repoPath) + case uniast.Python: + openfile, wait = python.CheckRepo(repoPath) default: openfile = "" wait = 0 @@ -121,6 +124,8 @@ func checkLSP(language uniast.Language, lspPath string) (l uniast.Language, s st l, s = rust.GetDefaultLSP() case uniast.Cxx: l, s = cxx.GetDefaultLSP() + case uniast.Python: + l, s = python.GetDefaultLSP() case uniast.Golang: l = uniast.Golang s = "" diff --git a/lang/python/lib.go b/lang/python/lib.go new file mode 100644 index 0000000..2639787 --- /dev/null +++ b/lang/python/lib.go @@ -0,0 +1,42 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "time" + + "github.com/cloudwego/abcoder/lang/uniast" + "github.com/cloudwego/abcoder/lang/utils" +) + +const MaxWaitDuration = 5 * time.Minute + +func GetDefaultLSP() (lang uniast.Language, name string) { + // needs to use the pylsp from https://github.com/python-lsp/python-lsp-server/pull/533 + return uniast.Python, "pylsp" +} + +func CheckRepo(repo string) (string, time.Duration) { + openfile := "" + // TODO: check if the project compiles. + + // NOTICE: wait for Python projects based on code files + _, size := utils.CountFiles(repo, ".py", "SKIPDIR") + wait := 2*time.Second + time.Second*time.Duration(size/1024) + if wait > MaxWaitDuration { + wait = MaxWaitDuration + } + return openfile, wait +} diff --git a/lang/python/spec.go b/lang/python/spec.go new file mode 100644 index 0000000..fbbfe02 --- /dev/null +++ b/lang/python/spec.go @@ -0,0 +1,88 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package python + +import ( + lsp "github.com/cloudwego/abcoder/lang/lsp" + "github.com/cloudwego/abcoder/lang/uniast" +) + +type PythonSpec struct { + repo string +} + +func NewPythonSpec() *PythonSpec { + return &PythonSpec{} +} + +func (c *PythonSpec) WorkSpace(root string) (map[string]string, error) { + panic("TODO") +} + +func (c *PythonSpec) NameSpace(path string) (string, string, error) { + panic("TODO") +} + +func (c *PythonSpec) ShouldSkip(path string) bool { + panic("TODO") +} + +func (c *PythonSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { + panic("TODO") +} + +func (c *PythonSpec) IsEntityToken(tok lsp.Token) bool { + panic("TODO") +} + +func (c *PythonSpec) IsStdToken(tok lsp.Token) bool { + panic("TODO") +} + +func (c *PythonSpec) TokenKind(tok lsp.Token) lsp.SymbolKind { + panic("TODO") +} + +func (c *PythonSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { + panic("TODO") +} + +func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { + panic("TODO") +} + +func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { + panic("TODO") +} + +func (c *PythonSpec) HasImplSymbol() bool { + panic("TODO") +} + +func (c *PythonSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { + panic("TODO") +} + +func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { + panic("TODO") +} + +func (c *PythonSpec) GetUnloadedSymbol(from lsp.Token, define lsp.Location) (string, error) { + panic("TODO") +} + +func (c *PythonSpec) FileImports(content []byte) ([]uniast.Import, error) { + panic("TODO") +} diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index eeb1c87..f164959 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -30,6 +30,7 @@ const ( Golang Language = "go" Rust Language = "rust" Cxx Language = "cxx" + Python Language = "python" Unknown Language = "" ) @@ -41,6 +42,8 @@ func (l Language) String() string { return "go" case Cxx: return "cxx" + case Python: + return "python" default: return 
string(l) } @@ -59,6 +62,8 @@ func NewLanguage(lang string) (l Language) { return Rust case "cxx": return Cxx + case "python": + return Python default: return Unknown } From 0779d2caf8bf7387db1e75f2999ba8efcc2837a1 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 25 Jul 2025 15:42:16 +0800 Subject: [PATCH 02/13] tests: add & cleanup tests. --- testdata/pyglobvar/main.py | 24 +++++++ testdata/pyimport/main.py | 21 ++++++ testdata/pysimpleobj/main.py | 27 ++++++++ testdata/pythonsimple/test.py | 85 ++++++++++++++++++++++++ testdata/pythonsimple/test2.py | 29 +++++++++ testdata/pythonsimple/test3.py | 33 ++++++++++ testdata/pythonsingle/main.py | 100 +++++++++++++++++++++++++++++ testdata/rust2/src/entity/func.rs | 6 +- testdata/rust2/src/entity/inter.rs | 6 +- testdata/rust2/src/entity/mod.rs | 6 +- testdata/rustsimpleobj/Cargo.toml | 6 ++ testdata/rustsimpleobj/src/main.rs | 35 ++++++++++ 12 files changed, 369 insertions(+), 9 deletions(-) create mode 100644 testdata/pyglobvar/main.py create mode 100644 testdata/pyimport/main.py create mode 100644 testdata/pysimpleobj/main.py create mode 100644 testdata/pythonsimple/test.py create mode 100644 testdata/pythonsimple/test2.py create mode 100644 testdata/pythonsimple/test3.py create mode 100644 testdata/pythonsingle/main.py create mode 100644 testdata/rustsimpleobj/Cargo.toml create mode 100644 testdata/rustsimpleobj/src/main.rs diff --git a/testdata/pyglobvar/main.py b/testdata/pyglobvar/main.py new file mode 100644 index 0000000..090dac3 --- /dev/null +++ b/testdata/pyglobvar/main.py @@ -0,0 +1,24 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def foo(): + return 2 + + +def bar(): + return foo() + + +v = foo() diff --git a/testdata/pyimport/main.py b/testdata/pyimport/main.py new file mode 100644 index 0000000..b094941 --- /dev/null +++ b/testdata/pyimport/main.py @@ -0,0 +1,21 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# similar to rust's `pub use`. +# should main.py::types include Union? +from typing import Union + + +def main(): + pass diff --git a/testdata/pysimpleobj/main.py b/testdata/pysimpleobj/main.py new file mode 100644 index 0000000..5c672ce --- /dev/null +++ b/testdata/pysimpleobj/main.py @@ -0,0 +1,27 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class Foo: + def __init__(self): + self.x = 5 + + def bar(self, v: int) -> int: + self.x += v + return self.x + + +def main(): + f = Foo() + f.bar(6) diff --git a/testdata/pythonsimple/test.py b/testdata/pythonsimple/test.py new file mode 100644 index 0000000..7be5114 --- /dev/null +++ b/testdata/pythonsimple/test.py @@ -0,0 +1,85 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union +from test2 import IntPair +from test3 import * + + +def swap_pair(pair: IntPair) -> None: + """ + Swaps the values of a and b in an IntPair. + Note: The original Rust code had a logical error if a swap was intended; + it would result in both pair.a and pair.b being set to the original value of pair.a. + This Python version implements a correct swap. 
+ """ + pair.a, pair.b = pair.b, pair.a + + +from test3 import * + + +def add(a: int, b: int) -> int: + return a + b + + +def compare(a: int, b: int) -> int: + if a < b: + return -1 + elif a > b: + return 1 + else: + return 0 + + +IntOrChar = Union[IntVariant, CharVariant] +# TODO: global var not supported +globalvar = 5 + + +def main() -> None: + global globalvar + globalvar = 65 + + ls = list((1, 2)) + + x = add(2, 3) + print(x) + + my_pair = IntPair(a=10, b=20) + print(f"Original pair: {my_pair}") + swap_pair(my_pair) + print(f"Swapped pair: {my_pair}") + print(f"my_pair.sum = {my_pair.sum()}") + + val1: IntOrChar = IntVariant(123) + val2: IntOrChar = CharVariant(ord("A")) + + print(f"IntOrChar 1: {val1}") + print(f"IntOrChar 2: {val2}") + + if isinstance(val1, IntVariant): + print(f"val1 is an IntVariant with value: {val1.value}") + if isinstance(val2, CharVariant): + print( + f"val2 is a CharVariant with u8 value: {val2.value} (char: '{chr(val2.value)}')" + ) + + print(f"Comparing 5 and 10: {compare(5, 10)}") + print(f"Comparing 10 and 5: {compare(10, 5)}") + print(f"Comparing 7 and 7: {compare(7, 7)}") + + +if __name__ == "__main__": + main() diff --git a/testdata/pythonsimple/test2.py b/testdata/pythonsimple/test2.py new file mode 100644 index 0000000..9145de0 --- /dev/null +++ b/testdata/pythonsimple/test2.py @@ -0,0 +1,29 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dataclasses import dataclass + + +@dataclass +class IntPair: + a: int + b: int + + def sum(self): + return self.a + self.b + + +def main() -> None: + my_pair = IntPair(a=10, b=20) + print(f"Original pair: {my_pair}") diff --git a/testdata/pythonsimple/test3.py b/testdata/pythonsimple/test3.py new file mode 100644 index 0000000..bd1b5b6 --- /dev/null +++ b/testdata/pythonsimple/test3.py @@ -0,0 +1,33 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class IntVariant: + def __init__(self, value: int): + self.value: int = value + + def __repr__(self) -> str: + return f"IntVariant({self.value})" + + +class CharVariant: + def __init__(self, value: int): + if not (0 <= value <= 255): + raise ValueError( + "CharVariant value must be an integer between 0 and 255 (u8 equivalent)" + ) + self.value: int = value + + def __repr__(self) -> str: + return f"CharVariant(value={self.value}, char='{chr(self.value)}')" diff --git a/testdata/pythonsingle/main.py b/testdata/pythonsingle/main.py new file mode 100644 index 0000000..a9b2d0f --- /dev/null +++ b/testdata/pythonsingle/main.py @@ -0,0 +1,100 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from dataclasses import dataclass +from typing import Union + + +@dataclass +class IntPair: + a: int + b: int + + +def swap_pair(pair: IntPair) -> None: + """ + Swaps the values of a and b in an IntPair. + Note: The original Rust code had a logical error if a swap was intended; + it would result in both pair.a and pair.b being set to the original value of pair.a. + This Python version implements a correct swap. + """ + pair.a, pair.b = pair.b, pair.a + + +class IntVariant: + def __init__(self, value: int): + self.value: int = value + + def __repr__(self) -> str: + return f"IntVariant({self.value})" + + +class CharVariant: + def __init__(self, value: int): + if not (0 <= value <= 255): + raise ValueError( + "CharVariant value must be an integer between 0 and 255 (u8 equivalent)" + ) + self.value: int = value + + def __repr__(self) -> str: + return f"CharVariant(value={self.value}, char='{chr(self.value)}')" + + +IntOrChar = Union[IntVariant, CharVariant] + + +def add(a: int, b: int) -> int: + return a + b + + +def compare(a: int, b: int) -> int: + if a < b: + return -1 + elif a > b: + return 1 + else: + return 0 + + +def main() -> None: + x = add(2, 3) + print(x) + + my_pair = IntPair(a=10, b=20) + print(f"Original pair: {my_pair}") + swap_pair(my_pair) + print(f"Swapped pair: {my_pair}") + + val1: IntOrChar = IntVariant(123) + val2: IntOrChar = CharVariant(ord("A")) + + print(f"IntOrChar 1: {val1}") + print(f"IntOrChar 2: {val2}") + + if isinstance(val1, IntVariant): + print(f"val1 is an IntVariant with value: {val1.value}") + if isinstance(val2, 
CharVariant): + print( + f"val2 is a CharVariant with u8 value: {val2.value} (char: '{chr(val2.value)}')" + ) + + print(f"Comparing 5 and 10: {compare(5, 10)}") + print(f"Comparing 10 and 5: {compare(10, 5)}") + print(f"Comparing 7 and 7: {compare(7, 7)}") + + +if __name__ == "__main__": + main() diff --git a/testdata/rust2/src/entity/func.rs b/testdata/rust2/src/entity/func.rs index 3a24c6a..17288b5 100644 --- a/testdata/rust2/src/entity/func.rs +++ b/testdata/rust2/src/entity/func.rs @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/testdata/rust2/src/entity/inter.rs b/testdata/rust2/src/entity/inter.rs index f59188a..009139d 100644 --- a/testdata/rust2/src/entity/inter.rs +++ b/testdata/rust2/src/entity/inter.rs @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/testdata/rust2/src/entity/mod.rs b/testdata/rust2/src/entity/mod.rs index c197ca2..cba1e2b 100644 --- a/testdata/rust2/src/entity/mod.rs +++ b/testdata/rust2/src/entity/mod.rs @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/testdata/rustsimpleobj/Cargo.toml b/testdata/rustsimpleobj/Cargo.toml new file mode 100644 index 0000000..0ba2a47 --- /dev/null +++ b/testdata/rustsimpleobj/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "rustsimpleobj" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/testdata/rustsimpleobj/src/main.rs b/testdata/rustsimpleobj/src/main.rs new file mode 100644 index 0000000..2efed47 --- /dev/null +++ b/testdata/rustsimpleobj/src/main.rs @@ -0,0 +1,35 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +struct Foo(u32); + +impl Foo { + pub fn new(value: u32) -> Self { + Foo(value) + } + + pub fn bar(&mut self, increment: u32) { + self.0 += increment; + } + + pub fn faz(&mut self, decrement: u32) { + self.0 -= decrement; + } +} + +fn main() { + let mut my_foo = Foo::new(10); + my_foo.bar(5); + my_foo.faz(5); +} From 5910d58676b0a1ef4503385eef6f0206cf211682 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 25 Jul 2025 17:13:51 +0800 Subject: [PATCH 03/13] fix: typo --- lang/rust/spec.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lang/rust/spec.go b/lang/rust/spec.go index 140bf3e..e6b1aa2 100644 --- a/lang/rust/spec.go +++ b/lang/rust/spec.go @@ -182,7 +182,7 @@ func hasKeyword(tokens []lsp.Token, keyword string) int { return -1 } -func findSpecifiToken(tokens []lsp.Token, typ string, text string) int { +func findSpecificToken(tokens []lsp.Token, typ string, text string) int { for i := 0; i < len(tokens); i++ { if tokens[i].Type == typ && tokens[i].Text == text { return i @@ -191,7 +191,7 @@ func findSpecifiToken(tokens []lsp.Token, typ string, text string) int { return -1 } -func findSpecifiTokenUntil(tokens []lsp.Token, typ string, text string, start int, end int) int { +func findSpecificTokenUntil(tokens []lsp.Token, typ string, text string, start int, end int) int { for i := start; i < end; i++ { if tokens[i].Type == typ && tokens[i].Text == text { return i @@ -222,8 +222,8 @@ func (c *RustSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { // find the impl type token var implType, receiverType = -1, -1 - var fn = start + findSpecifiToken(tokens[start:], "keyword", "fn") - var forToken = findSpecifiTokenUntil(tokens, "keyword", "for", start, fn) + var fn = start + findSpecificToken(tokens[start:], "keyword", "fn") + var forToken = findSpecificTokenUntil(tokens, "keyword", "for", start, fn) for i := start; i < forToken; i++ { if tokens[i].Type == "interface" { @@ -258,11 +258,11 @@ func (c *RustSpec) 
FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, [] } // exclude #[xxx] - fn := start + findSpecifiToken(tokens[start:], "keyword", "fn") + fn := start + findSpecificToken(tokens[start:], "keyword", "fn") if fn < 0 { return -1, nil, nil, nil } - where := start + findSpecifiToken(tokens[start:], "keyword", "where") + where := start + findSpecificToken(tokens[start:], "keyword", "where") if where == -1 { where = len(tokens) - 1 } From 7b96fb31ac9dd8b59b7b79728ef6b60fe950bac2 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 25 Jul 2025 17:40:28 +0800 Subject: [PATCH 04/13] docs: clarify mod vs pkg --- docs/uniast-en.md | 12 ++++++++++-- docs/uniast-zh.md | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/docs/uniast-en.md b/docs/uniast-en.md index 706f19c..1add6e5 100644 --- a/docs/uniast-en.md +++ b/docs/uniast-en.md @@ -5,8 +5,7 @@ Universal Abstract-Syntax-Tree is a LLM-friendly, language-agnostic code context # Identity Node Unique Identification -To ensure precise querying and scalable storage, `ModPath?PkgPath#SymbolName` is约定 as the globally unique identifier for AST Nodes. - +To ensure precise querying and scalable storage, `ModPath?PkgPath#SymbolName` is designated as the globally unique identifier for AST Nodes. For example: ```json { @@ -16,6 +15,15 @@ To ensure precise querying and scalable storage, `ModPath?PkgPath#SymbolName` is } ``` +> Note that different languages have different descriptions of module and package. For example: +> * In Go, a module refers to a project that contains multiple packages, and a package includes all the files within a specific directory. +> * In Python, a package is a directory, which may contain sub-packages. A package can also contain modules, which are .py files inside the package directory. +> * In Rust, the term package does not exist at all. Instead, a crate (project) contains multiple modules, and modules may include sub-modules. +> * In C, neither concept exists at all. 
+> +> Do not confuse them with the terminology used in abcoder! +> In abcoder, unless otherwise specified, the module (mod) and package (pkg) are defined as follows: + - ModPath: A complete build unit where the content is the installation path@version number. This information is not required for LLMs but is preserved to ensure global uniqueness of Identity. It corresponds to different concepts in various languages: - Golang: Corresponds to a module, e.g., github.com/cloudwego/hertz@v0.1.0 diff --git a/docs/uniast-zh.md b/docs/uniast-zh.md index 5273486..d20594b 100644 --- a/docs/uniast-zh.md +++ b/docs/uniast-zh.md @@ -5,8 +5,7 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言 # Identity 节点唯一标识 -为了保证精确查询和可扩展存储,约定 `ModPath?PkgPath#SymbolName` 为 AST Node 的全球唯一标识。 - +为了保证精确查询和可扩展存储,约定 `ModPath?PkgPath#SymbolName` 为 AST Node 的全球唯一标识。例如: ```json { @@ -16,6 +15,15 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言 } ``` +> 注意,不同的语言对 module 和 package 的描述不同,例如 +> * 在 Go 中 module 表示一个项目,包含了若干 package。而 package 包含了某目录下的诸文件。 +> * 在 Python 中则是,package 是一个目录,可能包含子 package。而且 package 也可能包含 module,是 package 目录下的 py 文件。 +> * 在 Rust 中根本没有 package 的说法,而是 crate(项目)包含了诸 module。module 可能包含子 module。 +> * 在 C 中就完全没有这两个东西。 +> +> 不要把它们和 abcoder 的描述混淆! 
+> 在 abcoder 中,除非另外说明,module(mod) / package(pkg) 的含义如下。 + - ModPath: 一个完整的构建单元,ModPath 内容为安装路径@版本号。该信息对于 LLM 并不需要,只是为了保证 Identity 的全球唯一性而保存。它在各个语言中对应不同概念: - Golang: 对应 module,如 github.com/cloudwego/hertz@v0.1.0 From 36711bbdf82f8da27589cf4515fbb395b06258a2 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 25 Jul 2025 17:41:49 +0800 Subject: [PATCH 05/13] feat: initial support for python parse Aside from (partially) implementing the python specification: * fix hardcoded semanticTokens/range availability * remove the unnecessary implementationProvider check --- lang/collect/collect.go | 19 +-- lang/lsp/client.go | 13 +- lang/lsp/lsp.go | 28 ++--- lang/python/lib.go | 4 +- lang/python/spec.go | 262 ++++++++++++++++++++++++++++++++++++++-- 5 files changed, 283 insertions(+), 43 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index e16c2f6..f8cadbb 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -25,9 +25,9 @@ import ( "unicode" "github.com/cloudwego/abcoder/lang/cxx" - "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/log" . 
"github.com/cloudwego/abcoder/lang/lsp" + "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/rust" "github.com/cloudwego/abcoder/lang/uniast" ) @@ -124,7 +124,7 @@ func (c *Collector) Collect(ctx context.Context) error { } // scan all files - roots := make([]*DocumentSymbol, 0, 1024) + root_syms := make([]*DocumentSymbol, 0, 1024) scanner := func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -172,6 +172,11 @@ func (c *Collector) Collect(ctx context.Context) error { if err != nil { return err } + // HACK: skip imported symbols (do not expose imported symbols in Python) + // TODO: make this behavior consistent in python and rust (where we have pub use vs use) + if c.Language == uniast.Python && (strings.HasPrefix(content, "from ") || strings.HasPrefix(content, "import ")) { + continue + } // collect tokens tokens, err := c.cli.SemanticTokens(ctx, sym.Location) if err != nil { @@ -180,7 +185,7 @@ func (c *Collector) Collect(ctx context.Context) error { sym.Text = content sym.Tokens = tokens c.syms[sym.Location] = sym - roots = append(roots, sym) + root_syms = append(root_syms, sym) } return nil @@ -190,11 +195,11 @@ func (c *Collector) Collect(ctx context.Context) error { } // collect some extra metadata - syms := make([]*DocumentSymbol, 0, len(roots)) - for _, sym := range roots { + entity_syms := make([]*DocumentSymbol, 0, len(root_syms)) + for _, sym := range root_syms { // only language entity symbols need to be collect on next if c.spec.IsEntitySymbol(*sym) { - syms = append(syms, sym) + entity_syms = append(entity_syms, sym) } c.processSymbol(ctx, sym, 1) } @@ -232,7 +237,7 @@ func (c *Collector) Collect(ctx context.Context) error { // } // collect dependencies - for _, sym := range syms { + for _, sym := range entity_syms { next_token: for i, token := range sym.Tokens { diff --git a/lang/lsp/client.go b/lang/lsp/client.go index 58479fe..d4ecbf2 100644 --- a/lang/lsp/client.go +++ b/lang/lsp/client.go 
@@ -32,9 +32,10 @@ import ( type LSPClient struct { *jsonrpc2.Conn *lspHandler - tokenTypes []string - tokenModifiers []string - files map[DocumentURI]*TextDocumentItem + tokenTypes []string + tokenModifiers []string + hasSemanticTokensRange bool + files map[DocumentURI]*TextDocumentItem ClientOptions } @@ -156,10 +157,6 @@ func initLSPClient(ctx context.Context, svr io.ReadWriteCloser, dir DocumentURI, return nil, fmt.Errorf("server did not provide TypeDefinition") } - implementationProvider, ok := vs["implementationProvider"].(bool) - if !ok || !implementationProvider { - return nil, fmt.Errorf("server did not provide Implementation") - } documentSymbolProvider, ok := vs["documentSymbolProvider"].(bool) if !ok || !documentSymbolProvider { return nil, fmt.Errorf("server did not provide DocumentSymbol") @@ -174,6 +171,8 @@ func initLSPClient(ctx context.Context, svr io.ReadWriteCloser, dir DocumentURI, if !ok || semanticTokensProvider == nil { return nil, fmt.Errorf("server did not provide SemanticTokensProvider") } + semanticTokensRange, ok := semanticTokensProvider["range"].(bool) + cli.hasSemanticTokensRange = ok && semanticTokensRange legend, ok := semanticTokensProvider["legend"].(map[string]interface{}) if !ok || legend == nil { return nil, fmt.Errorf("server did not provide SemanticTokensProvider.legend") diff --git a/lang/lsp/lsp.go b/lang/lsp/lsp.go index 02ee192..e59d1b6 100644 --- a/lang/lsp/lsp.go +++ b/lang/lsp/lsp.go @@ -24,7 +24,6 @@ import ( "sort" "strings" - "github.com/cloudwego/abcoder/lang/uniast" "github.com/cloudwego/abcoder/lang/utils" "github.com/sourcegraph/go-lsp" ) @@ -285,22 +284,23 @@ func (cli *LSPClient) References(ctx context.Context, id Location) ([]Location, return resp, nil } -// TODO(perf): cache results especially for whole file queries. 
-// TODO(refactor): infer use_full_method from capabilities -func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRange, resp *SemanticTokens, use_full_method bool) error { - if use_full_method { - req1 := struct { - TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` - }{TextDocument: req.TextDocument} - if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { - return err - } - filterSemanticTokensInRange(resp, req.Range) - } else { +// Some language servers do not provide semanticTokens/range. +// In that case, we fall back to semanticTokens/full and then filter the tokens manually. +func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRange, resp *SemanticTokens) error { + if cli.hasSemanticTokensRange { if err := cli.Call(ctx, "textDocument/semanticTokens/range", req, resp); err != nil { return err } + return nil + } + // fall back to semanticTokens/full + req1 := struct { + TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` + }{TextDocument: req.TextDocument} + if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { + return err } + filterSemanticTokensInRange(resp, req.Range) return nil } @@ -355,7 +355,7 @@ func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, } var resp SemanticTokens - if err := cli.getSemanticTokensRange(ctx, req, &resp, cli.Language == uniast.Cxx); err != nil { + if err := cli.getSemanticTokensRange(ctx, req, &resp); err != nil { return nil, err } diff --git a/lang/python/lib.go b/lang/python/lib.go index 2639787..f01f5ca 100644 --- a/lang/python/lib.go +++ b/lang/python/lib.go @@ -21,10 +21,10 @@ import ( "github.com/cloudwego/abcoder/lang/utils" ) -const MaxWaitDuration = 5 * time.Minute +const MaxWaitDuration = 5 * time.Second func GetDefaultLSP() (lang uniast.Language, name string) { - // needs to use the pylsp from https://github.com/python-lsp/python-lsp-server/pull/533 + // 
Use custom PyLSP. return uniast.Python, "pylsp" } diff --git a/lang/python/spec.go b/lang/python/spec.go index fbbfe02..fb14cfb 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -15,12 +15,19 @@ package python import ( + "fmt" + "os" + "path/filepath" + "strings" + lsp "github.com/cloudwego/abcoder/lang/lsp" "github.com/cloudwego/abcoder/lang/uniast" ) type PythonSpec struct { - repo string + repo string + topModuleName string + topModulePath string } func NewPythonSpec() *PythonSpec { @@ -28,23 +35,109 @@ func NewPythonSpec() *PythonSpec { } func (c *PythonSpec) WorkSpace(root string) (map[string]string, error) { - panic("TODO") + c.repo = root + rets := map[string]string{} + absPath, err := filepath.Abs(root) + if err != nil { + return nil, err + } + + num_projfiles := 0 + scanner := func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + base := filepath.Base(path) + if base == "pyproject.toml" { + num_projfiles++ + if num_projfiles > 1 { + panic("multiple pyproject.toml files found") + } + } + return nil + } + if err := filepath.Walk(root, scanner); err != nil { + return nil, err + } + + c.topModulePath = absPath + // TODO: find a way to infer the module (project) name. 
+ c.topModuleName = "current" + rets[c.topModuleName] = c.topModulePath + return rets, nil } +// returns: modName, pkgPath, error func (c *PythonSpec) NameSpace(path string) (string, string, error) { - panic("TODO") + if strings.HasPrefix(path, c.topModulePath) { + // internal module + modName := c.topModuleName + relPath, err := filepath.Rel(c.topModulePath, path) + if err != nil { + return "", "", err + } + // todo: handle __init__.py + relPath = strings.TrimSuffix(relPath, ".py") + pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") + return modName, pkgPath, nil + } + + // XXX: hardcode + if strings.HasSuffix(path, "stdlib/3/builtins.pyi") { + // builtin module + return "builtins", "builtins", nil + } + + // XXX: hardcoded python path + condaPrefix := "/home/zhenyang/anaconda3/envs/abcoder/lib/python3.11" + if strings.HasPrefix(path, condaPrefix) { + modName := "builtins" + relPath, err := filepath.Rel(condaPrefix, path) + if err != nil { + return "", "", err + } + relPath = strings.TrimSuffix(relPath, ".py") + pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") + return modName, pkgPath, nil + } + + panic(fmt.Sprintf("Namespace %s", path)) } func (c *PythonSpec) ShouldSkip(path string) bool { - panic("TODO") + if !strings.HasSuffix(path, ".py") { + return true + } + return false +} + +func (c *PythonSpec) IsDocToken(tok lsp.Token) bool { + return tok.Type == "comment" } func (c *PythonSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { - panic("TODO") + for i, t := range sym.Tokens { + if c.IsDocToken(t) { + continue + } + for _, m := range t.Modifiers { + if m == "declaration" { + return i + } + } + } + return -1 } func (c *PythonSpec) IsEntityToken(tok lsp.Token) bool { - panic("TODO") + typ := tok.Type + if strings.HasPrefix(tok.Text, "from ") || strings.HasPrefix(tok.Text, "import ") { + // Python LSP highlights imported symbols as function/types + // We decide that imported symbols are not entities. 
+ // In fact, they ARE, just in a different place. + return false + } + return typ == "function" || typ == "variable" || typ == "property" || typ == "class" || typ == "type" } func (c *PythonSpec) IsStdToken(tok lsp.Token) bool { @@ -52,37 +145,180 @@ func (c *PythonSpec) IsStdToken(tok lsp.Token) bool { } func (c *PythonSpec) TokenKind(tok lsp.Token) lsp.SymbolKind { - panic("TODO") + switch tok.Type { + case "namespace": + return lsp.SKNamespace + case "type": + return lsp.SKObject // no direct match; mapped to Object conservatively + case "class": + return lsp.SKClass + case "enum": + return lsp.SKEnum + case "interface": + return lsp.SKInterface + case "struct": + return lsp.SKStruct + case "typeParameter": + return lsp.SKTypeParameter + case "parameter": + return lsp.SKVariable + case "variable": + return lsp.SKVariable + case "property": + return lsp.SKProperty + case "enumMember": + return lsp.SKEnumMember + case "event": + return lsp.SKEvent + case "function": + return lsp.SKFunction + case "method": + return lsp.SKMethod + case "macro": + return lsp.SKFunction + case "string": + return lsp.SKString + case "number": + return lsp.SKNumber + case "operator": + return lsp.SKOperator + default: + return lsp.SKUnknown + } } func (c *PythonSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { - panic("TODO") + // XXX: maybe just use __main__? + return sym.Kind == lsp.SKFunction && sym.Name == "main" } func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { - panic("TODO") + // Same as in IsEntityToken, we do not consider imported symbols as entities. 
+ if strings.HasPrefix(sym.Text, "from ") || strings.HasPrefix(sym.Text, "import ") { + return false + } + typ := sym.Kind + return typ == lsp.SKObject || typ == lsp.SKMethod || typ == lsp.SKFunction || typ == lsp.SKVariable || + typ == lsp.SKStruct || typ == lsp.SKEnum || typ == lsp.SKTypeParameter || typ == lsp.SKConstant || typ == lsp.SKClass } func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { - panic("TODO") + if strings.HasPrefix(sym.Name, "_") { + return false + } + return true } func (c *PythonSpec) HasImplSymbol() bool { - panic("TODO") + // Python does not have direct impl symbols + return false } func (c *PythonSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { panic("TODO") } +// returns: receiver, typeParams, inputParams, outputParams func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { - panic("TODO") + // no receiver. no type params in python + // reference: https://docs.python.org/3/reference/grammar.html + receiver := -1 + // python actually has these but TODO + typeParams := []int{} + + // Hell, manually parse function text to get locations of key tokens since LSP does not support this... + // + // state 0: goto state 1 when we see a def + // state 1: goto state 2 when we see a ( + // state 2: we're in the param list. + // collect input params by checking entity tokens. + // goto state 3 when we see a ) + // state 3: collect output params. 
+ // finish when we see a : + state := 0 + paren_depth := 0 + invalidpos := lsp.Position{ + Line: -1, + Character: -1, + } + // defpos := invalidpos + lparenpos := invalidpos + rparenpos := invalidpos + bodypos := invalidpos + curpos := sym.Location.Range.Start + for i := range len(sym.Text) { + switch state { + case 0: + if i+4 >= len(sym.Text) { + // function text does not contain a def + // should be an import + return -1, []int{}, []int{}, []int{} + } + next4chars := sym.Text[i : i+4] + // heuristics should work with reasonable python code + if next4chars == "def " { + // defpos = curpos + state = 1 + } + case 1: + if sym.Text[i] == '(' { + lparenpos = curpos + paren_depth = 1 + state = 2 + } + case 2: + if sym.Text[i] == ')' { + rparenpos = curpos + paren_depth -= 1 + if paren_depth == 0 { + state = 3 + } + } + case 3: + if sym.Text[i] == ':' { + bodypos = curpos + state = -1 + } + } + if sym.Text[i] == '\n' { + curpos.Line++ + curpos.Character = 0 + } else { + curpos.Character++ + } + } + + paramsrange := lsp.Range{ + Start: lparenpos, + End: rparenpos, + } + returnrange := lsp.Range{ + Start: rparenpos, + End: bodypos, + } + inputParams := []int{} + outputParams := []int{} + for i, t := range sym.Tokens { + if paramsrange.Include(t.Location.Range) { + if c.IsEntityToken(t) { + inputParams = append(inputParams, i) + } + } + if returnrange.Include(t.Location.Range) { + if c.IsEntityToken(t) { + outputParams = append(outputParams, i) + } + } + } + + return receiver, typeParams, inputParams, outputParams } func (c *PythonSpec) GetUnloadedSymbol(from lsp.Token, define lsp.Location) (string, error) { panic("TODO") } +// TODO! 
func (c *PythonSpec) FileImports(content []byte) ([]uniast.Import, error) { - panic("TODO") + return nil, nil } From 4daab2fc3bfb4317a6da872e3a3bd4b57337a217 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 25 Jul 2025 18:00:01 +0800 Subject: [PATCH 06/13] feat: support classes for python --- lang/collect/collect.go | 4 +- lang/python/spec.go | 82 ++++++++++++++++++++++++++++++++++------- 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index f8cadbb..6afbb5b 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -580,11 +580,13 @@ func (c *Collector) collectImpl(ctx context.Context, sym *DocumentSymbol, depth } } var impl string + // HACK: impl head for Rust. if fn > 0 && fn < len(sym.Tokens) { impl = ChunkHead(sym.Text, sym.Location.Range.Start, sym.Tokens[fn].Location.Range.Start) } + // HACK: implhead for Python. Should actually be provided by the language spec. if impl == "" || len(impl) < len(sym.Name) { - impl = sym.Name + impl = fmt.Sprintf("class %s {\n", sym.Name) } // search all methods for _, method := range c.syms { diff --git a/lang/python/spec.go b/lang/python/spec.go index fb14cfb..de002c6 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -203,6 +203,10 @@ func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { } func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { + // builtin methods are exported + if strings.HasPrefix(sym.Name, "__") && strings.HasSuffix(sym.Name, "__") { + return true + } if strings.HasPrefix(sym.Name, "_") { return false } @@ -210,17 +214,70 @@ func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { } func (c *PythonSpec) HasImplSymbol() bool { - // Python does not have direct impl symbols - return false + return true } +func invalidPos() lsp.Position { + return lsp.Position{ + Line: -1, + Character: -1, + } +} + +// returns interface, receiver, first method func (c *PythonSpec) ImplSymbol(sym 
lsp.DocumentSymbol) (int, int, int) { - panic("TODO") + // reference: https://docs.python.org/3/reference/grammar.html + if sym.Kind != lsp.SKClass { + return -1, -1, -1 + } + + implType := -1 + receiverType := -1 + firstMethod := -1 + + // state 0: goto state -1 when we see a 'class' + state := 0 + clsnamepos := invalidPos() + curpos := sym.Location.Range.Start + for i := range len(sym.Text) { + if state == -1 { + break + } + switch state { + case 0: + if i+6 >= len(sym.Text) { + // class text does not contain a 'class' + // should be an import + return -1, -1, -1 + } + next6chars := sym.Text[i : i+6] + // heuristics should work with reasonable python code + if next6chars == "class " { + clsnamepos = curpos + state = -1 + } + } + if sym.Text[i] == '\n' { + curpos.Line++ + curpos.Character = 0 + } else { + curpos.Character++ + } + } + + for i, t := range sym.Tokens { + if receiverType == -1 && clsnamepos.Less(t.Location.Range.Start) { + receiverType = i + } + } + + return implType, receiverType, firstMethod } // returns: receiver, typeParams, inputParams, outputParams func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { - // no receiver. no type params in python + // FunctionSymbol do not return receivers. 
+ // TODO type params in python (nobody uses them) // reference: https://docs.python.org/3/reference/grammar.html receiver := -1 // python actually has these but TODO @@ -237,20 +294,19 @@ func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, // finish when we see a : state := 0 paren_depth := 0 - invalidpos := lsp.Position{ - Line: -1, - Character: -1, - } - // defpos := invalidpos - lparenpos := invalidpos - rparenpos := invalidpos - bodypos := invalidpos + // defpos := invalidPos() + lparenpos := invalidPos() + rparenpos := invalidPos() + bodypos := invalidPos() curpos := sym.Location.Range.Start for i := range len(sym.Text) { + if state == -1 { + break + } switch state { case 0: if i+4 >= len(sym.Text) { - // function text does not contain a def + // function text does not contain a 'def' // should be an import return -1, []int{}, []int{}, []int{} } From 43e3cffbde2d02c2f41234ee765c391a95be2401 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 25 Jul 2025 18:01:41 +0800 Subject: [PATCH 07/13] fix(python): allow ignore bultin, separate site-packages from builtin --- lang/collect/collect.go | 23 +++++++++++++++++++++++ lang/python/spec.go | 18 +++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 6afbb5b..7b77f8c 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -113,7 +113,30 @@ func NewCollector(repo string, cli *LSPClient) *Collector { return ret } +func (c *Collector) configureLSP(ctx context.Context) { + // XXX: should be put in language specification + if c.Language == uniast.Python { + if !c.NeedStdSymbol { + if c.Language == uniast.Python { + conf := map[string]interface{}{ + "settings": map[string]interface{}{ + "pylsp": map[string]interface{}{ + "plugins": map[string]interface{}{ + "jedi_definition": map[string]interface{}{ + "follow_builtin_definitions": false, + }, + }, + }, + }, + } + c.cli.Notify(ctx, 
"workspace/didChangeConfiguration", conf) + } + } + } +} + func (c *Collector) Collect(ctx context.Context) error { + c.configureLSP(ctx) excludes := make([]string, len(c.Excludes)) for i, e := range c.Excludes { if !filepath.IsAbs(e) { diff --git a/lang/python/spec.go b/lang/python/spec.go index de002c6..32f6d8d 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -91,6 +91,22 @@ func (c *PythonSpec) NameSpace(path string) (string, string, error) { // XXX: hardcoded python path condaPrefix := "/home/zhenyang/anaconda3/envs/abcoder/lib/python3.11" if strings.HasPrefix(path, condaPrefix) { + if strings.HasPrefix(path, condaPrefix+"/site-packages") { + // external module + relPath, err := filepath.Rel(condaPrefix+"/site-packages", path) + if err != nil { + return "", "", err + } + relPath = strings.TrimSuffix(relPath, ".py") + pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") + modPath := strings.Split(pkgPath, ".") + if len(modPath) >= 1 { + modName := modPath[0] + return modName, pkgPath, nil + } + panic(fmt.Sprintf("Malformed Namespace %s, pkgPath %s", path, pkgPath)) + } + // builtin module modName := "builtins" relPath, err := filepath.Rel(condaPrefix, path) if err != nil { @@ -101,7 +117,7 @@ func (c *PythonSpec) NameSpace(path string) (string, string, error) { return modName, pkgPath, nil } - panic(fmt.Sprintf("Namespace %s", path)) + panic(fmt.Sprintf("Unhandled Namespace %s", path)) } func (c *PythonSpec) ShouldSkip(path string) bool { From 10fef7a37f612cdebdbdb7fbcda5949a7f8f4472 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 25 Jul 2025 19:06:11 +0800 Subject: [PATCH 08/13] feat: add submodule link to python LSP --- .gitmodules | 4 ++++ pylsp | 1 + 2 files changed, 5 insertions(+) create mode 100644 .gitmodules create mode 160000 pylsp diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..51d3ee3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "pylsp"] + path = pylsp + url = 
git@github.com:Hoblovski/python-lsp-server.git + branch = abc diff --git a/pylsp b/pylsp new file mode 160000 index 0000000..902fb5b --- /dev/null +++ b/pylsp @@ -0,0 +1 @@ +Subproject commit 902fb5b3c91f96f6302d9639a296733451584679 From ba18dc967d6a217d27fb88d64c8c57f764c7af51 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Mon, 28 Jul 2025 23:02:20 +0800 Subject: [PATCH 09/13] feat(python): support FileImports --- lang/collect/collect.go | 5 --- lang/python/spec.go | 47 +++++++++++++++++------ testdata/pyfileimports/main.py | 17 ++++++++ testdata/{pyimport => pyimexport}/main.py | 0 4 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 testdata/pyfileimports/main.py rename testdata/{pyimport => pyimexport}/main.py (100%) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 7b77f8c..6d6ae6e 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -195,11 +195,6 @@ func (c *Collector) Collect(ctx context.Context) error { if err != nil { return err } - // HACK: skip imported symbols (do not expose imported symbols in Python) - // TODO: make this behavior consistent in python and rust (where we have pub use vs use) - if c.Language == uniast.Python && (strings.HasPrefix(content, "from ") || strings.HasPrefix(content, "import ")) { - continue - } // collect tokens tokens, err := c.cli.SemanticTokens(ctx, sym.Location) if err != nil { diff --git a/lang/python/spec.go b/lang/python/spec.go index 32f6d8d..ff31586 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -18,6 +18,7 @@ import ( "fmt" "os" "path/filepath" + "regexp" "strings" lsp "github.com/cloudwego/abcoder/lang/lsp" @@ -147,12 +148,6 @@ func (c *PythonSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { func (c *PythonSpec) IsEntityToken(tok lsp.Token) bool { typ := tok.Type - if strings.HasPrefix(tok.Text, "from ") || strings.HasPrefix(tok.Text, "import ") { - // Python LSP highlights imported symbols as function/types - // We decide that imported 
symbols are not entities. - // In fact, they ARE, just in a different place. - return false - } return typ == "function" || typ == "variable" || typ == "property" || typ == "class" || typ == "type" } @@ -209,10 +204,6 @@ func (c *PythonSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { } func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { - // Same as in IsEntityToken, we do not consider imported symbols as entities. - if strings.HasPrefix(sym.Text, "from ") || strings.HasPrefix(sym.Text, "import ") { - return false - } typ := sym.Kind return typ == lsp.SKObject || typ == lsp.SKMethod || typ == lsp.SKFunction || typ == lsp.SKVariable || typ == lsp.SKStruct || typ == lsp.SKEnum || typ == lsp.SKTypeParameter || typ == lsp.SKConstant || typ == lsp.SKClass @@ -390,7 +381,39 @@ func (c *PythonSpec) GetUnloadedSymbol(from lsp.Token, define lsp.Location) (str panic("TODO") } -// TODO! func (c *PythonSpec) FileImports(content []byte) ([]uniast.Import, error) { - return nil, nil + // Reference: + // https://docs.python.org/3/reference/grammar.html + // There are two types of imports in Python: + // import-as: on ONE line + // import xxx as x, yyy as y + // from-import: on ONE line + // from ... import * + // from ... import xxx as x, yyy as y + // or on POSSIBLY MULTIPLE lines, enclosed by parentheses + // from ... import ( xxx, yyy as y ... ) + // And imports are simple stmts, so they MUST end with \n. 
+ patterns := []string{ + // Matches: import (on a single line) + `(?m)^import\s+(.*)$`, + // Matches: from import (on a single line, without parentheses) + `(?m)^from\s+(.*?)\s+import\s+([^()\n]*)$`, + // Matches: from import ( ) where can span multiple lines + `(?m)^from\s+(.*?)\s+import\s+\(([\s\S]*?)\)$`, + } + + res := []uniast.Import{} + for _, p := range patterns { + re, err := regexp.Compile(p) + if err != nil { + return nil, fmt.Errorf("error compiling regex pattern '%s': %w", p, err) + } + matches := re.FindAllStringSubmatch(string(content), -1) // -1 to find all non-overlapping matches + for _, match := range matches { + res = append(res, uniast.Import{ + Path: match[0], + }) + } + } + return res, nil } diff --git a/testdata/pyfileimports/main.py b/testdata/pyfileimports/main.py new file mode 100644 index 0000000..3c39b97 --- /dev/null +++ b/testdata/pyfileimports/main.py @@ -0,0 +1,17 @@ +import abc +import base64 + +from os import path +from sys import ( + argv, + exit +) +from collections import defaultdict + +from math import ( +cos, +sin + + + ) +import copy diff --git a/testdata/pyimport/main.py b/testdata/pyimexport/main.py similarity index 100% rename from testdata/pyimport/main.py rename to testdata/pyimexport/main.py From 967c500c21163b0af3643329ffdf536e31265b58 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Tue, 29 Jul 2025 17:29:01 +0800 Subject: [PATCH 10/13] tests: update python tests --- testdata/pyfileimports/main.py | 16 ++++++++++++++ testdata/pythonoperator/main.py | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 testdata/pythonoperator/main.py diff --git a/testdata/pyfileimports/main.py b/testdata/pyfileimports/main.py index 3c39b97..db0c7db 100644 --- a/testdata/pyfileimports/main.py +++ b/testdata/pyfileimports/main.py @@ -1,3 +1,19 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance 
with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# fmt: off import abc import base64 diff --git a/testdata/pythonoperator/main.py b/testdata/pythonoperator/main.py new file mode 100644 index 0000000..b7e78dc --- /dev/null +++ b/testdata/pythonoperator/main.py @@ -0,0 +1,38 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +class A: + def __init__(self): + self.value = 10 + + def get_value(self): + return self.value + + def __add__(self, other): + if isinstance(other, A): + return A(self.value + other.value) + return NotImplemented + + +def main(): + a1 = A() + a2 = A() + + print("Value of a1:", a1.get_value()) + print("Value of a2:", a2.get_value()) + + # There should be a dependency from main to A.__add__ + a3 = a1 + a2 + print("Value of a3 (a1 + a2):", a3.get_value()) From 1d1f046e8ac16e2bcd4e02280a53b8dfdba428bf Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 30 Jul 2025 15:47:26 +0800 Subject: [PATCH 11/13] chore: update pylsp and docs --- README.md | 32 ++++++++++++++++++++++++++++++++ pylsp | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7f22dab..61a9199 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ see [UniAST Specification](docs/uniast-zh.md) # Quick Start +It is easiest to start with Golang. For other languages, see language specific installations below. ## Use ABCoder as a MCP server @@ -124,6 +125,37 @@ ABCoder currently supports the following languages: | C | ✅ | ❌ | | Python | Coming Soon | Coming Soon | +# Language Specific Installations +## Python +* Install the language server: a custom pylsp (as a git submodule). + You might use conda or venv to keep a separate python package environment. +```bash +$ git submodule init +$ git submodule update +$ cd pylsp +$ pip install -e . # you might do this in a separate conda/venv environment +$ export PATH=$(realpath ./bin):$PATH +$ pylsp --version # verify pylsp installation is successful +``` + +* Verify the installation: +``` +# start from abcoder repository root dir +$ go build +$ ./ +``` + +## Rust +* Install the rust language via [rustup](https://www.rust-lang.org/tools/install). 
+* Install the language server: rust-analyzer: +```bash +$ rustup component add rust-analyzer +``` +* Verify installation: +```bash +$ go build +$ ./abcoder parse rust testdata/rust2 +``` # Getting Involved diff --git a/pylsp b/pylsp index 902fb5b..0e1ed4c 160000 --- a/pylsp +++ b/pylsp @@ -1 +1 @@ -Subproject commit 902fb5b3c91f96f6302d9639a296733451584679 +Subproject commit 0e1ed4c2785b5ff98148ded317b062ec31b932fc From 4461972746413c6eabf157afb5d7c3f40b3b7071 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 30 Jul 2025 16:33:31 +0800 Subject: [PATCH 12/13] fix: use NewSpec and python sysPath --- lang/collect/collect.go | 6 +++--- lang/python/lib.go | 4 +--- lang/python/spec.go | 48 ++++++++++++++++++++--------------------- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 6d6ae6e..9f0d89f 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -86,11 +86,11 @@ type functionInfo struct { func switchSpec(l uniast.Language) LanguageSpec { switch l { case uniast.Rust: - return &rust.RustSpec{} + return rust.NewRustSpec() case uniast.Cxx: - return &cxx.CxxSpec{} + return cxx.NewCxxSpec() case uniast.Python: - return &python.PythonSpec{} + return python.NewPythonSpec() default: panic(fmt.Sprintf("unsupported language %s", l)) } diff --git a/lang/python/lib.go b/lang/python/lib.go index f01f5ca..7b19d4f 100644 --- a/lang/python/lib.go +++ b/lang/python/lib.go @@ -24,15 +24,13 @@ import ( const MaxWaitDuration = 5 * time.Second func GetDefaultLSP() (lang uniast.Language, name string) { - // Use custom PyLSP. return uniast.Python, "pylsp" } func CheckRepo(repo string) (string, time.Duration) { openfile := "" - // TODO: check if the project compiles. 
- // NOTICE: wait for Rust projects based on code files + // Give the LSP sometime to initialize _, size := utils.CountFiles(repo, ".py", "SKIPDIR") wait := 2*time.Second + time.Second*time.Duration(size/1024) if wait > MaxWaitDuration { diff --git a/lang/python/spec.go b/lang/python/spec.go index ff31586..5cd1535 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -17,10 +17,13 @@ package python import ( "fmt" "os" + "os/exec" "path/filepath" "regexp" + "sort" "strings" + "github.com/cloudwego/abcoder/lang/log" lsp "github.com/cloudwego/abcoder/lang/lsp" "github.com/cloudwego/abcoder/lang/uniast" ) @@ -29,10 +32,23 @@ type PythonSpec struct { repo string topModuleName string topModulePath string + sysPaths []string } func NewPythonSpec() *PythonSpec { - return &PythonSpec{} + cmd := exec.Command("python", "-c", "import sys ; print('\\n'.join(sys.path))") + output, err := cmd.Output() + if err != nil { + log.Error("Failed to get sys.path: %v\n", err) + return nil + } + sysPaths := strings.Split(string(output), "\n") + // Match more specific paths first + sort.Slice(sysPaths, func(i, j int) bool { + return len(sysPaths[i]) > len(sysPaths[j]) + }) + log.Info("PythonSpec: using sysPaths %+v\n", sysPaths) + return &PythonSpec{sysPaths: sysPaths} } func (c *PythonSpec) WorkSpace(root string) (map[string]string, error) { @@ -83,18 +99,11 @@ func (c *PythonSpec) NameSpace(path string) (string, string, error) { return modName, pkgPath, nil } - // XXX: hardcode - if strings.HasSuffix(path, "stdlib/3/builtins.pyi") { - // builtin module - return "builtins", "builtins", nil - } - - // XXX: hardcoded python path - condaPrefix := "/home/zhenyang/anaconda3/envs/abcoder/lib/python3.11" - if strings.HasPrefix(path, condaPrefix) { - if strings.HasPrefix(path, condaPrefix+"/site-packages") { - // external module - relPath, err := filepath.Rel(condaPrefix+"/site-packages", path) + for _, sysPath := range c.sysPaths { + log.Error("PythonSpec: path %s sysPath %s\n", path, 
sysPath) + if strings.HasPrefix(path, sysPath) { + relPath, err := filepath.Rel(sysPath, path) + log.Error("PythonSpec: matched relPath %s, sysPath %s\n", relPath, sysPath) if err != nil { return "", "", err } @@ -107,18 +116,9 @@ func (c *PythonSpec) NameSpace(path string) (string, string, error) { } panic(fmt.Sprintf("Malformed Namespace %s, pkgPath %s", path, pkgPath)) } - // builtin module - modName := "builtins" - relPath, err := filepath.Rel(condaPrefix, path) - if err != nil { - return "", "", err - } - relPath = strings.TrimSuffix(relPath, ".py") - pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") - return modName, pkgPath, nil } - - panic(fmt.Sprintf("Unhandled Namespace %s", path)) + log.Error("Namespace not found for path: %s\n", path) + return "", "", fmt.Errorf("namespace not found for path: %s", path) } func (c *PythonSpec) ShouldSkip(path string) bool { From c483270b6d20b996ec7e536bd8b25c3dfd6c69e0 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 30 Jul 2025 17:14:15 +0800 Subject: [PATCH 13/13] docs: docs on how to install LSP --- README.md | 46 ++++++------------------------------- docs/lsp-installation-cn.md | 44 +++++++++++++++++++++++++++++++++++ docs/lsp-installation-en.md | 43 ++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 39 deletions(-) create mode 100644 docs/lsp-installation-cn.md create mode 100644 docs/lsp-installation-en.md diff --git a/README.md b/README.md index 61a9199..a1859af 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,6 @@ see [UniAST Specification](docs/uniast-zh.md) # Quick Start -It is easiest to start with Golang. For other languages, see language specific installations below. - ## Use ABCoder as a MCP server 1. Install ABCoder: @@ -41,13 +39,15 @@ It is easiest to start with Golang. 
For other languages, see language specific i abcoder parse {language} {repo-path} > xxx.json ``` - For example: + For example, to parse a Go repository: ```bash git clone https://github.com/cloudwego/localsession.git localsession abcoder parse go localsession -o /abcoder-asts/localsession.json ``` + To parse repositories in other languages, [install the corresponding language server first](./docs/lsp-installation-en.md). + 3. Integrate ABCoder's MCP tools into your AI agent. ```json @@ -113,49 +113,17 @@ $ exit - NOTICE: This feature is Work-In-Progress. It only supports code analysis at present. - # Supported Languages ABCoder currently supports the following languages: | Language | Parser | Writer | | -------- | ----------- | ----------- | -| Go | ✅ | ✅ | -| Rust | ✅ | Coming Soon | -| C | ✅ | ❌ | -| Python | Coming Soon | Coming Soon | - -# Language Specific Installations -## Python -* Install the language server: a custom pylsp (as a git submodule). - You might use conda or venv to keep a separate python package environment. -```bash -$ git submodule init -$ git submodule update -$ cd pylsp -$ pip install -e . # you might do this in a separate conda/venv environment -$ export PATH=$(realpath ./bin):$PATH -$ pylsp --version # verify pylsp installation is successful -``` - -* Verify the installation: -``` -# start from abcoder repository root dir -$ go build -$ ./ -``` +| Go | ✅ | ✅ | +| Rust | ✅ | Coming Soon | +| C | ✅ | Coming Soon | +| Python | ✅ | Coming Soon | -## Rust -* Install the rust language via [rustup](https://www.rust-lang.org/tools/install). 
-* Install the language server: rust-analyzer: -```bash -$ rustup component add rust-analyzer -``` -* Verify installation: -```bash -$ go build -$ ./abcoder parse rust testdata/rust2 -``` # Getting Involved diff --git a/docs/lsp-installation-cn.md b/docs/lsp-installation-cn.md new file mode 100644 index 0000000..5e9d755 --- /dev/null +++ b/docs/lsp-installation-cn.md @@ -0,0 +1,44 @@ +# Language server 安装 +为了解析仓库中符号之间的依赖,abcoder parser 需要使用各语言的 language server。 +运行 parser 之前请安装对应的 language server。 + +语言和 language server 的对应关系如下 + +| 语言 | Language server | 可执行文件 | +| --- | --- | --- | +| Go | 不使用 LSP,使用内置 parser | / | +| Rust | rust-analyzer | rust-analyzer | +| Python | (修改后的) python-lsp-server | pylsp | +| C | clangd-18 | clangd-18 | + +按如下教程完成安装后,在运行 abcoder 前请确保 PATH 中有对应可执行文件 + +## Rust +* 先通过 [rustup](https://www.rust-lang.org/tools/install) 安装 Rust 语言 +* 安装 rust-analyzer + ```bash + $ rustup component add rust-analyzer + $ rust-analyzer --version # 验证安装成功 + ``` + +## Python +* 安装 Python 3.9+ +* 从 git submodule 安装 pylsp + ```bash + $ git submodule init + $ git submodule update + $ cd pylsp + $ pip install -e . # 可以考虑在单独的 conda/venv 环境中执行 + $ export PATH=$(realpath ./bin):$PATH # 放到 .rc 文件里,或每次运行 abcoder 前都设置一下 + $ pylsp --version # 验证安装成功 + ``` + +## C +* ubuntu 24.04 或以后版本: 可以直接从 apt 安装 + ```bash + $ sudo apt install clangd-18 + ``` + +* 其他发行版:手动编译、或从 [llvm 官方网站](https://releases.llvm.org/download.html) 下载预编译的版本。 + clangd 在 clang-tools-extra 中。 + diff --git a/docs/lsp-installation-en.md b/docs/lsp-installation-en.md new file mode 100644 index 0000000..13b6862 --- /dev/null +++ b/docs/lsp-installation-en.md @@ -0,0 +1,43 @@ +# Language Server Installation + +To parse dependencies between symbols in a repository, the abcoder parser requires the use of language servers for various languages. Please install the corresponding language server before running the parser. 
+ +The mapping between languages and language servers is as follows: + +| Language | Language Server | Executable | +| -------- | ------------------------- | --------------- | +| Go | Does not use LSP, uses built-in parser | / | +| Rust | rust-analyzer | rust-analyzer | +| Python | (Modified) python-lsp-server | pylsp | +| C | clangd-18 | clangd-18 | + +Ensure the corresponding executable is in PATH before running abcoder. + +## Rust +* First, install the Rust language via [rustup](https://www.rust-lang.org/tools/install). +* Install rust-analyzer: + ```bash + $ rustup component add rust-analyzer + $ rust-analyzer --version # Verify successful installation + ``` + +## Python +* Install Python 3.9+ +* Install pylsp from the git submodule: + ```bash + $ git submodule init + $ git submodule update + $ cd pylsp + $ pip install -e . # Consider executing in a separate conda/venv environment + $ export PATH=$(realpath ./bin):$PATH # Add this to your .rc file, or set it before each abcoder run + $ pylsp --version # Verify successful installation + ``` + +## C +* Ubuntu 24.04 or later: Install directly from apt: + ```bash + $ sudo apt install clangd-18 + ``` + +* Other distributions: Build clangd from source, + or download a pre-compiled version from the [LLVM official website](https://releases.llvm.org/download.html). clangd is in `clang-tools-extra`.