Skip to content

Commit f17d662

Browse files
committed
update tools
1 parent 1152dbb commit f17d662

File tree

6 files changed

+395
-93
lines changed

6 files changed

+395
-93
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ log.log
66
run.sh
77
tester/api_config/**/test_log*
88
tester/api_config/api_config*
9+
tester/api_config/output
910
tools/api_tracer/.huggingface
1011
tools/api_tracer/trace_output*

tools/get_api_config_set.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

tools/get_api_set.py

Lines changed: 94 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,99 @@
11
# 获取 api 集合小工具
22
# @author: cangtianhuang
3-
# @date: 2025-06-08
3+
# @date: 2025-09-26
44

5+
import argparse
56
from pathlib import Path
67

7-
INPUT_PATH = Path("tester/api_config/api_config_tmp.txt")
8-
OUTPUT_PATH = INPUT_PATH
9-
10-
api_apis = set()
11-
count = 0
12-
try:
13-
with open(INPUT_PATH, "r") as f:
14-
for line in f:
15-
line = line.strip()
16-
if line:
17-
api_api = line.split("(", 1)[0]
18-
api_apis.add(api_api)
19-
count += 1
20-
except Exception as err:
21-
print(f"Error reading {INPUT_PATH}: {err}", flush=True)
22-
exit(0)
23-
print(f"Read {count} apis from {INPUT_PATH}", flush=True)
24-
25-
try:
26-
if OUTPUT_PATH != INPUT_PATH and OUTPUT_PATH.exists():
27-
with open(OUTPUT_PATH, "r") as f:
28-
api_apis.update(line.strip() for line in f if line.strip())
29-
with open(OUTPUT_PATH, "w") as f:
30-
f.writelines(f"{line}\n" for line in sorted(api_apis))
31-
except Exception as err:
32-
print(f"Error writing {OUTPUT_PATH}: {err}", flush=True)
33-
exit(0)
34-
print(f"Write {len(api_apis)} apis to {OUTPUT_PATH}", flush=True)
8+
9+
def collect_input_files(input_paths):
    """Expand a list of file/directory paths into the list of files to read.

    A path that is a regular file is used as-is. A directory is searched
    recursively for ``*.txt`` files. Anything else produces a warning and
    is skipped.

    Args:
        input_paths: Iterable of path strings (or path-like objects).

    Returns:
        list[Path]: Unique regular files. Explicit inputs keep their order;
        files discovered inside a directory are sorted.
    """
    files = []
    seen = set()

    def add(candidate):
        # Compare resolved paths so a file reached through two inputs
        # (listed explicitly and again via its directory) is read only
        # once; otherwise the per-file and total counts are inflated.
        key = candidate.resolve()
        if key not in seen:
            seen.add(key)
            files.append(candidate)

    for input_path in input_paths:
        path = Path(input_path)
        if path.is_file():
            add(path)
        elif path.is_dir():
            # rglob also matches directories named "*.txt" and yields in
            # filesystem order: keep regular files only and sort them so
            # the result is deterministic.
            text_files = sorted(p for p in path.rglob("*.txt") if p.is_file())
            for text_file in text_files:
                add(text_file)
            print(f"Found {len(text_files)} .txt files in directory: {path}")
        else:
            print(f"Warning: {path} does not exist or is not accessible")
    return files
22+
23+
24+
def extract_apis(input_paths, output_dir):
    """Collect the unique API names found in the inputs and write them out.

    Each non-empty line containing "(" contributes the text before its
    first "(" (stripped) as an API name. The sorted unique names are
    written, one per line, to ``<output_dir>/api_extracted.txt``; the
    directory is created if needed. Files that cannot be read are
    reported and skipped.

    Args:
        input_paths: Paths (files or directories) handed to
            ``collect_input_files``.
        output_dir: Directory that receives ``api_extracted.txt``.
    """
    sources = collect_input_files(input_paths)
    if not sources:
        print("No valid input files found")
        return

    print(f"Processing {len(sources)} files...")

    unique_names = set()
    grand_total = 0

    for source in sources:
        try:
            text = source.read_text(encoding="utf-8")
            hits = 0

            for raw in text.splitlines():
                # partition() yields an empty separator when the line has
                # no "(", which also covers the blank-line case.
                head, paren, _ = raw.strip().partition("(")
                name = head.strip()
                if not paren or not name:
                    continue
                unique_names.add(name)
                hits += 1
                grand_total += 1

            print(f"Processed {hits} APIs from {source}")
        except Exception as err:
            print(f"Error reading {source}: {err}")

    if not unique_names:
        print("No valid APIs found")
        return

    print(f"Total processed: {grand_total}, Unique APIs: {len(unique_names)}")

    ordered = sorted(unique_names)
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    target = target_dir / "api_extracted.txt"
    target.write_text("".join(f"{name}\n" for name in ordered), encoding="utf-8")
    print(f"Wrote {len(ordered)} API names to {target}")
67+
68+
69+
def main():
    """Parse the command line and run the API-name extraction."""
    usage_examples = """
使用示例:
python %(prog)s -i config.txt # 处理单个配置文件
python %(prog)s -i configs/ # 处理目录下所有.txt文件
python %(prog)s -i . -o output/ # 当前目录
"""
    cli = argparse.ArgumentParser(
        description="API提取工具",
        epilog=usage_examples,
        # Raw formatter keeps the example lines exactly as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "--input",
        "-i",
        nargs="+",
        default=["tester/api_config/api_config_tmp.txt"],
        help="输入路径列表(支持文件或目录)",
    )
    cli.add_argument(
        "--output-dir",
        "-o",
        default="tester/api_config/output",
        help="输出目录路径",
    )

    options = cli.parse_args()
    extract_apis(options.input, options.output_dir)
96+
97+
98+
if __name__ == "__main__":
99+
main()

tools/get_config_set.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# 获取 api 配置集合小工具
2+
# @author: cangtianhuang
3+
# @date: 2025-09-26
4+
5+
import argparse
6+
from pathlib import Path
7+
8+
9+
def collect_input_files(input_paths):
    """Expand a list of file/directory paths into the list of files to read.

    A path that is a regular file is used as-is. A directory is searched
    recursively for ``*.txt`` files. Paths that are neither are ignored.

    Args:
        input_paths: Iterable of path strings (or path-like objects).

    Returns:
        list[Path]: Unique regular files. Explicit inputs keep their order;
        files discovered inside a directory are sorted.
    """
    files = []
    seen = set()

    def add(candidate):
        # Compare resolved paths so a file reached through two inputs
        # (listed explicitly and again via its directory) is read only
        # once; otherwise the "Total configs" count is inflated.
        key = candidate.resolve()
        if key not in seen:
            seen.add(key)
            files.append(candidate)

    for input_path in input_paths:
        path = Path(input_path)
        if path.is_file():
            add(path)
        elif path.is_dir():
            # rglob also matches directories named "*.txt" and yields in
            # filesystem order: keep regular files only and sort them so
            # the result is deterministic.
            for text_file in sorted(p for p in path.rglob("*.txt") if p.is_file()):
                add(text_file)
    return files
19+
20+
21+
def process_api_configs(input_paths, output_dir, max_configs_per_file=500000):
    """Merge, de-duplicate and sort API configs, then write them to disk.

    Every stripped, non-empty line of every input file is one config. The
    sorted unique configs are written to ``<output_dir>/api_config_merged.txt``
    when they fit in a single file, otherwise to numbered
    ``api_config_merged_part<N>.txt`` files of at most
    ``max_configs_per_file`` lines each. Unreadable files are reported and
    skipped.

    Args:
        input_paths: Paths (files or directories) handed to
            ``collect_input_files``.
        output_dir: Directory that receives the output file(s); created if
            missing.
        max_configs_per_file: Maximum number of configs per output file.

    Raises:
        ValueError: If ``max_configs_per_file`` is not positive.
    """
    # Fail fast: a step of 0 would crash inside range() after all inputs had
    # been read, and a negative step would silently write nothing at all.
    if max_configs_per_file <= 0:
        raise ValueError(
            f"max_configs_per_file must be a positive integer, "
            f"got {max_configs_per_file}"
        )

    input_files = collect_input_files(input_paths)
    if not input_files:
        print("No valid input files found")
        return

    print(f"Processing {len(input_files)} files...")

    api_configs = set()
    total_read = 0

    for input_file in input_files:
        try:
            content = input_file.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as err:
            # Only I/O and decoding failures are expected here; anything
            # else is a programming error and should surface.
            print(f"Error reading {input_file}: {err}")
            continue

        lines = [line.strip() for line in content.splitlines() if line.strip()]
        api_configs.update(lines)
        total_read += len(lines)
        print(f"Read {len(lines)} configs from {input_file}")

    if not api_configs:
        print("No valid configs found")
        return

    print(f"Total configs: {total_read}, Unique configs: {len(api_configs)}")

    sorted_configs = sorted(api_configs)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    if len(sorted_configs) <= max_configs_per_file:
        output_file = output_path / "api_config_merged.txt"
        output_file.write_text("\n".join(sorted_configs) + "\n", encoding="utf-8")
        print(f"Wrote {len(sorted_configs)} configs to {output_file}")
        return

    # Too many configs for one file: split into 1-based numbered parts.
    starts = range(0, len(sorted_configs), max_configs_per_file)
    for chunk_num, start in enumerate(starts, start=1):
        chunk = sorted_configs[start : start + max_configs_per_file]
        output_file = output_path / f"api_config_merged_part{chunk_num}.txt"
        output_file.write_text("\n".join(chunk) + "\n", encoding="utf-8")
        print(f"Wrote {len(chunk)} configs to {output_file}")
64+
65+
66+
def main():
    """Parse the command line and run the config merge."""
    usage_examples = """
使用示例:
python %(prog)s -i file.txt # 处理单个文件
python %(prog)s -i dir/ # 处理目录下所有.txt文件
python %(prog)s -i . -o output/ --max-configs 100000 # 当前目录,限制10万条/文件
"""
    cli = argparse.ArgumentParser(
        description="API配置集合整理工具",
        epilog=usage_examples,
        # Raw formatter keeps the example lines exactly as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "--input",
        "-i",
        nargs="+",
        default=["tester/api_config/api_config_tmp.txt"],
        help="输入路径列表(支持文件或目录)",
    )
    cli.add_argument(
        "--output-dir",
        "-o",
        default="tester/api_config/output",
        help="输出目录路径",
    )
    cli.add_argument(
        "--max-configs",
        type=int,
        default=500000,
        help="单个输出文件最大配置数量",
    )

    options = cli.parse_args()
    process_api_configs(options.input, options.output_dir, options.max_configs)
96+
97+
98+
if __name__ == "__main__":
99+
main()

tools/remove_configs.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
# 移除指定配置小工具
2+
# @author: cangtianhuang
3+
# @date: 2025-09-26
4+
5+
import argparse
6+
from pathlib import Path
7+
8+
9+
def collect_input_files(input_paths):
    """Expand a list of file/directory paths into the list of files to edit.

    A path that is a regular file is used as-is. A directory is searched
    recursively for ``*.txt`` files. Paths that are neither are ignored.

    Args:
        input_paths: Iterable of path strings (or path-like objects).

    Returns:
        list[Path]: Unique regular files. Explicit inputs keep their order;
        files discovered inside a directory are sorted.
    """
    files = []
    seen = set()

    def add(candidate):
        # Compare resolved paths so a file reached through two inputs
        # (listed explicitly and again via its directory) is rewritten
        # only once and counted only once in the summary.
        key = candidate.resolve()
        if key not in seen:
            seen.add(key)
            files.append(candidate)

    for input_path in input_paths:
        path = Path(input_path)
        if path.is_file():
            add(path)
        elif path.is_dir():
            # rglob also matches directories named "*.txt" and yields in
            # filesystem order: keep regular files only and sort them so
            # the result is deterministic.
            for text_file in sorted(p for p in path.rglob("*.txt") if p.is_file()):
                add(text_file)
    return files
19+
20+
21+
def load_configs_to_remove(remove_config_file):
    """Read the removal list from a text file.

    Args:
        remove_config_file: Path of a UTF-8 text file with one config per
            line.

    Returns:
        set[str]: The stripped, non-empty lines of the file.

    Raises:
        Exception: Any error raised while reading the file is reported on
            stdout and then re-raised unchanged.
    """
    source = Path(remove_config_file)
    try:
        text = source.read_text(encoding="utf-8")
        wanted = {entry for entry in map(str.strip, text.splitlines()) if entry}
        print(f"Loaded {len(wanted)} configs to remove from {source}")
    except Exception as err:
        print(f"Error reading remove config file {source}: {err}")
        raise

    return wanted
35+
36+
37+
def remove_configs_from_files(input_paths, remove_config_file, backup=False):
    """Delete every listed config from the input files, rewriting them in place.

    A line is removed when its stripped text equals one of the configs
    loaded from ``remove_config_file``; all other lines are kept verbatim.
    Only files that actually lose lines are rewritten. Files that fail to
    be read or written are reported and skipped.

    Args:
        input_paths: Paths (files or directories) handed to
            ``collect_input_files``.
        remove_config_file: Text file listing the configs to remove, one
            per line.
        backup: When true, save the original content next to each modified
            file as ``<name>.backup`` before rewriting it.

    Raises:
        Exception: Whatever ``load_configs_to_remove`` re-raises when the
            removal list cannot be read.
    """
    # The removal list may itself live inside an input directory. Every one
    # of its lines matches, so processing it would wipe the list: skip it.
    removal_list_path = Path(remove_config_file).resolve()
    input_files = []
    for candidate in collect_input_files(input_paths):
        if candidate.resolve() == removal_list_path:
            print(f"Skipping {candidate}: it is the removal list itself")
        else:
            input_files.append(candidate)

    if not input_files:
        print("No valid input files found")
        return

    configs_to_remove = load_configs_to_remove(remove_config_file)
    if not configs_to_remove:
        print("No configs to remove found")
        return

    print(f"Processing {len(input_files)} files...")
    print(f"Will remove {len(configs_to_remove)} unique configs")

    total_removed = 0
    files_modified = 0

    for input_file in input_files:
        try:
            content = input_file.read_text(encoding="utf-8")
            original_lines = content.splitlines()

            # The removal set holds only non-empty strings, so blank lines
            # can never match and are always kept.
            filtered_lines = [
                line for line in original_lines
                if line.strip() not in configs_to_remove
            ]
            removed_count = len(original_lines) - len(filtered_lines)

            if removed_count == 0:
                print(f"No configs to remove in {input_file}")
                continue

            files_modified += 1
            total_removed += removed_count

            if backup:
                backup_file = input_file.with_suffix(input_file.suffix + ".backup")
                backup_file.write_text(content, encoding="utf-8")
                print(f"Created backup: {backup_file}")

            # Terminate every kept line individually so a trailing blank
            # line survives the rewrite; an empty result stays empty.
            new_content = "".join(f"{line}\n" for line in filtered_lines)
            input_file.write_text(new_content, encoding="utf-8")

            print(
                f"Modified {input_file}: removed {removed_count} configs, "
                f"remaining {len(filtered_lines)} lines"
            )

        except (OSError, UnicodeError) as err:
            # Only I/O and decoding failures are expected here; anything
            # else is a programming error and should surface.
            print(f"Error processing {input_file}: {err}")
            continue

    print("\nSummary:")
    print(f"Files processed: {len(input_files)}")
    print(f"Files modified: {files_modified}")
    print(f"Total configs removed: {total_removed}")
99+
100+
101+
def main():
    """Parse the command line and remove the listed configs from the inputs.

    Options: ``-i/--input`` (one or more files or directories, required),
    ``-r/--remove`` (file listing the configs to delete, required) and
    ``--backup`` (keep a copy of each modified file).
    """
    parser = argparse.ArgumentParser(
        description="移除指定配置工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # %(prog)s expands to the real script name, so the examples stay
        # correct if the file is renamed (they used to name a script,
        # config_remover.py, that does not exist).
        epilog="""
使用示例:
python %(prog)s -i input.txt -r remove_configs.txt # 从单文件删除配置
python %(prog)s -i file1.txt file2.txt -r remove_configs.txt # 从多文件删除配置
python %(prog)s -i ./configs/ -r remove_configs.txt # 从文件夹删除配置
python %(prog)s -i input.txt -r remove_configs.txt --backup # 有备份地处理
注意: 所有操作会原地修改文件。使用 --backup 选项可创建备份文件。
""",
    )

    parser.add_argument(
        "-i", "--input", nargs="+", required=True, help="待处理的文件或目录"
    )
    parser.add_argument("-r", "--remove", required=True, help="包含要删除配置的文件")
    parser.add_argument(
        "--backup", action="store_true", default=False, help="创建备份文件"
    )

    args = parser.parse_args()

    remove_configs_from_files(args.input, args.remove, args.backup)
126+
127+
128+
if __name__ == "__main__":
129+
main()

0 commit comments

Comments
 (0)