Skip to content

Commit c959734

Browse files
committed
Add generator for capstone
1 parent 7a8c572 commit c959734

File tree

4 files changed

+357
-2
lines changed

4 files changed

+357
-2
lines changed

.github/workflows/regress.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,20 @@ jobs:
201201
uses: ./.github/actions/singularity-setup
202202
- name: Generate sverilog_header code
203203
run: ./do gen:sverilog
204+
regress-gen-capstone:
  runs-on: ubuntu-latest
  env:
    SINGULARITY: 1
  steps:
    - name: Clone Github Repo Action
      uses: actions/checkout@v4
    - name: singularity setup
      uses: ./.github/actions/singularity-setup
    - name: Generate capstone code
      run: |
        ./do gen:capstone
        # Pin the Capstone release: the regression test's list of known
        # mismatches is written against the 5.0.6 package, so an unpinned
        # install could change the result without any change in this repo.
        python3 -m pip install capstone==5.0.6
        python3 tools/python-packages/udb-capstone/regression-test.py
204218
regress-cpp-unit:
205219
runs-on: ubuntu-latest
206220
env:

backends/generators/tasks.rake

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1+
# typed: false
12
# frozen_string_literal: true
23

34
require "udb/resolver"
4-
require 'json'
5-
require 'tempfile'
5+
require "json"
6+
require "tempfile"
67

78
directory "#{$root}/gen/go"
89
directory "#{$root}/gen/c_header"
910
directory "#{$root}/gen/sverilog"
11+
directory "#{$root}/gen/capstone"
1012

1113
def with_resolved_exception_codes(cfg_arch)
1214
# Process ERB templates in exception codes using Ruby ERB processing
@@ -127,4 +129,28 @@ namespace :gen do
127129
"--output=#{output_dir}riscv_decode_package.svh --include-all"
128130
end
129131
end
132+
133+
desc <<~DESC
  Generate Capstone CSR switch from RISC-V CSR definitions

  Options:
   * CONFIG - Configuration name (defaults to "_")
   * OUTPUT_DIR - Output directory for generated Capstone code (defaults to "#{$root}/gen/capstone")
DESC
task capstone: "#{$root}/gen/capstone" do
  config_name = ENV.fetch("CONFIG", "_")
  output_dir = ENV.fetch("OUTPUT_DIR", "#{$root}/gen/capstone/")

  # Ensure the output directory exists (OUTPUT_DIR may point anywhere)
  FileUtils.mkdir_p output_dir

  # Locate the CSR definitions for the selected configuration
  resolver = Udb::Resolver.new
  cfg_arch = resolver.cfg_arch_for(config_name)
  csr_dir = cfg_arch.path / "csr"

  # File.join yields a correct path whether or not OUTPUT_DIR ends in "/".
  output_file = File.join(output_dir, "csr_switch.c")

  # Run the Capstone CSR switch generator Python script.
  # Passing the arguments separately avoids shell word-splitting, so paths
  # containing spaces or shell metacharacters are handled correctly.
  sh "#{$root}/.home/.venv/bin/python3",
     "#{$root}/tools/python-packages/udb-capstone/generate_csr_switch.py",
     "--csr-dir=#{csr_dir}",
     "--arch=BOTH",
     "--output=#{output_file}"
end
130156
end
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (c) Salil Mittal
4+
# SPDX-License-Identifier: BSD-3-Clause-Clear
5+
"""
6+
Generate a C function mapping RISC-V CSR numbers to names using a switch statement.
7+
"""
8+
9+
import sys
10+
import os
11+
12+
# Import functions from generator.py
13+
generator_dir = os.path.abspath(
14+
os.path.join(os.path.dirname(__file__), "../../../backends/generators")
15+
)
16+
sys.path.append(generator_dir)
17+
from generator import load_csrs
18+
19+
20+
def generate_csr_switch(csrs, output_file):
    """Write a C function that maps CSR numbers to lowercase CSR names.

    The emitted file contains a single ``getCSRSystemRegisterName`` function
    whose ``switch`` has one ``case`` per CSR, ordered by address, and which
    returns ``NULL`` for unknown numbers.

    Args:
        csrs: Mapping of CSR address (int) to CSR name (str).
        output_file: Path of the C file to create or overwrite.
    """
    lines = [
        # The SPDX tag is split across two literals; presumably this keeps
        # license scanners from attributing the emitted license to this script.
        "/* SP" + "DX-License-Identifier: BSD-3-Clause */",
        "/* Copyright (c) 2025 RISC-V International */",
        "/*",
        " * This file is auto-generated by riscv-unified-db",
        " */",
        "static const char *getCSRSystemRegisterName(unsigned CsrNo)",
        "{",
        "\tswitch (CsrNo) {",
    ]

    for addr, name in sorted(csrs.items()):
        lines.append(f"\tcase 0x{addr:04x}:")
        lines.append(f'\t\treturn "{name.lower()}";')

    lines += ["\t}", "\treturn NULL;", "}"]

    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
40+
41+
42+
def main():
    """Command-line entry point: load CSR definitions and emit the C switch file."""
    import argparse

    script_dir = os.path.dirname(__file__)
    default_csr_dir = os.path.abspath(os.path.join(script_dir, "../../../arch/csr/"))
    default_output = os.path.join(script_dir, "csr_switch.c")

    cli = argparse.ArgumentParser(description="Generate C switch for RISC-V CSRs")
    cli.add_argument(
        "--csr-dir",
        default=default_csr_dir,
        help="Directory containing CSR YAML files",
    )
    cli.add_argument(
        "--extensions",
        default="",
        help="Comma-separated list of enabled extensions (default: all)",
    )
    cli.add_argument(
        "--arch",
        default="BOTH",
        choices=["RV32", "RV64", "BOTH"],
        help="Target architecture (RV32, RV64, BOTH)",
    )
    cli.add_argument(
        "--output",
        default=default_output,
        help="Output C file name",
    )
    opts = cli.parse_args()

    # Blank entries (including an entirely empty --extensions) are dropped.
    wanted = []
    for piece in opts.extensions.split(","):
        cleaned = piece.strip()
        if cleaned:
            wanted.append(cleaned)

    # With no explicit extension list, every CSR is included.
    take_everything = len(wanted) == 0
    csr_table = load_csrs(opts.csr_dir, wanted, take_everything, opts.arch)

    generate_csr_switch(csr_table, opts.output)
    print(f"Generated: {opts.output}")
81+
82+
83+
if __name__ == "__main__":
84+
main()
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (c) Salil Mittal
4+
# SPDX-License-Identifier: BSD-3-Clause-Clear
5+
6+
import re
7+
import os
8+
import argparse
9+
import yaml
10+
import sys
11+
12+
# Add path to import generator.py
13+
parent_dir = f"{os.path.dirname(__file__)}/../../../backends/generators"
14+
sys.path.append(parent_dir)
15+
16+
from generator import load_csrs
17+
18+
from capstone import Cs, CS_ARCH_RISCV, CS_MODE_32
19+
20+
21+
# Parse CSRs from the switch case file
22+
def parse_cases(file_path):
    """Collect (csr_name, address) pairs from a generated C switch file.

    A pair is recorded for every ``case 0x...:`` line that is followed by a
    ``return "<name>";`` line.

    Args:
        file_path: Path to the C file containing the CSR switch statement.

    Returns:
        Set of ``(csr_name, address)`` tuples, where address is an int.
    """
    case_re = re.compile(r"\s*case\s+(0x[0-9a-fA-F]+):")
    return_re = re.compile(r'\s*return\s+"([^"]+)";')

    csrs = set()
    with open(file_path, encoding="utf-8") as f:
        case_addr = None
        for line in f:
            m = case_re.match(line)
            if m:
                case_addr = int(m.group(1), 0)  # convert to int
                continue
            # Compare against None explicitly: address 0x000 is a valid CSR
            # number and must not be mistaken for "no pending case".
            if case_addr is not None:
                n = return_re.match(line)
                if n:
                    csrs.add((n.group(1), case_addr))
                    case_addr = None
    return csrs
38+
39+
40+
# Retrieve CSRs present in the Capstone package
41+
# Adds (CSR name / pseudo-instruction, corresponding address) to result set
42+
def get_capstone_csrs():
    """Probe Capstone for the name it prints for every CSR address.

    For each 12-bit CSR address, a ``csrr ra, <csr>`` instruction is encoded
    and disassembled in RV32 mode. Each address contributes at most one entry
    per disassembled instruction:

    * the mnemonic, when Capstone prints a pseudo-instruction instead of
      ``csrr`` (example: ``rdinstreth ra``);
    * the second operand, when it is a name rather than a number
      (example: ``csrr ra, sstatus``).

    Returns:
        Set of ``(name, address)`` tuples.
    """
    csrs = set()

    md = Cs(CS_ARCH_RISCV, CS_MODE_32)
    # CSR addresses are 12 bits wide, so probe 0x000 through 0xFFF inclusive.
    for csr in range(2**12):
        # csrrs ra, <csr>, zero: CSR number in bits 31:20, 0x020f3 below it.
        word = (csr << 20) | 0x020F3
        # Capstone expects the instruction bytes in little-endian order.
        csrr_bytes = word.to_bytes(4, "little")

        for insn in md.disasm(csrr_bytes, 0x1000):
            # Case 1: CSRs having pseudo-instructions
            if insn.mnemonic != "csrr":
                csrs.add((insn.mnemonic, csr))
                continue

            # Case 2: named CSR operand
            operands = insn.op_str.split(",")
            if len(operands) == 2:
                csr_name = operands[1].strip()
                if not csr_name.isnumeric():
                    csrs.add((csr_name, csr))
    return csrs
77+
78+
79+
# Extract CSR address from pseudo-instructions which are in the form:
80+
# xs1 == 0 && csr == <addr>
81+
# Returns the CSR address if the condition is in the above format else None
82+
def extract_csr_addr(cond):
    """Pull the CSR address out of a pseudo-instruction condition.

    The condition must consist of exactly two ``==`` clauses joined by ``&&``:
    one requiring ``xs1`` to equal ``0`` and one comparing ``csr`` with an
    integer literal (decimal or prefixed, e.g. ``0xC00``). The clauses may
    appear in either order.

    Args:
        cond: Condition string, e.g. ``"xs1 == 0 && csr == 0xC00"``.

    Returns:
        The CSR address as an int, or None if the condition has any other shape.
    """
    clauses = [clause.strip() for clause in cond.split("&&")]
    if len(clauses) != 2:
        return None

    saw_xs1_zero = False
    address = None

    for clause in clauses:
        lhs, eq, rhs = clause.partition("==")
        if not eq:
            # not an equality test
            return None
        lhs, rhs = lhs.strip(), rhs.strip()

        if lhs == "xs1":
            if rhs != "0":
                return None
            saw_xs1_zero = True
        elif lhs == "csr":
            try:
                address = int(rhs, 0)  # accepts both decimal and hex literals
            except ValueError:
                return None
        else:
            # unknown left-hand identifier
            return None

    return address if saw_xs1_zero else None
119+
120+
121+
# Get pseudo-instructions for `csrrs` to read specific CSRs
122+
def get_pseudo_instr():
    """Read the `csrrs` pseudo-instructions that target one specific CSR.

    Loads ``csrrs.yaml`` relative to this script and keeps every entry of its
    ``pseudoinstructions`` list whose ``when`` condition is accepted by
    ``extract_csr_addr``.

    Returns:
        Set of ``(csr_address, to)`` tuples, where ``to`` is the entry's
        ``to`` field taken verbatim from the YAML.
    """
    csrrs_path = (
        f"{os.path.dirname(__file__)}/../../../spec/std/isa/inst/Zicsr/csrrs.yaml"
    )

    # Only the YAML parse needs the file handle; release it before processing.
    with open(csrrs_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    res = set()
    for entry in data["pseudoinstructions"]:
        addr = extract_csr_addr(entry["when"])
        # `is not None` rather than truthiness: address 0 is a valid result.
        if addr is not None:
            res.add((addr, entry["to"]))
    return res
139+
140+
141+
def get_pseudo_instr_mapping():
    """Map each pseudo-instruction mnemonic to the lowercase name of the CSR it reads.

    CSR names come from ``load_csrs`` restricted to the Zicntr and F
    extensions; the pseudo-instructions come from ``get_pseudo_instr``.

    Returns:
        Dict of mnemonic (first space-separated word of the pseudo-instruction
        text) to lowercase CSR name.
    """
    csr_dir = f"{os.path.dirname(__file__)}/../../../spec/std/isa/csr/"
    names_by_addr = load_csrs(csr_dir, ["Zicntr", "F"], False, "BOTH")

    return {
        expansion.split(" ")[0].strip(): names_by_addr[addr].lower()
        for addr, expansion in get_pseudo_instr()
    }
157+
158+
159+
UNHANDLED_CASES = [
160+
(["dscratch"], "Defined as dscratch0, dscratch1 in UDB"),
161+
(
162+
[
163+
"utvec",
164+
"sedeleg",
165+
"uip",
166+
"uepc",
167+
"ustatus",
168+
"ucause",
169+
"sideleg",
170+
"uie",
171+
"utval",
172+
"uscratch",
173+
],
174+
"Part of removed N extension",
175+
),
176+
(["pmpaddr13", "pmpaddr14"], "Address error in capstone 5.0.6 package release"),
177+
]
178+
179+
180+
def main():
    """Check that every CSR Capstone knows by name appears in the generated switch.

    Exits with status 0 when nothing is missing and status 1 otherwise.
    Entries covered by a pseudo-instruction alias, or listed in
    UNHANDLED_CASES, are not counted as missing.
    """
    cli = argparse.ArgumentParser(
        description="Compare CSR switch cases in two C files."
    )
    cli.add_argument(
        "--csr_switch",
        help="Path to C file containing CSR switch case",
        default=f"{os.path.dirname(__file__)}/../../../gen/capstone/csr_switch.c",
    )
    opts = cli.parse_args()

    # Flatten UNHANDLED_CASES into {csr_name: reason}.
    skip_reasons = {
        csr: reason for csr_names, reason in UNHANDLED_CASES for csr in csr_names
    }

    alias_to_csr = get_pseudo_instr_mapping()

    generated = parse_cases(opts.csr_switch)  # cases produced by the generator
    from_capstone = get_capstone_csrs()

    def covered_by_alias(entry):
        # A pseudo-instruction counts as present when the CSR it stands for
        # is in the generated switch at the same address.
        name, addr = entry
        return name in alias_to_csr and (alias_to_csr[name], addr) in generated

    remaining = {
        entry for entry in from_capstone - generated if not covered_by_alias(entry)
    }

    # Drop known exceptions, reporting the reason for each one that is skipped.
    missing = set()
    for entry in remaining:
        name = entry[0]
        if name in skip_reasons:
            print(f"Ignoring case - {name}. Reason: {skip_reasons[name]}")
        else:
            missing.add(entry)

    if missing:
        print("CSRs missing in switch statement:", missing)
        sys.exit(1)  # fail
    sys.exit(0)  # pass
228+
229+
230+
if __name__ == "__main__":
231+
main()

0 commit comments

Comments
 (0)