Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/regress.yml
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,20 @@ jobs:
uses: ./.github/actions/singularity-setup
- name: Generate sverilog_header code
run: ./do gen:sverilog
# Regression job: generate the Capstone CSR switch file, then compare it
# against the CSR names known to the `capstone` Python package.
regress-gen-capstone:
runs-on: ubuntu-latest
env:
SINGULARITY: 1
steps:
- name: Clone Github Repo Action
uses: actions/checkout@v4
- name: singularity setup
uses: ./.github/actions/singularity-setup
- name: Generate capstone code
# The three commands run in order: generate the switch file, install the
# capstone package, then run the comparison script.
# NOTE(review): capstone is installed unpinned, while regress.py carries
# exceptions described as specific to the 5.0.6 release - confirm whether
# the version should be pinned.
run: |
./do gen:capstone
python3 -m pip install capstone
python3 tools/python-packages/udb-capstone/regress.py
regress-cpp-unit:
runs-on: ubuntu-latest
env:
Expand Down
30 changes: 28 additions & 2 deletions backends/generators/tasks.rake
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# typed: false
# frozen_string_literal: true

require "udb/resolver"
require 'json'
require 'tempfile'
require "json"
require "tempfile"

directory "#{$root}/gen/go"
directory "#{$root}/gen/c_header"
directory "#{$root}/gen/sverilog"
directory "#{$root}/gen/capstone"

def with_resolved_exception_codes(cfg_arch)
# Process ERB templates in exception codes using Ruby ERB processing
Expand Down Expand Up @@ -127,4 +129,28 @@ namespace :gen do
"--output=#{output_dir}riscv_decode_package.svh --include-all"
end
end

desc <<~DESC
  Generate Capstone CSR switch from RISC-V CSR definitions

  Options:
  * CONFIG - Configuration name (defaults to "_")
  * OUTPUT_DIR - Output directory for generated Capstone code (defaults to "#{$root}/gen/capstone")
DESC
task capstone: "#{$root}/gen/capstone" do
  config_name = ENV["CONFIG"] || "_"
  output_dir = ENV["OUTPUT_DIR"] || "#{$root}/gen/capstone"

  # OUTPUT_DIR may differ from the prerequisite directory, so make sure it exists.
  FileUtils.mkdir_p output_dir

  # Locate the CSR definitions of the requested configuration.
  resolver = Udb::Resolver.new
  cfg_arch = resolver.cfg_arch_for(config_name)
  csr_dir = cfg_arch.path / "csr"

  # File.join yields a correct path whether or not OUTPUT_DIR ends with a slash;
  # plain string concatenation would produce ".../capstonecsr_switch.c" without one.
  output_file = File.join(output_dir, "csr_switch.c")

  # Run the Capstone CSR switch generator Python script.
  # Passing the arguments separately avoids shell word-splitting on paths
  # that contain spaces or shell metacharacters.
  sh "/opt/venv/bin/python3",
     "#{$root}/tools/python-packages/udb-capstone/generate_csr_switch.py",
     "--csr-dir=#{csr_dir}",
     "--arch=BOTH",
     "--output=#{output_file}"
end
end
84 changes: 84 additions & 0 deletions tools/python-packages/udb-capstone/generate_csr_switch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env python3

# Copyright (c) Salil Mittal
# SPDX-License-Identifier: BSD-3-Clause-Clear
"""
Generate a C function mapping RISC-V CSR numbers to names using a switch statement.
"""

import sys
import os

# Import functions from generator.py
generator_dir = os.path.abspath(
os.path.join(os.path.dirname(__file__), "../../../backends/generators")
)
sys.path.append(generator_dir)
from generator import load_csrs


def generate_csr_switch(csrs, output_file):
    """Write a C function that maps CSR numbers to lowercase CSR names.

    The generated file holds a license header followed by
    ``getCSRSystemRegisterName``, a ``switch`` with one ``case`` per CSR
    (ordered by address) that returns ``NULL`` for unknown numbers.

    Args:
        csrs: Mapping of integer CSR address to CSR name.
        output_file: Path of the C file to create or overwrite.
    """
    lines = [
        # The license tag is assembled from two pieces, as in the original
        # code - presumably so that scanners do not read it as the license
        # of this Python file.
        "/* SP" + "DX-License-Identifier: BSD-3-Clause */",
        "/* Copyright (c) 2025 RISC-V International */",
        "/*",
        " * This file is auto-generated by riscv-unified-db",
        " */",
        "static const char *getCSRSystemRegisterName(unsigned CsrNo)",
        "{",
        "\tswitch (CsrNo) {",
    ]

    # Sorting by address keeps the generated file stable between runs.
    for addr, name in sorted(csrs.items()):
        lines.append(f"\tcase 0x{addr:04x}:")
        lines.append(f'\t\treturn "{name.lower()}";')

    lines += ["\t}", "\treturn NULL;", "}"]

    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")


def main():
    """Parse the command line, load the CSR table and write the C switch file."""
    import argparse

    script_dir = os.path.dirname(__file__)
    default_csr_dir = os.path.abspath(os.path.join(script_dir, "../../../arch/csr/"))
    default_output = os.path.join(script_dir, "csr_switch.c")

    cli = argparse.ArgumentParser(description="Generate C switch for RISC-V CSRs")
    cli.add_argument(
        "--csr-dir",
        default=default_csr_dir,
        help="Directory containing CSR YAML files",
    )
    cli.add_argument(
        "--extensions",
        default="",
        help="Comma-separated list of enabled extensions (default: all)",
    )
    cli.add_argument(
        "--arch",
        default="BOTH",
        choices=["RV32", "RV64", "BOTH"],
        help="Target architecture (RV32, RV64, BOTH)",
    )
    cli.add_argument(
        "--output",
        default=default_output,
        help="Output C file name",
    )
    opts = cli.parse_args()

    # Blank entries are discarded, so an empty --extensions value produces an
    # empty list, which in turn selects every CSR.
    stripped = (piece.strip() for piece in opts.extensions.split(","))
    enabled_extensions = [piece for piece in stripped if piece]
    include_all = len(enabled_extensions) == 0

    csrs = load_csrs(opts.csr_dir, enabled_extensions, include_all, opts.arch)
    generate_csr_switch(csrs, opts.output)
    print(f"Generated: {opts.output}")


if __name__ == "__main__":
    main()
231 changes: 231 additions & 0 deletions tools/python-packages/udb-capstone/regress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
#!/usr/bin/env python3

# Copyright (c) Salil Mittal
# SPDX-License-Identifier: BSD-3-Clause-Clear

import re
import os
import argparse
import yaml
import sys

# Add path to import generator.py
parent_dir = f"{os.path.dirname(__file__)}/../../../backends/generators"
sys.path.append(parent_dir)

from generator import load_csrs

from capstone import Cs, CS_ARCH_RISCV, CS_MODE_32


# Parse CSRs from the switch case file
def parse_cases(file_path):
    """Extract (CSR name, address) pairs from a generated C switch file.

    A pair is recorded when a ``case 0x...:`` line is followed by a
    ``return "...";`` line.

    Args:
        file_path: Path of the C file containing the switch statement.

    Returns:
        set of ``(csr_name, address)`` tuples, with the address as an int.
    """
    case_re = re.compile(r"\s*case\s+(0x[0-9a-fA-F]+):")
    return_re = re.compile(r'\s*return\s+"([^"]+)";')

    csrs = set()
    with open(file_path, encoding="utf-8") as f:
        case_addr = None  # address of the case label still waiting for its return
        for line in f:
            m = case_re.match(line)
            if m:
                case_addr = int(m.group(1), 0)  # convert to int
                continue
            # Compare against None explicitly: address 0x000 is a valid CSR
            # number and must not be mistaken for "no pending case".
            if case_addr is not None:
                n = return_re.match(line)
                if n:
                    csrs.add((n.group(1), case_addr))
                    case_addr = None
    return csrs


# Retrieve CSRs present in the Capstone package
# Adds (CSR name / pseudo-instruction, corresponding address) to result set
def get_capstone_csrs():
    """Disassemble a CSR read for every CSR number and collect the names Capstone prints.

    Returns:
        set of ``(name, address)`` tuples. ``name`` is either the mnemonic of
        a pseudo-instruction (when Capstone does not print ``csrr``) or the
        CSR operand name; operands that are purely numeric are skipped.
    """
    csrs = set()
    md = Cs(CS_ARCH_RISCV, CS_MODE_32)

    # The CSR field is 12 bits wide, so cover the full 0x000..0xFFF range.
    for csr in range(2**12):
        # CSR number goes in the top 12 bits; the fixed low 20 bits (0x020f3)
        # complete the CSR-read-into-ra instruction shown in the examples below.
        word = (csr << 20) | 0x020F3
        # Capstone expects the instruction bytes in little-endian order.
        csrr_bytes = word.to_bytes(4, "little")

        for insn in md.disasm(csrr_bytes, 0x1000):
            # Case 1: CSRs having pseudo-instructions
            # Example: rdinstreth ra
            if insn.mnemonic != "csrr":
                csrs.add((insn.mnemonic, csr))
                continue

            # Case 2: named CSR operand
            # Example: csrr ra, sstatus
            operands = insn.op_str.split(",")
            if len(operands) == 2:
                csr_name = operands[1].strip()
                if not csr_name.isnumeric():
                    csrs.add((csr_name, csr))
    return csrs


# Extract CSR address from pseudo-instructions which are in the form:
# xs1 == 0 && csr == <addr>
# Returns the CSR address if the condition is in the above format else None
def extract_csr_addr(cond):
    """Return the CSR address encoded in a ``when`` condition, or None.

    The condition must consist of exactly two ``==`` comparisons joined by
    ``&&``: one requiring ``xs1`` to be ``0`` and one giving the ``csr``
    address (decimal or hexadecimal). Anything else yields None.
    """
    clauses = [clause.strip() for clause in cond.split("&&")]
    if len(clauses) != 2:
        return None

    # Gather "identifier -> value text" for both comparisons.
    operands = {}
    for clause in clauses:
        lhs, eq, rhs = clause.partition("==")
        if not eq:
            return None
        operands[lhs.strip()] = rhs.strip()

    # Both identifiers must be present and nothing else may appear.
    if set(operands) != {"xs1", "csr"}:
        return None
    if operands["xs1"] != "0":
        return None

    try:
        # base 0 accepts both decimal and 0x-prefixed addresses
        return int(operands["csr"], 0)
    except ValueError:
        return None


# Get pseudo-instructions for `csrrs` to read specific CSRs
def get_pseudo_instr():
    """Collect the `csrrs` pseudo-instructions whose condition names a single CSR.

    Reads the ``pseudoinstructions`` list from the csrrs instruction YAML and
    keeps the entries whose ``when`` condition is accepted by
    ``extract_csr_addr``.

    Returns:
        set of ``(csr_address, to)`` tuples, where ``to`` is the entry's
        ``to`` field. The set is empty when no entry matches; the function
        never returns None, so callers can iterate the result directly.
    """
    csrrs_path = (
        f"{os.path.dirname(__file__)}/../../../spec/std/isa/inst/Zicsr/csrrs.yaml"
    )

    with open(csrrs_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    res = set()
    for entry in data["pseudoinstructions"]:
        addr = extract_csr_addr(entry["when"])
        # Identity test: address 0 is a legitimate result, only None means "no match".
        if addr is not None:
            res.add((addr, entry["to"]))
    return res


def get_pseudo_instr_mapping():
    """Map each pseudo-instruction mnemonic to the lowercase name of the CSR it reads.

    CSR names are looked up by address in the table returned by ``load_csrs``
    for the ``Zicntr`` and ``F`` extensions.
    """
    spec_csr_dir = f"{os.path.dirname(__file__)}/../../../spec/std/isa/csr/"
    csr_by_addr = load_csrs(spec_csr_dir, ["Zicntr", "F"], False, "BOTH")

    mapping = {}
    for csr_addr, expansion in get_pseudo_instr():
        # The mnemonic is the first space-separated word of the expansion text.
        mnemonic = expansion.split(" ")[0].strip()
        mapping[mnemonic] = csr_by_addr[csr_addr].lower()
    return mapping


# Capstone names that are deliberately ignored when they are missing from the
# generated switch. Each entry is (list of Capstone names, reason); the reason
# is printed whenever one of the names is skipped.
UNHANDLED_CASES = [
(["dscratch"], "Defined as dscratch0, dscratch1 in UDB"),
(
[
"utvec",
"sedeleg",
"uip",
"uepc",
"ustatus",
"ucause",
"sideleg",
"uie",
"utval",
"uscratch",
],
"Part of removed N extension",
),
(["pmpaddr13", "pmpaddr14"], "Address error in capstone 5.0.6 package release"),
]


def main():
    """Check that every CSR known to Capstone appears in the generated switch file.

    Exits with status 0 when nothing is missing (after discounting
    pseudo-instruction aliases and the documented unhandled cases) and with
    status 1 otherwise, printing the missing entries.
    """
    cli = argparse.ArgumentParser(
        description="Compare CSR switch cases in two C files."
    )
    cli.add_argument(
        "--csr_switch",
        help="Path to C file containing CSR switch case",
        default=f"{os.path.dirname(__file__)}/../../../gen/capstone/csr_switch.c",
    )
    opts = cli.parse_args()

    # Flatten UNHANDLED_CASES into a name -> reason lookup.
    skip_reasons = {
        name: reason for names, reason in UNHANDLED_CASES for name in names
    }

    pseudo_instr_to_csr = get_pseudo_instr_mapping()

    generated = parse_cases(opts.csr_switch)  # cases produced by the Capstone generator
    in_capstone = get_capstone_csrs()
    missing = in_capstone - generated

    def covered_by_pseudo_instr(entry):
        # A pseudo-instruction counts as present when the CSR it reads is
        # generated at the same address.
        name, addr = entry
        if name not in pseudo_instr_to_csr:
            return False
        return (pseudo_instr_to_csr[name], addr) in generated

    missing = {entry for entry in missing if not covered_by_pseudo_instr(entry)}

    def is_unhandled_case(entry):
        name = entry[0]
        if name not in skip_reasons:
            return False
        print(f"Ignoring case - {name}. Reason: {skip_reasons[name]}")
        return True

    # Drop the entries that are knowingly not handled.
    missing = {entry for entry in missing if not is_unhandled_case(entry)}

    if not missing:
        sys.exit(0)  # pass

    print("CSRs missing in switch statement:", missing)
    sys.exit(1)  # fail


if __name__ == "__main__":
    main()
Loading