diff --git a/.github/workflows/regress.yml b/.github/workflows/regress.yml
index 191bbcad7..398656ef5 100755
--- a/.github/workflows/regress.yml
+++ b/.github/workflows/regress.yml
@@ -201,6 +201,20 @@ jobs:
         uses: ./.github/actions/singularity-setup
       - name: Generate sverilog_header code
         run: ./do gen:sverilog
+  regress-gen-capstone:
+    runs-on: ubuntu-latest
+    env:
+      SINGULARITY: 1
+    steps:
+      - name: Clone Github Repo Action
+        uses: actions/checkout@v4
+      - name: singularity setup
+        uses: ./.github/actions/singularity-setup
+      - name: Generate capstone code
+        run: |
+          ./do gen:capstone
+          python3 -m pip install capstone==5.0.6
+          python3 tools/python-packages/udb-capstone/regress.py
   regress-cpp-unit:
     runs-on: ubuntu-latest
     env:
diff --git a/backends/generators/tasks.rake b/backends/generators/tasks.rake
index b574aee4d..413c010ef 100644
--- a/backends/generators/tasks.rake
+++ b/backends/generators/tasks.rake
@@ -1,12 +1,14 @@
+# typed: false
 # frozen_string_literal: true
 
 require "udb/resolver"
-require 'json'
-require 'tempfile'
+require "json"
+require "tempfile"
 
 directory "#{$root}/gen/go"
 directory "#{$root}/gen/c_header"
 directory "#{$root}/gen/sverilog"
+directory "#{$root}/gen/capstone"
 
 def with_resolved_exception_codes(cfg_arch)
   # Process ERB templates in exception codes using Ruby ERB processing
@@ -127,4 +129,28 @@ namespace :gen do
         "--output=#{output_dir}riscv_decode_package.svh --include-all"
     end
   end
+
+  desc <<~DESC
+    Generate Capstone CSR switch from RISC-V CSR definitions
+
+    Options:
+     * CONFIG - Configuration name (defaults to "_")
+     * OUTPUT_DIR - Output directory for generated Capstone code (defaults to "#{$root}/gen/capstone")
+  DESC
+  task capstone: "#{$root}/gen/capstone" do
+    config_name = ENV["CONFIG"] || "_"
+    output_dir = ENV["OUTPUT_DIR"] || "#{$root}/gen/capstone/"
+
+    # Ensure the output directory exists
+    FileUtils.mkdir_p output_dir
+
+    # Get the arch paths based on the config
+    resolver = Udb::Resolver.new
+    cfg_arch = resolver.cfg_arch_for(config_name)
+    csr_dir = cfg_arch.path / "csr"
+    output_file = File.join(output_dir, "csr_switch.c")
+
+    # Run the Capstone CSR switch generator Python script
+    sh "/opt/venv/bin/python3 #{$root}/tools/python-packages/udb-capstone/generate_csr_switch.py --csr-dir=#{csr_dir} --arch=BOTH --output=#{output_file}"
+  end
 end
diff --git a/tools/python-packages/udb-capstone/generate_csr_switch.py b/tools/python-packages/udb-capstone/generate_csr_switch.py
new file mode 100644
index 000000000..5a9f7f658
--- /dev/null
+++ b/tools/python-packages/udb-capstone/generate_csr_switch.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Salil Mittal
+# SPDX-License-Identifier: BSD-3-Clause-Clear
+"""
+Generate a C function mapping RISC-V CSR numbers to names using a switch statement.
+"""
+
+import sys
+import os
+
+# Import functions from generator.py
+generator_dir = os.path.abspath(
+    os.path.join(os.path.dirname(__file__), "../../../backends/generators")
+)
+sys.path.append(generator_dir)
+from generator import load_csrs
+
+
+def generate_csr_switch(csrs, output_file):
+    """Write a C function that maps CSR addresses to lower-case CSR names.
+
+    csrs maps each CSR address (int) to its name (str). output_file is the
+    path of the C file to create or overwrite.
+    """
+    # NOTE(review): the tag is presumably split in two so that license
+    # scanners do not read it as the license of this script - confirm.
+    header = (
+        "/* SP" + "DX-License-Identifier: BSD-3-Clause */\n"
+        "/* Copyright (c) 2025 RISC-V International */\n"
+        "/*\n"
+        " * This file is auto-generated by riscv-unified-db\n"
+        " */\n"
+        "static const char *getCSRSystemRegisterName(unsigned CsrNo)\n"
+        "{\n"
+        "\tswitch (CsrNo) {\n"
+    )
+    cases = "".join(
+        f'\tcase 0x{addr:04x}:\n\t\treturn "{name.lower()}";\n'
+        for addr, name in sorted(csrs.items())
+    )
+    footer = "\t}\n\treturn NULL;\n}\n"
+
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(header + cases + footer)
+
+
+def main():
+    """Parse the command line, load the CSRs and write the C switch file."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Generate C switch for RISC-V CSRs")
+    parser.add_argument(
+        "--csr-dir",
+        default=os.path.abspath(
+            os.path.join(os.path.dirname(__file__), "../../../arch/csr/")
+        ),
+        help="Directory containing CSR YAML files",
+    )
+    parser.add_argument(
+        "--extensions",
+        default="",
+        help="Comma-separated list of enabled extensions (default: all)",
+    )
+    parser.add_argument(
+        "--arch",
+        default="BOTH",
+        choices=["RV32", "RV64", "BOTH"],
+        help="Target architecture (RV32, RV64, BOTH)",
+    )
+    parser.add_argument(
+        "--output",
+        default=os.path.join(os.path.dirname(__file__), "csr_switch.c"),
+        help="Output C file name",
+    )
+    args = parser.parse_args()
+
+    # With no extension listed, load_csrs is called with include_all=True
+    enabled_extensions = [
+        ext.strip() for ext in args.extensions.split(",") if ext.strip()
+    ]
+    include_all = not enabled_extensions
+    csrs = load_csrs(args.csr_dir, enabled_extensions, include_all, args.arch)
+
+    generate_csr_switch(csrs, args.output)
+    print(f"Generated: {args.output}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/python-packages/udb-capstone/regress.py b/tools/python-packages/udb-capstone/regress.py
new file mode 100644
index 000000000..a47b555f9
--- /dev/null
+++ b/tools/python-packages/udb-capstone/regress.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Salil Mittal
+# SPDX-License-Identifier: BSD-3-Clause-Clear
+
+import re
+import os
+import argparse
+import yaml
+import sys
+
+# Add path to import generator.py
+parent_dir = f"{os.path.dirname(__file__)}/../../../backends/generators"
+sys.path.append(parent_dir)
+
+from generator import load_csrs
+
+from capstone import Cs, CS_ARCH_RISCV, CS_MODE_32
+
+# Line patterns of the generated switch: `case 0x...:` and `return "...";`
+CASE_RE = re.compile(r"\s*case\s+(0x[0-9a-fA-F]+):")
+RETURN_RE = re.compile(r'\s*return\s+"([^"]+)";')
+
+
+# Parse CSRs from the switch case file
+def parse_cases(file_path):
+    """Return the set of (CSR name, address) pairs found in a C switch file.
+
+    A pair is recorded for each `case 0x...:` line once a later
+    `return "...";` line is seen.
+    """
+    csrs = set()
+    with open(file_path, encoding="utf-8") as f:
+        case_addr = None
+        for line in f:
+            m = CASE_RE.match(line)
+            if m:
+                case_addr = int(m.group(1), 0)  # convert to int
+                continue
+            # Test against None: address 0x000 is falsy but still a valid case
+            if case_addr is not None:
+                n = RETURN_RE.match(line)
+                if n:
+                    csrs.add((n.group(1), case_addr))
+                    case_addr = None
+    return csrs
+
+
+# Retrieve CSRs present in the Capstone package
+# Adds (CSR name / pseudo-instruction, corresponding address) to result set
+def get_capstone_csrs():
+    """Disassemble a read of every CSR address and collect Capstone's names.
+
+    Returns a set of (name, address) pairs. The name is the mnemonic when
+    Capstone prints something other than `csrr`, otherwise the non-numeric
+    CSR operand of `csrr`.
+    """
+    csrs = set()
+
+    md = Cs(CS_ARCH_RISCV, CS_MODE_32)
+    # The CSR field is 12 bits wide, so cover 0x000 through 0xfff inclusive
+    for csr in range(2**12):
+        # Word 0x<csr>020f3 in little-endian byte order
+        csrr_bytes = ((csr << 20) | 0x020F3).to_bytes(4, "little")
+
+        for i in md.disasm(csrr_bytes, 0x1000):
+            # Case 1: CSRs having pseudo-instructions
+            # Example: rdinstreth ra
+            if i.mnemonic != "csrr":
+                csrs.add((i.mnemonic, csr))
+                continue
+
+            # Case 2: named CSR operand
+            # Example: csrr ra, sstatus
+            csr_name_split = i.op_str.split(",")
+            if len(csr_name_split) == 2:
+                csr_name = csr_name_split[1].strip()
+                if not csr_name.isnumeric():
+                    csrs.add((csr_name, csr))
+    return csrs
+
+
+# Extract CSR address from pseudo-instructions which are in the form:
+# xs1 == 0 && csr == <address>
+# Returns the CSR address if the condition is in the above format else None
+def extract_csr_addr(cond):
+    """Return the CSR address named by a `xs1 == 0 && csr == N` condition.
+
+    The two equalities may come in either order. None is returned for any
+    other condition.
+    """
+    parts = cond.split("&&")
+    if len(parts) != 2:
+        return None
+
+    xs1_valid = False
+    csr_addr = None
+
+    for p in parts:
+        if "==" not in p:
+            return None
+
+        # split lhs and rhs in equality
+        left, right = (x.strip() for x in p.split("==", 1))
+
+        if left == "xs1":
+            if right != "0":
+                return None
+            xs1_valid = True
+        elif left == "csr":
+            try:
+                csr_addr = int(right, 0)  # parse both dec and hex addresses
+            except ValueError:
+                return None
+        else:
+            # unknown left-hand identifier
+            return None
+
+    if not xs1_valid or csr_addr is None:
+        return None
+
+    return csr_addr
+
+
+# Get pseudo-instructions for `csrrs` to read specific CSRs
+def get_pseudo_instr():
+    """Return the (CSR address, `to` string) pairs listed in csrrs.yaml.
+
+    Pseudo-instructions whose `when` condition extract_csr_addr cannot parse
+    are left out. The result is always a set, possibly empty.
+    """
+    csrrs_path = (
+        f"{os.path.dirname(__file__)}/../../../spec/std/isa/inst/Zicsr/csrrs.yaml"
+    )
+
+    with open(csrrs_path, encoding="utf-8") as f:
+        data = yaml.safe_load(f)
+
+    res = set()
+    for d in data["pseudoinstructions"]:
+        addr = extract_csr_addr(d["when"])
+        if addr is not None:
+            res.add((addr, d["to"]))
+    return res
+
+
+def get_pseudo_instr_mapping():
+    """Map each csrrs pseudo-instruction mnemonic to its lower-case CSR name."""
+    csr_list = load_csrs(
+        f"{os.path.dirname(__file__)}/../../../spec/std/isa/csr/",
+        ["Zicntr", "F"],
+        False,
+        "BOTH",
+    )
+    pseudo_instr_to_csr = {}
+
+    for addr, to in get_pseudo_instr():
+        # An address load_csrs did not return gets no mapping; a mnemonic that
+        # Capstone emits for it then stays in main()'s diff instead of raising
+        if addr not in csr_list:
+            continue
+        pseudo_instr = to.split(" ")[0].strip()
+        pseudo_instr_to_csr[pseudo_instr] = csr_list[addr].lower()
+
+    return pseudo_instr_to_csr
+
+
+# Names reported by Capstone that main() ignores when they are missing from
+# the generated switch, grouped with the reason that is printed for them
+UNHANDLED_CASES = [
+    (["dscratch"], "Defined as dscratch0, dscratch1 in UDB"),
+    (
+        [
+            "utvec",
+            "sedeleg",
+            "uip",
+            "uepc",
+            "ustatus",
+            "ucause",
+            "sideleg",
+            "uie",
+            "utval",
+            "uscratch",
+        ],
+        "Part of removed N extension",
+    ),
+    (["pmpaddr13", "pmpaddr14"], "Address error in capstone 5.0.6 package release"),
+]
+
+
+def main():
+    """Compare Capstone's CSRs with the generated switch; exit 1 on a gap."""
+    parser = argparse.ArgumentParser(
+        description="Check a generated CSR switch against the CSRs known to Capstone."
+    )
+    parser.add_argument(
+        "--csr_switch",
+        help="Path to C file containing CSR switch case",
+        default=f"{os.path.dirname(__file__)}/../../../gen/capstone/csr_switch.c",
+    )
+    args = parser.parse_args()
+
+    # build unhandled cases dict
+    unhandled_cases_dict = {
+        csr: reason for names, reason in UNHANDLED_CASES for csr in names
+    }
+
+    pseudo_instr_to_csr = get_pseudo_instr_mapping()
+
+    cases_gen = parse_cases(args.csr_switch)  # cases generated using Capstone generator
+    capstone_csrs = get_capstone_csrs()
+
+    missing = set()
+    for name, addr in capstone_csrs - cases_gen:
+        # pseudo-instruction whose underlying CSR is present in cases_gen
+        if (pseudo_instr_to_csr.get(name), addr) in cases_gen:
+            continue
+        # listed in UNHANDLED_CASES: report the reason and skip
+        if name in unhandled_cases_dict:
+            print(f"Ignoring case - {name}. Reason: {unhandled_cases_dict[name]}")
+            continue
+        missing.add((name, addr))
+
+    if missing:
+        # sorted so the report is stable from run to run
+        print("CSRs missing in switch statement:", sorted(missing))
+        sys.exit(1)  # fail
+
+    sys.exit(0)  # pass
+
+
+if __name__ == "__main__":
+    main()