def build_match_from_format(format_field):
    """
    Build a 32-character match string from a new-schema ``format`` field.

    The result is MSB-first: index 0 is bit 31 and index 31 is bit 0.
    Bits fixed by an opcode field are "0" or "1"; all others are "-".

    Args:
        format_field: Mapping expected to hold an "opcodes" mapping. Each
            opcode entry that is a dict with both "location" and "value"
            contributes fixed bits. "location" may be a range string
            ("31-25"), a single-bit string ("7"), or a plain integer (7),
            which is how a YAML loader typically hands back an unquoted
            single-bit location. Entries with a non-integer "value" are
            ignored, as is the "$child_of" bookkeeping key.

    Returns:
        The match string, or None when ``format_field`` is empty, has no
        usable "opcodes" mapping, or contains a field that cannot be
        encoded (unparsable location, negative value, or a value too wide
        for its field). Callers already treat None as "could not build".
    """
    if not format_field or "opcodes" not in format_field:
        return None

    opcodes = format_field["opcodes"]
    if not isinstance(opcodes, dict):
        return None

    width = 32
    match_bits = ["-"] * width

    for field_name, field_data in opcodes.items():
        if field_name == "$child_of":
            continue
        if not isinstance(field_data, dict):
            continue
        if "location" not in field_data or "value" not in field_data:
            continue

        value = field_data["value"]
        if not isinstance(value, int):
            # Non-integer values are not fixed bits.
            continue

        location = field_data["location"]
        try:
            if isinstance(location, int):
                # Unquoted single-bit locations arrive as integers.
                high = low = location
            else:
                text = str(location)
                if "-" in text:
                    high, low = (int(part) for part in text.split("-"))
                else:
                    high = low = int(text)
        except ValueError:
            # Covers both non-numeric text and a wrong number of range parts.
            return None

        # Accept a reversed range ("25-31") rather than failing on it.
        if high < low:
            high, low = low, high

        num_bits = high - low + 1

        # A value that does not fit would spill into neighbouring fields,
        # and a negative one would inject a "-" sign; reject both.
        if value < 0 or value >= (1 << num_bits):
            return None

        binary_value = format(value, f"0{num_bits}b")

        # Place bits MSB first; positions outside the 32-bit window are
        # dropped, matching the generator's fixed 32-bit pattern width.
        for offset, bit in enumerate(binary_value):
            bit_position = high - offset
            if 0 <= bit_position < width:
                match_bits[width - 1 - bit_position] = bit

    return "".join(match_bits)
import argparse
import os
import sys
import logging
from pathlib import Path

# Add parent directory to path so the shared ``generator`` module resolves
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Width of every emitted instruction pattern, and of a compressed encoding.
_WORD_BITS = 32
_COMPRESSED_BITS = 16


def format_instruction_name(name):
    """Format instruction name for SystemVerilog (uppercase with underscores).

    Dots become underscores, so a compressed mnemonic such as "c.addi"
    becomes "C_ADDI" without any special casing.
    """
    return name.replace(".", "_").upper()


def format_csr_name(name):
    """Format CSR name for SystemVerilog ("CSR_" prefix, uppercase, underscores)."""
    return "CSR_" + name.replace(".", "_").upper()


def match_to_sverilog_bits(match_str, is_compressed=False):
    """Convert a match string to a 32-bit SystemVerilog bit pattern.

    The match string is read MSB-first, so its last character is bit 0.
    The pattern is therefore right-aligned in the 32-bit literal and every
    position not covered by it becomes a "?" wildcard.

    Args:
        match_str: String of "0", "1" and don't-care characters (any other
            character, typically "-"). Empty or None yields all wildcards.
        is_compressed: Treat the pattern as a 16-bit encoding. A 16-character
            pattern is treated as compressed regardless of this flag.

    Returns:
        A string of the form "32'b" followed by exactly 32 characters drawn
        from "0", "1" and "?".
    """
    if not match_str:
        return "32'b" + "?" * _WORD_BITS

    if is_compressed or len(match_str) == _COMPRESSED_BITS:
        # Only the low 16 bits belong to a compressed encoding. A longer
        # pattern (e.g. a synthesized 32-character one) must not widen the
        # literal beyond its declared 32 bits.
        match_str = match_str[-_COMPRESSED_BITS:]
    elif len(match_str) > _WORD_BITS:
        # Keep the low 32 bits rather than emit an over-wide literal.
        logging.warning(
            "Match pattern is %d bits wide; keeping the low %d bits",
            len(match_str),
            _WORD_BITS,
        )
        match_str = match_str[-_WORD_BITS:]

    digits = "".join(bit if bit in ("0", "1") else "?" for bit in match_str)
    return "32'b" + digits.rjust(_WORD_BITS, "?")


def _encoding_match(encoding):
    """Return the match string carried by *encoding*, or all wildcards."""
    if isinstance(encoding, str):
        return encoding
    if isinstance(encoding, dict) and "match" in encoding:
        return encoding["match"]
    return "-" * _WORD_BITS


def generate_sverilog(instructions, csrs, output_file):
    """Generate SystemVerilog package file.

    Args:
        instructions: Mapping of instruction name to its encoding. An
            encoding may be a dict with a "match" entry or a bare match
            string; anything else is emitted as all wildcards.
            NOTE(review): the loader that produces this mapping is outside
            this file, so both shapes are accepted — confirm which it returns.
        csrs: Mapping of integer CSR address to CSR name.
        output_file: Path of the file to write (overwritten if present).
    """
    instr_names = {name: format_instruction_name(name) for name in instructions}
    csr_names = {addr: format_csr_name(csrs[addr]) for addr in csrs}

    # One shared column width keeps instruction and CSR names aligned.
    max_len = max(
        map(len, [*instr_names.values(), *csr_names.values()]), default=0
    )

    # Render everything first so a formatting error cannot leave a
    # half-written package on disk.
    lines = [
        "\n/* Automatically generated by parse_opcodes */\n",
        "package riscv_instr;\n",
    ]

    for name in sorted(instructions):
        sv_bits = match_to_sverilog_bits(
            _encoding_match(instructions[name]), name.startswith("c.")
        )
        padded_name = instr_names[name].ljust(max_len)
        lines.append(f" localparam [31:0] {padded_name} = {sv_bits};\n")

    for addr in sorted(csrs):
        padded_name = csr_names[addr].ljust(max_len)
        # CSR addresses are emitted as 12-bit hex literals.
        lines.append(f" localparam logic [11:0] {padded_name} = 12'h{addr:03x};\n")

    lines.append("\nendpackage\n")

    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(lines)


def parse_args(argv=None):
    """Parse command-line options.

    Args:
        argv: Optional argument list; defaults to ``sys.argv[1:]`` when None.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description="Generate SystemVerilog package from RISC-V instruction definitions"
    )
    parser.add_argument(
        "--inst-dir",
        default="../../../gen/resolved_spec/_/inst/",
        help="Directory containing instruction YAML files",
    )
    parser.add_argument(
        "--csr-dir",
        default="../../../gen/resolved_spec/_/csr/",
        help="Directory containing CSR YAML files",
    )
    parser.add_argument(
        "--output",
        default="inst.sverilog",
        help="Output SystemVerilog file name"
    )
    parser.add_argument(
        "--extensions",
        default="A,D,F,I,M,Q,Zba,Zbb,Zbs,S,System,V,Zicsr,Smpmp,Sm,H,U,Zicntr,Zihpm,Smhpm",
        help="Comma-separated list of enabled extensions. Default includes standard extensions.",
    )
    parser.add_argument(
        "--arch",
        default="RV64",
        choices=["RV32", "RV64", "BOTH"],
        help="Target architecture (RV32, RV64, or BOTH). Default is RV64.",
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Enable verbose logging"
    )
    parser.add_argument(
        "--include-all",
        action="store_true",
        help="Include all instructions and CSRs regardless of extensions",
    )
    return parser.parse_args(argv)


def main():
    """Load instruction and CSR definitions and write the SystemVerilog package."""
    # Imported here, after the sys.path tweak above, so the pure formatting
    # helpers in this module stay importable without the project on the path.
    from generator import load_instructions, load_csrs

    args = parse_args()

    # Set up logging
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level, format="%(levelname)s:: %(message)s")

    # Parse extensions
    if args.include_all:
        enabled_extensions = []
        logging.info("Including all instructions and CSRs (ignoring extension filter)")
    else:
        # Drop empty entries produced by stray or trailing commas.
        enabled_extensions = [
            ext.strip() for ext in args.extensions.split(",") if ext.strip()
        ]
        logging.info(f"Enabled extensions: {', '.join(enabled_extensions)}")

    logging.info(f"Target architecture: {args.arch}")

    # Load instructions
    instructions = load_instructions(
        args.inst_dir, enabled_extensions, args.include_all, args.arch
    )
    logging.info(f"Loaded {len(instructions)} instructions")

    # Load CSRs
    csrs = load_csrs(args.csr_dir, enabled_extensions, args.include_all, args.arch)
    logging.info(f"Loaded {len(csrs)} CSRs")

    # Generate the SystemVerilog file
    generate_sverilog(instructions, csrs, args.output)
    logging.info(
        f"Generated {args.output} with {len(instructions)} instructions and {len(csrs)} CSRs"
    )


if __name__ == "__main__":
    main()
Configuration name (defaults to "_") + * OUTPUT_DIR - Output directory for generated SystemVerilog code (defaults to "#{$root}/gen/sverilog") + DESC + task sverilog: "#{$root}/gen/sverilog" do + config_name = ENV["CONFIG"] || "_" + output_dir = ENV["OUTPUT_DIR"] || "#{$root}/gen/sverilog/" + + # Ensure the output directory exists + FileUtils.mkdir_p output_dir + + # Get the arch paths based on the config + resolver = Udb::Resolver.new + cfg_arch = resolver.cfg_arch_for(config_name) + inst_dir = cfg_arch.path / "inst" + csr_dir = cfg_arch.path / "csr" + + # Run the SystemVerilog generator script using the same Python environment + sh "#{$root}/.home/.venv/bin/python3 #{$root}/backends/generators/sverilog/sverilog_generator.py --inst-dir=#{inst_dir} --csr-dir=#{csr_dir} --output=#{output_dir}inst.sverilog" + end end