From e5d1c24ef1ea227b04bd7964bfa13eca48e7e9b3 Mon Sep 17 00:00:00 2001 From: Andy Ross Date: Mon, 5 Sep 2022 08:03:10 -0700 Subject: [PATCH] DO NOT MERGE: Experiments with instruction encoding extraction The tools in this directory produce JSON files describing instruction encodings based on the binutils overlay file xtensa-modules.c. The input format accepted is a .tar.gz or .tar.bz2 overlay tarball as accepted by a crosstool-NG build. Just run "make" in this directory to produce a JSON file for each machine description. (It takes surprisingly long to compile these files, so feel free to build in parallel to speed the process!). Signed-off-by: Andy Ross --- encoding_data/Makefile | 44 ++++++++++ encoding_data/README.txt | 83 ++++++++++++++++++ encoding_data/validate.py | 58 +++++++++++++ encoding_data/xt-insn-gen.c | 166 +++++++++++++++++++++++++++++++++++ encoding_data/xt-insn-mod.h | 169 ++++++++++++++++++++++++++++++++++++ 5 files changed, 520 insertions(+) create mode 100644 encoding_data/Makefile create mode 100644 encoding_data/README.txt create mode 100755 encoding_data/validate.py create mode 100644 encoding_data/xt-insn-gen.c create mode 100644 encoding_data/xt-insn-mod.h diff --git a/encoding_data/Makefile b/encoding_data/Makefile new file mode 100644 index 0000000..3a3a813 --- /dev/null +++ b/encoding_data/Makefile @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2022 Google LLC. All rights reserved. +# Author: Andy Ross + +OVERLAY_DIR = .. + +# Modules file within tarball +MODFILE = binutils/bfd/xtensa-modules.c + +CC = gcc + +# Don't optimize! 
It adds FAR longer (~4s) to the build time for +# these huge files than the runtime savings (~10ms) +CFLAGS = -std=c11 -Wall -Wno-unused-variable + +JSONS := $(patsubst %.tar,%.json,$(basename $(notdir $(wildcard $(OVERLAY_DIR)/*.tar*)))) + +all: $(JSONS) + +clean: + rm -f tmpmod-* insgen-* *.json + +%.json : gen.% + +# Two kinds of tarball, and the path to the file can be absolute or +# prefixed by a "./". Clumsy. + +tmpmod-%.c : $(OVERLAY_DIR)/%.tar.gz + (tar -O -x -f $< $(MODFILE) || tar -O -x -f $< ./$(MODFILE)) \ + | grep -vP '^#\s*include' > $@ + +tmpmod-%.c : $(OVERLAY_DIR)/%.tar.bz2 + (tar -O -x -f $< $(MODFILE) || tar -O -x -f $< ./$(MODFILE)) \ + | grep -vP '^#\s*include' > $@ + +tmpmod-%.o : tmpmod-%.c + $(CC) $(CFLAGS) -include xt-insn-mod.h -c -o $@ $< + +insgen-% : tmpmod-%.o xt-insn-gen.c + $(CC) $(CFLAGS) -o $@ $^ + +%.json : insgen-% + ./$< > $@ + ./validate.py $@ diff --git a/encoding_data/README.txt b/encoding_data/README.txt new file mode 100644 index 0000000..cf6530b --- /dev/null +++ b/encoding_data/README.txt @@ -0,0 +1,83 @@ + +Xtensa Instruction Encoding Generator +===================================== + +The tools in this directory produce JSON files describing instruction +encodings based on the binutils overlay file xtensa-modules.c. The +input format accepted is a .tar.gz or .tar.bz2 overlay tarball as +accepted by a crosstool-NG build. Just run "make" in this directory +to produce a JSON file for each machine description. (It takes +surprisingly long to compile these files, so feel free to build in +parallel to speed the process!). + +JSON Data Format +---------------- + +The output JSON file consists of a single object with an "opcodes" +member, whose value is an array of opcode objects: + +* opcodes[].opcode: + + An ASCII string name for the opcode, e.g. "add" or + "ae_muls32f48p16s.ll". 
These names are as they appear in the + binutils code, so I believe they are intended to correspond to the + strings accepted by the resulting assembler. + +* opcodes[].variants[]: + + An array of "variant" encodings that can express the opcode in an + instruction stream. + +* opcodes[].variants[].format: + + An ASCII string name for the resulting instruction format, for + example "x24" (a "normal" 3-byte Xtensa instruction), "x16a" (one of + the two "narrow" encodings), or "ae_format2" (a HiFi 4 bundle type). + +* opcodes[].variants[].slot: + + Some instruction formats are VLIW "bundles" that can encode more + than one instruction. The position of an instruction within such a + bundle is identified by an integer "slot" ID. + +* opcodes[].variants[].format_bits[]: + + An array of integers representing bits in the resulting instruction + that must be set to one to correctly tag the format in use. The + bit order is little endian, so e.g. a "0" in the list indicates + that the lowest bit of the first byte of the instruction should be + set, a "17" will set the second lowest bit of the third byte, etc. + There is unfortunately no representation in the input data of bits + that must be set to zero; those are left implicit. + +* opcodes[].variants[].opcode_bits[]: + + Similar to format_bits, these are bits in the instruction that must + be set to correctly tag the specific opcode. In the case of VLIW + HiFi bundles, these obviously specify only bits used by the given + slot. Note that the input data is NOT DISJOINT: some opcode records + have the same bit present in both opcode_bits and format_bits. + +* opcodes[].variants[].args[]: + + Each opcode has an array of arguments, each a JSON object. + +* opcodes[].variants[].args[].arg: + + An ASCII string name for the argument, e.g. "art" or "ae_mul_q0". + These names seem to correlate fairly well to printed documentation, + but in some cases the naming is obtuse. 
+ +* opcodes[].variants[].args[].field_bits: + + A list of integers representing the bit positions in the output + instruction of each bit of the argument value. Arguments can have + variable size, e.g. 4 bits can store a register ID, but there are + immediates that can be larger, and there are some ("invisible") + arguments in the input data which are described via a zero-length + field argument. The bit encoding is little endian as before, that + is the first integer stores the LE bit position within the + instruction in which the lowest bit of the argument value should be + placed. Note that these bit positions are NOT IN GENERAL + CONTIGUOUS, NOR IN-ORDER. Especially some HiFi instructions have + odd, disjoint encodings. Be careful. diff --git a/encoding_data/validate.py b/encoding_data/validate.py new file mode 100755 index 0000000..c0c242a --- /dev/null +++ b/encoding_data/validate.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +import sys +import json +import pprint +import types + +js = json.load(open(sys.argv[1]), + object_hook=lambda d: types.SimpleNamespace(**d)) + +#pprint.pp(js.opcodes) + +# Map of format -> encoded bits +format_bits = {} + +# Sets of bits indexed by (format,slot) tuple +slot_bits = {} + +# Format to count of slots +format_slots = {} + +# First pass to initialize dicts +for op in js.opcodes: + for v in op.variants: + format_bits[v.format] = v.format_bits + format_slots[v.format] = -1 + slot_bits[(v.format, v.slot)] = set() + +# Second to compute them +for op in js.opcodes: + for v in op.variants: + for a in v.args: + format_slots[v.format] = max([format_slots[v.format], v.slot + 1]) + for b in a.field_bits: + slot_bits[(v.format, v.slot)].add(b) + +# Make sure all instances of a given format have the same bit representation +for op in js.opcodes: + for v in op.variants: + assert v.format_bits == format_bits[v.format] + +# Make sure slots don't overlap with each other +for f in format_bits: + for s1 in range(format_slots[f]): + for s2 in 
range(s1 + 1, format_slots[f]): + b1 = slot_bits[(f, s1)] + b2 = slot_bits[(f, s2)] + assert b1.intersection(b2) == set() + +# Make sure format bits don't overlap with slots. This is incomplete, +# as the data from xtensa-modules.c only records bits that must be +# one. Bits required to be zero are effectively invisible, they look +# like "holes" to us. +for f in format_bits: + for s in range(format_slots[f]): + sb = slot_bits[(f, s)] + for fb in format_bits[f]: + assert fb not in sb diff --git a/encoding_data/xt-insn-gen.c b/encoding_data/xt-insn-gen.c new file mode 100644 index 0000000..e66927f --- /dev/null +++ b/encoding_data/xt-insn-gen.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright(c) 2022 Google LLC. All rights reserved. +// Author: Andy Ross + +#include "xt-insn-mod.h" +#include <stdbool.h> +#include <stdio.h> + +// Kludgey JSON generator for Xtensa instruction data. Works by +// iteratively encoding data using the utilities provided in the +// binutils overlay files to extract bit positions one at a time. +// Note that a core assumption here is that the fields and operand +// encodings in the ISA are all encoded bitwise, where the only +// transformations being done are motion of individual bits. That is +// true empirically for all existing instruction subsets, but future +// instruction encodings might get more complicated and this code +// would be surprised. 
+
+// Number of unsigned int words in an xtensa_insnbuf.  Derived from the
+// typedef so buffer-clearing code cannot drift from MAX_INSN_BITS.
+enum { INSN_WORDS = sizeof(xtensa_insnbuf) / sizeof(unsigned int) };
+
+// Width of the value handed to a field setter.  The setter prototype
+// takes an unsigned int, and the insnbuf indexing in this file
+// already assumes 32-bit words.
+enum { FIELD_VALUE_BITS = 32 };
+
+// Returns the index in xtensa_modules.formats[] of the first format
+// whose slot list contains the global slot index "slot", or -1 if no
+// format lists it.
+int find_format(int slot)
+{
+	for (int f = 0; f < xtensa_modules.n_formats; f++) {
+		for (int s = 0; s < xtensa_modules.formats[f].n_slots; s++) {
+			if (xtensa_modules.formats[f].slots[s] == slot) {
+				return f;
+			}
+		}
+	}
+	return -1;
+}
+
+// Zeroes every word of an instruction buffer.  The word count comes
+// from the xtensa_insnbuf type itself: the buffer is only
+// (MAX_INSN_BITS + 31) / 32 words long, so clearing a hard-coded four
+// words would write past the end of it.
+void clear_insn(xtensa_insnbuf insn)
+{
+	for (unsigned int i = 0; i < INSN_WORDS; i++) {
+		insn[i] = 0;
+	}
+}
+
+// Scans the first MAX_INSN_BITS bits of insn (bit b lives in word
+// b / 32 at position b % 32) and stores the index of each set bit, in
+// ascending order, into setbits[].  Returns the number of set bits.
+int find_set_bits(const xtensa_insnbuf insn, unsigned char setbits[MAX_INSN_BITS])
+{
+	int n = 0;
+
+	for (int b = 0; b < MAX_INSN_BITS; b++) {
+		/* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
+		if (insn[b / 32] & (1u << (b % 32))) {
+			setbits[n++] = b;
+		}
+	}
+	return n;
+}
+
+// Discovers where each bit of a field lands in the full instruction.
+// For value bit n = 0, 1, 2, ... the field is set to (1 << n) in an
+// empty slot buffer via "setter", the slot buffer is placed into an
+// empty instruction via "slotter", and the lowest set bit of the
+// result is stored in setbits[n].  Stops at the first value bit that
+// produces no set bits.  Returns the number of field bits found.
+int find_arg_bits(xtensa_set_field_fn setter, set_slot_fn slotter,
+		  unsigned char setbits[MAX_INSN_BITS])
+{
+	unsigned char bits2[MAX_INSN_BITS];
+	xtensa_insnbuf insn, insn2;
+	int n;
+
+	/* The setter takes an unsigned int, so there is nothing to probe
+	 * beyond FIELD_VALUE_BITS; shifting further would be undefined.
+	 */
+	for (n = 0; n < FIELD_VALUE_BITS && n < MAX_INSN_BITS; n++) {
+		clear_insn(insn);
+		clear_insn(insn2);
+		setter(insn, 1u << n);
+		slotter(insn2, insn);
+
+		if (find_set_bits(insn2, bits2) == 0) {
+			break;
+		}
+		setbits[n] = bits2[0];
+	}
+
+	return n;
+}
+
+// Prints the first n entries of bits[] to stdout as a comma-separated
+// list of decimal integers, each preceded by a space.
+void print_bits(int n, unsigned char bits[MAX_INSN_BITS])
+{
+	for (int i = 0; i < n; i++) {
+		if (i != 0) {
+			printf(",");
+		}
+		printf(" %d", bits[i]);
+	}
+}
+
+// Prints one "variant" JSON object for an opcode: the format name and
+// slot number, the bits set by the format encoder, the bits set by
+// the opcode encoder once placed in the slot, and the bit positions
+// of each operand of the instruction class.
+//
+// encoder: opcode encoder for this slot
+// slot:    global index into xtensa_modules.slots[]
+// iclass:  instruction class supplying the operand list
+//
+// NOTE(review): the result of find_format() is used unchecked, so a
+// slot that is not listed by any format would index formats[-1].
+void opcode_variant(xtensa_opcode_encode_fn encoder, int slot,
+		    xtensa_iclass_internal *iclass)
+{
+	xtensa_slot_internal *s = &xtensa_modules.slots[slot];
+	int fid = find_format(slot);
+
+	printf("\n { \"format\" : \"%s\", \"slot\" : %d,\n", s->format, s->slot);
+
+	unsigned char bits[MAX_INSN_BITS];
+	xtensa_insnbuf insn, insn2;
+	int n_bits;
+
+	/* Get the bits needed for the format as a whole first */
+	clear_insn(insn);
+	xtensa_modules.formats[fid].encode(insn);
+	n_bits = find_set_bits(insn, bits);
+
+	printf(" \"format_bits\" : [");
+	print_bits(n_bits, bits);
+	printf(" ],\n");
+
+	/* Now the bits for the opcode encoding.  This happens
+	 * (somewhat inexplicably) in two parts.  First step encodes
+	 * the single instruction into the low bits of the
+	 * instruction word?
+	 */
+	clear_insn(insn);
+	encoder(insn);
+
+	/* Second step needs to move that to the slot's position in
+	 * the full word?
+	 */
+	clear_insn(insn2);
+	s->set_slot(insn2, insn);
+
+	n_bits = find_set_bits(insn2, bits);
+	printf(" \"opcode_bits\" : [");
+	print_bits(n_bits, bits);
+	printf(" ],\n");
+
+	printf(" \"args\" : [");
+	for (int i = 0; i < iclass->n_operands; i++) {
+		int opid = iclass->operands[i].id[0];
+		xtensa_operand_internal *op = &xtensa_modules.operands[opid];
+
+		n_bits = find_arg_bits(s->field_setters[op->field],
+				       s->set_slot, bits);
+
+		if (i != 0) {
+			printf(",");
+		}
+		printf("\n { \"arg\" : \"%s\",\n", op->name);
+		printf(" \"field_bits\" : [");
+		print_bits(n_bits, bits);
+		printf(" ] }");
+	}
+	printf(" ] }");
+}
+
+// Emits the whole JSON document on stdout: one object per opcode in
+// xtensa_modules.opcodes[], each carrying one variant per slot for
+// which the opcode has a non-NULL encoder.
+int main(void)
+{
+	printf("{ \"opcodes\" : [");
+	for (int i = 0; i < xtensa_modules.n_opcodes; i++) {
+		xtensa_opcode_internal *opc = &xtensa_modules.opcodes[i];
+		bool first = true;
+
+		if (i != 0) {
+			printf(",");
+		}
+		printf("\n { \"opcode\" : \"%s\", \"variants\" : [",
+		       opc->name);
+
+		for (int s = 0; s < xtensa_modules.n_slots; s++) {
+			/* Keep the real pointer types: ISO C defines no
+			 * conversion between function pointers and void *.
+			 */
+			xtensa_opcode_encode_fn encoder = opc->encoders[s];
+
+			if (!encoder) {
+				continue;
+			}
+			if (!first) {
+				printf(",");
+			}
+			first = false;
+
+			opcode_variant(encoder, s,
+				       &xtensa_modules.iclasses[opc->iclass]);
+		}
+		printf(" ] }");
+	}
+	printf("] }\n");
+	return 0;
+}
diff --git a/encoding_data/xt-insn-mod.h b/encoding_data/xt-insn-mod.h
new file mode 100644
index 0000000..8072cc7
--- /dev/null
+++ b/encoding_data/xt-insn-mod.h
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// Copyright(c) 2022 Google LLC. All rights reserved.
+// Author: Andy Ross
+
+// Minimal redeclaration, slightly reverse-engineered, of the
+// interfaces used by Xtensa xtensa-modules.c files (ones compatible
+// with 2.39 or so, anyway).  The full interface is somewhat thicker
+// and part of upstream binutils.
+
+#ifndef _XT_INSN_MOD_H
+#define _XT_INSN_MOD_H
+
+// Biggest HiFi 5 bundle is 11 bytes
+#define MAX_INSN_BITS 88
+
+// Not used elsewhere in this header; presumably referenced by the
+// overlay source that is compiled against it (TODO confirm).
+typedef unsigned int uint32;
+
+// Instruction buffer: an array of unsigned int words, with
+// MAX_INSN_BITS rounded up to a whole number of 32-bit words (3 words
+// for 88 bits).  Users index it as word = bit / 32, bit = bit % 32.
+typedef unsigned int xtensa_insnbuf[(MAX_INSN_BITS + 31)/32];
+
+// NOTE: the structs below are redeclarations of tables defined in the
+// overlay source, so field order and types must not be changed.
+// Fields named unusedN are placeholders that xt-insn-gen.c never
+// reads.
+
+// System register / state table entry.  Not read by xt-insn-gen.c.
+typedef struct {
+	const char *name;
+	int unused0;
+	int unused1;
+} xtensa_sysreg_internal, xtensa_state_internal;
+
+// Functional unit table entry.  Not read by xt-insn-gen.c.
+typedef struct {
+	const char *name;
+	int unused0;
+} xtensa_funcUnit_internal;
+
+// Functional unit usage record.  Not read by xt-insn-gen.c.
+typedef struct {
+	int id;
+	int unused0;
+} xtensa_funcUnit_use;
+
+// Register file table entry.  Not read by xt-insn-gen.c.
+typedef struct {
+	const char *name;
+	const char *unused0;
+	int id;
+	int unused1;
+	int unused2;
+} xtensa_regfile_internal;
+
+// Interface table entry.  Not read by xt-insn-gen.c.
+typedef struct {
+	const char *name;
+	int unused0;
+	int unused1;
+	int unused2;
+	char io;
+} xtensa_interface_internal;
+
+// One argument of an instruction class.  For operand arguments,
+// xt-insn-gen.c reads id[0] as an index into the operands table.
+typedef struct {
+	int id[1];
+	char io;
+} xtensa_arg_internal;
+
+// Instruction class: the argument lists shared by opcodes of that
+// class.  xt-insn-gen.c walks operands[0 .. n_operands-1] only.
+typedef struct {
+	int n_operands;
+	xtensa_arg_internal *operands;
+	int n_states;
+	xtensa_arg_internal *states;
+	int n_interfaces;
+	int *interfaces;
+} xtensa_iclass_internal;
+
+// Operand description.  "field" is used by xt-insn-gen.c as an index
+// into a slot's field_setters[] array; the function pointers are
+// declared as void * because they are never called from here.
+typedef struct {
+	const char *name;
+	int field;
+	int unused0;
+	int unused1;
+	int unused2;
+	void *encode_fn;
+	void *decode_fn;
+	void *ator_fn;
+	void *rtoa_fn;
+} xtensa_operand_internal;
+
+// Encoder/decoder and field accessor signatures operating on an
+// instruction (or slot) buffer.
+typedef void (*xtensa_opcode_encode_fn)(xtensa_insnbuf);
+typedef int (*xtensa_opcode_decode_fn)(const xtensa_insnbuf);
+typedef unsigned (*xtensa_get_field_fn)(const xtensa_insnbuf);
+typedef void (*xtensa_set_field_fn)(xtensa_insnbuf, unsigned int);
+
+// Slot transfer functions.  xt-insn-gen.c calls set_slot as
+// set_slot(full_instruction, slot_contents).
+typedef void (*get_slot_fn) (const xtensa_insnbuf, xtensa_insnbuf);
+typedef void (*set_slot_fn) (xtensa_insnbuf, const xtensa_insnbuf);
+
+// Opcode table entry.  "iclass" indexes the iclasses table;
+// "encoders" is indexed by global slot number, and xt-insn-gen.c
+// treats a NULL entry as "not encodable in that slot".
+typedef struct {
+	const char *name;
+	int iclass;
+	int unused0;
+	xtensa_opcode_encode_fn *encoders;
+	int unused1;
+	void *unused2;
+} xtensa_opcode_internal;
+
+// Slot table entry.  "format" and "slot" are emitted verbatim as the
+// JSON "format" and "slot" members; field_setters[] is indexed by an
+// operand's "field" value.
+typedef struct {
+	const char *name;
+	const char *format;
+	int slot;
+	get_slot_fn get_slot;
+	set_slot_fn set_slot;
+	xtensa_get_field_fn *field_getters;
+	xtensa_set_field_fn *field_setters;
+	xtensa_opcode_decode_fn decoder;
+	const char *unused0;
+} xtensa_slot_internal;
+
+// Format table entry.  "encode" is called on a zeroed buffer to find
+// the format's tag bits; slots[0 .. n_slots-1] are compared against
+// global slot indices by xt-insn-gen.c.
+typedef struct {
+	const char *name;
+	int unused0;
+	xtensa_opcode_encode_fn encode;
+	int n_slots;
+	int *slots;
+} xtensa_format_internal;
+
+// Top-level ISA description.  xt-insn-gen.c reads the formats, slots,
+// operands, iclasses and opcodes tables with their counts; the
+// remaining members exist only to keep the layout intact.
+typedef struct {
+	int bigendian;
+	int insn_size;
+	int unused0;
+	int n_formats;
+	xtensa_format_internal *formats;
+	int (*format_decoder)(const xtensa_insnbuf);
+	int (*length_decoder)(const unsigned char *);
+	int n_slots;
+	xtensa_slot_internal *slots;
+	int n_fields;
+	int n_operands;
+	xtensa_operand_internal *operands;
+	int n_iclasses;
+	xtensa_iclass_internal *iclasses;
+	int n_opcodes;
+	xtensa_opcode_internal *opcodes;
+	void *unused1;
+	int n_regfiles;
+	void *unused2;
+	int n_states;
+	void *states;
+	void *unused3;
+	int n_sysregs;
+	void *sysregs;
+	void *unused4;
+	int sysreg_max[2];
+	int unused5[2];
+	int n_interfaces;
+	xtensa_interface_internal *interfaces;
+	void *unused6;
+	int n_units;
+	void *units;
+	void *unused7;
+} xtensa_isa_internal;
+
+// Defined empty here; presumably the overlay source uses it as an
+// attribute annotation (TODO confirm).
+#define ATTRIBUTE_UNUSED /**/
+
+// Flag and sentinel constants.  Nothing in this header or in
+// xt-insn-gen.c references them; presumably they are needed so the
+// overlay source's table initializers compile (TODO confirm).
+#define XTENSA_OPERAND_IS_REGISTER 1
+#define XTENSA_OPERAND_IS_PCRELATIVE 2
+#define XTENSA_OPERAND_IS_INVISIBLE 4
+#define XTENSA_OPERAND_IS_UNKNOWN 8
+#define XTENSA_OPCODE_IS_BRANCH 1
+#define XTENSA_OPCODE_IS_JUMP 2
+#define XTENSA_OPCODE_IS_LOOP 4
+#define XTENSA_OPCODE_IS_CALL 8
+#define XTENSA_STATE_IS_EXPORTED 1
+#define XTENSA_STATE_IS_SHARED_OR 2
+#define XTENSA_INTERFACE_HAS_SIDE_EFFECT 1
+#define XTENSA_UNDEFINED -1
+
+typedef int xtensa_interface;
+
+// The ISA table itself.  The Makefile compiles the extracted overlay
+// file with "-include xt-insn-mod.h" and links it with xt-insn-gen.c,
+// so the definition is expected to come from that object.
+extern xtensa_isa_internal xtensa_modules;
+
+#endif /* _XT_INSN_MOD_H */