From e5d1c24ef1ea227b04bd7964bfa13eca48e7e9b3 Mon Sep 17 00:00:00 2001
From: Andy Ross <andyross@google.com>
Date: Mon, 5 Sep 2022 08:03:10 -0700
Subject: [PATCH] DO NOT MERGE: Experiments with instruction encoding
 extraction

The tools in this directory produce JSON files describing instruction
encodings based on the binutils overlay file xtensa-modules.c.  The
input format accepted is a .tar.gz or .tar.bz2 overlay tarball as
accepted by a crosstools-ng build.  Just run "make" in this directory
to produce a JSON file for each machine description.  (It takes
surprisingly long to compile these files, so feel free to build in
parallel to speed the process!).

Signed-off-by: Andy Ross <andyross@google.com>
---
 encoding_data/Makefile      |  44 ++++++++++
 encoding_data/README.txt    |  83 ++++++++++++++++++
 encoding_data/validate.py   |  58 +++++++++++++
 encoding_data/xt-insn-gen.c | 166 +++++++++++++++++++++++++++++++++++
 encoding_data/xt-insn-mod.h | 169 ++++++++++++++++++++++++++++++++++++
 5 files changed, 520 insertions(+)
 create mode 100644 encoding_data/Makefile
 create mode 100644 encoding_data/README.txt
 create mode 100755 encoding_data/validate.py
 create mode 100644 encoding_data/xt-insn-gen.c
 create mode 100644 encoding_data/xt-insn-mod.h

diff --git a/encoding_data/Makefile b/encoding_data/Makefile
new file mode 100644
index 0000000..3a3a813
--- /dev/null
+++ b/encoding_data/Makefile
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 Google LLC. All rights reserved.
+# Author: Andy Ross <andyross@google.com>
+
+OVERLAY_DIR = ..
+
+# Modules file within tarball
+MODFILE = binutils/bfd/xtensa-modules.c
+
+CC = gcc
+
+# Don't optimize!  It adds FAR more (~4s) to the build time for
+# these huge files than it saves at runtime (~10ms)
+CFLAGS = -std=c11 -Wall -Wno-unused-variable
+
+JSONS := $(patsubst %.tar,%.json,$(basename $(notdir $(wildcard $(OVERLAY_DIR)/*.tar*))))
+
+all: $(JSONS)
+
+clean:
+	rm -f tmpmod-* insgen-* *.json
+
+.PHONY: all clean
+.DELETE_ON_ERROR: # never keep a .json whose generation or validation failed
+
+# Two kinds of tarball, and the path to the file can be absolute or
+# prefixed by a "./".  Clumsy.
+tmpmod-%.c : $(OVERLAY_DIR)/%.tar.gz
+	(tar -O -x -f $< $(MODFILE) || tar -O -x -f $< ./$(MODFILE)) \
+	 | grep -vP '^#\s*include' > $@
+
+tmpmod-%.c : $(OVERLAY_DIR)/%.tar.bz2
+	(tar -O -x -f $< $(MODFILE) || tar -O -x -f $< ./$(MODFILE)) \
+	 | grep -vP '^#\s*include' > $@
+
+tmpmod-%.o : tmpmod-%.c xt-insn-mod.h
+	$(CC) $(CFLAGS) -include xt-insn-mod.h -c -o $@ $<
+
+insgen-% : tmpmod-%.o xt-insn-gen.c xt-insn-mod.h
+	$(CC) $(CFLAGS) -o $@ $(filter-out %.h,$^)
+
+%.json : insgen-% validate.py
+	./$< > $@
+	./validate.py $@
diff --git a/encoding_data/README.txt b/encoding_data/README.txt
new file mode 100644
index 0000000..cf6530b
--- /dev/null
+++ b/encoding_data/README.txt
@@ -0,0 +1,83 @@
+
+Xtensa Instruction Encoding Generator
+=====================================
+
+The tools in this directory produce JSON files describing instruction
+encodings based on the binutils overlay file xtensa-modules.c.  The
+input format accepted is a .tar.gz or .tar.bz2 overlay tarball as
+accepted by a crosstools-ng build.  Just run "make" in this directory
+to produce a JSON file for each machine description.  (It takes
+surprisingly long to compile these files, so feel free to build in
+parallel to speed the process!).
+
+JSON Data Format
+----------------
+
+The output JSON file consists of a single object with an "opcodes"
+member, whose value is an array of opcode objects:
+
+* opcodes[].opcode:
+
+  An ASCII string name for the opcode, e.g. "add" or
+  "ae_muls32f48p16s.ll".  These names are as they appear in the
+  binutils code, so I believe they are intended to correspond to the
+  strings accepted by the resulting assembler.
+
+* opcodes[].variants[]:
+
+  An array of "variant" encodings that can express the opcode in an
+  instruction stream.
+
+* opcodes[].variants[].format:
+
+  An ASCII string name for the resulting instruction format, for
+  example "x24" (a "normal" 3-byte Xtensa instruction), "x16a" (one of
+  the two "narrow" encodings), or "ae_format2" (a HiFi 4 bundle type).
+
+* opcodes[].variants[].slot:
+
+  Some instruction formats are VLIW "bundles" that can encode more
+  than one instruction.  These are distinguished by an integer "slot"
+  ID.
+
+* opcodes[].variants[].format_bits[]:
+
+  An array of integers representing bits in the resulting instruction
+  that must be set to one to correctly tag the format in use.  The
+  bit order is little endian, so e.g. a "0" in the list indicates
+  that the lowest bit of the first byte of the instruction should be
+  set, a "17" will set the second lowest bit of the third byte, etc...
+  There is unfortunately no representation in the input data of bits
+  that must be set to zero, those are left implicit.
+
+* opcodes[].variants[].opcode_bits[]:
+
+  Similar to format_bits, these are bits in the instruction that must
+  be set to correctly tag the specific opcode.  In the case of VLIW
+  HiFi bundles, these obviously specify only bits used by the given
+  slot.  Note that the input data is NOT DISJOINT, some opcode records
+  have the same bit present in both opcode_bits and format_bits.
+
+* opcodes[].variants[].args[]:
+
+  Each opcode has an array of arguments, each a JSON object.
+
+* opcodes[].variants[].args[].arg:
+
+  An ASCII string name for the argument, e.g. "art" or "ae_mul_q0".
+  These names seem to correlate fairly well to printed documentation,
+  but in some cases the naming is obtuse.
+
+* opcodes[].variants[].args[].field_bits:
+
+  A list of integers representing the bit positions in the output
+  instruction of each bit of the argument value.  Arguments can have
+  variable size, e.g. 4 bits can store a register ID, but there are
+  immediates that can be larger, and there are some ("invisible")
+  arguments in the input data which are described via a zero-length
+  field argument.  The bit encoding is little endian as before, that
+  is the first integer stores the LE bit position within the
+  instruction in which the lowest bit of the argument value should be
+  placed.  Note that these bit positions are NOT IN GENERAL
+  CONTIGUOUS, NOR IN-ORDER.  Especially some HiFi instructions have
+  odd, disjoint encodings.  Be careful.
diff --git a/encoding_data/validate.py b/encoding_data/validate.py
new file mode 100755
index 0000000..c0c242a
--- /dev/null
+++ b/encoding_data/validate.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+
+import sys
+import json
+import pprint
+import types
+
+js = json.load(open(sys.argv[1]),
+               object_hook=lambda d: types.SimpleNamespace(**d))
+
+#pprint.pp(js.opcodes)
+
+# Map of format -> encoded bits
+format_bits = {}
+
+# Sets of bits indexed by (format,slot) tuple
+slot_bits = {}
+
+# Format to count of slots
+format_slots = {}
+
+# First pass to initialize dicts
+for op in js.opcodes:
+    for v in op.variants:
+        format_bits[v.format] = v.format_bits
+        format_slots[v.format] = -1
+        slot_bits[(v.format, v.slot)] = set()
+
+# Second to compute them
+for op in js.opcodes:
+    for v in op.variants:
+        for a in v.args:
+            format_slots[v.format] = max([format_slots[v.format], v.slot + 1])
+            for b in a.field_bits:
+                slot_bits[(v.format, v.slot)].add(b)
+
+# Make sure all instances of a given format have the same bit representation
+for op in js.opcodes:
+    for v in op.variants:
+        assert v.format_bits == format_bits[v.format]
+
+# Make sure slots don't overlap with each other
+for f in format_bits:
+    for s1 in range(format_slots[f]):
+        for s2 in range(s1 + 1, format_slots[f]):
+            b1 = slot_bits[(f, s1)]
+            b2 = slot_bits[(f, s2)]
+            assert b1.intersection(b2) == set()
+
+# Make sure format bits don't overlap with slots.  This is incomplete,
+# as the data from xtensa-modules.c only records one bits.  Bits
+# required to be zero are effectively invisible, they look like
+# "holes" to us.
+for f in format_bits:
+    for s in range(format_slots[f]):
+        sb = slot_bits[(f, s)]
+        for fb in format_bits[f]:
+            assert fb not in sb
diff --git a/encoding_data/xt-insn-gen.c b/encoding_data/xt-insn-gen.c
new file mode 100644
index 0000000..e66927f
--- /dev/null
+++ b/encoding_data/xt-insn-gen.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// Copyright(c) 2022 Google LLC. All rights reserved.
+// Author: Andy Ross <andyross@google.com>
+
+#include "xt-insn-mod.h"
+#include <stdio.h>
+#include <stdbool.h>
+
+// Kludgey JSON generator for Xtensa instruction data.  Works by
+// iteratively encoding data using the utilities provided in the
+// binutils overlay files to extract bit positions one at a time.
+// Note that a core assumption here is that the fields and operand
+// encodings in the ISA are all encoded bitwise, where the only
+// transformations being done are motion of individual bits.  That is
+// true empirically for all existing instruction subsets, but future
+// instruction encodings might get more complicated and this code
+// would be surprised.
+
+// Index of the first format whose slot list contains "slot", else -1.
+int find_format(int slot)
+{
+	int found = -1;
+	for (int fi = 0; found < 0 && fi < xtensa_modules.n_formats; fi++) {
+		for (int k = 0; found < 0 && k < xtensa_modules.formats[fi].n_slots; k++) {
+			found = (xtensa_modules.formats[fi].slots[k] == slot) ? fi : -1;
+		}
+	}
+	return found;
+}
+
+void clear_insn(xtensa_insnbuf insn) // zero every word; count matches the xtensa_insnbuf typedef (3 words, so insn[3] is past the end)
+{
+	for (int w = 0; w < (MAX_INSN_BITS + 31) / 32; w++) { insn[w] = 0; }
+}
+
+// Write the ascending indices of insn's set bits to setbits[]; return the count.
+int find_set_bits(const xtensa_insnbuf insn, unsigned char setbits[MAX_INSN_BITS])
+{
+	int n = 0;
+	for (int b = 0; b < MAX_INSN_BITS; b++) {
+		unsigned int mask = 1u << (b % 32); // unsigned: 1 << 31 overflows int
+		if (insn[b / 32] & mask) { setbits[n++] = b; }
+	}
+	return n;
+}
+
+// Probe an operand field one value bit at a time: setbits[i] gets the lowest
+// instruction bit set for value bit i.  Returns the number of bits recorded.
+int find_arg_bits(xtensa_set_field_fn setter, set_slot_fn slotter,
+		  unsigned char setbits[MAX_INSN_BITS])
+{
+	unsigned char bits2[MAX_INSN_BITS];
+	xtensa_insnbuf insn, insn2;
+	int n;
+	/* The value is an unsigned int: probe at most 32 bits, shifting 1u */
+	for (n = 0; n < 32; n++) {
+		clear_insn(insn);
+		clear_insn(insn2);
+		setter(insn, 1u << n);
+		slotter(insn2, insn);
+		if (find_set_bits(insn2, bits2) == 0) {
+			break;	/* value bit n produced nothing: end of field */
+		}
+		setbits[n] = bits2[0];
+	}
+	return n;
+}
+
+// Print bits[0..n-1] to stdout as " b0, b1, ..."; prints nothing when n <= 0.
+void print_bits(int n, unsigned char bits[MAX_INSN_BITS])
+{
+	int idx = 0;
+	while (idx < n) {
+		if (idx > 0) { printf(","); }
+		printf(" %d", bits[idx++]);
+	}
+}
+
+// Print the JSON object for one encoding of an opcode.  "encoder" is the
+// opcode's encode function for global slot index "slot"; "iclass" lists
+// its operands.  No comma separator is printed; the caller handles that.
+void opcode_variant(xtensa_opcode_encode_fn encoder, int slot,
+		    xtensa_iclass_internal *iclass)
+{
+	xtensa_slot_internal *slot_info = &xtensa_modules.slots[slot];
+	int owner = find_format(slot);	/* NOTE: -1 (no owner) is not handled */
+	unsigned char where[MAX_INSN_BITS];
+	xtensa_insnbuf raw, placed;
+	int count;
+
+	printf("\n   { \"format\" : \"%s\", \"slot\" : %d,\n",
+	       slot_info->format, slot_info->slot);
+
+	/* Format tag: the bits the owning format's encoder sets in an
+	 * otherwise cleared instruction buffer.
+	 */
+	clear_insn(raw);
+	xtensa_modules.formats[owner].encode(raw);
+	count = find_set_bits(raw, where);
+
+	printf("     \"format_bits\" : [");
+	print_bits(count, where);
+	printf(" ],\n");
+
+	/* Opcode tag, obtained in two steps: the encoder writes the
+	 * opcode into a cleared scratch buffer, then the slot's
+	 * set_slot function transfers that into a second cleared
+	 * buffer.  The bits reported are those set in the second one.
+	 */
+	clear_insn(raw);
+	encoder(raw);
+	clear_insn(placed);
+	slot_info->set_slot(placed, raw);
+	count = find_set_bits(placed, where);
+
+	printf("     \"opcode_bits\" : [");
+	print_bits(count, where);
+	printf(" ],\n");
+
+	/* One object per operand of the instruction class */
+	printf("     \"args\" : [");
+	for (int k = 0; k < iclass->n_operands; k++) {
+		const xtensa_operand_internal *operand =
+			&xtensa_modules.operands[iclass->operands[k].id[0]];
+		count = find_arg_bits(slot_info->field_setters[operand->field],
+				      slot_info->set_slot, where);
+		if (k > 0) {
+			printf(",");
+		}
+		printf("\n       { \"arg\" : \"%s\",\n", operand->name);
+		printf("         \"field_bits\" : [");
+		print_bits(count, where);
+		printf(" ] }");
+	}
+	printf(" ] }");
+}
+
+// Write the whole JSON document to stdout: one object per opcode, holding
+// one variant per slot for which the opcode provides an encoder.
+int main(void)
+{
+	printf("{ \"opcodes\" : [");
+	for (int i = 0; i < xtensa_modules.n_opcodes; i++) {
+		const xtensa_opcode_internal *opc = &xtensa_modules.opcodes[i];
+		bool first = true;
+		if (i != 0) {
+			printf(",");
+		}
+		printf("\n { \"opcode\" : \"%s\", \"variants\" : [", opc->name);
+		for (int s = 0; s < xtensa_modules.n_slots; s++) {
+			/* Typed, not void *: ISO C defines no void * conversion for function pointers */
+			xtensa_opcode_encode_fn encoder = opc->encoders[s];
+			if (!encoder) {
+				continue;	/* opcode has no encoding in this slot */
+			}
+			if (!first) {
+				printf(",");
+			}
+			first = false;
+			opcode_variant(encoder, s, &xtensa_modules.iclasses[opc->iclass]);
+		}
+		printf(" ] }");
+	}
+	printf("] }\n");
+	return 0;
+}
diff --git a/encoding_data/xt-insn-mod.h b/encoding_data/xt-insn-mod.h
new file mode 100644
index 0000000..8072cc7
--- /dev/null
+++ b/encoding_data/xt-insn-mod.h
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// Copyright(c) 2022 Google LLC. All rights reserved.
+// Author: Andy Ross <andyross@google.com>
+
+// Minimal redeclaration, slightly reverse-engineered, of the
+// interfaces used by Xtensa xtensa-modules.c files (ones compatible
+// with 2.39 or so, anyway).  The full interface is somewhat thicker
+// and part of upstream binutils.
+
+#ifndef _XT_INSN_MOD_H
+#define _XT_INSN_MOD_H
+
+// Biggest HiFi 5 bundle is 11 bytes, i.e. 88 bits
+#define MAX_INSN_BITS 88
+
+typedef unsigned int uint32;
+typedef unsigned int xtensa_insnbuf[(MAX_INSN_BITS + 31)/32]; // MAX_INSN_BITS rounded up to whole 32-bit words: 3 elements, indices 0..2 (assumes unsigned int is 32 bits wide)
+
+typedef struct { // One declaration serving two typedef names (sysreg and state); not referenced by xt-insn-gen.c.
+	const char *name;
+	int unused0; // "unusedN" members in this header are never accessed by name in xt-insn-gen.c
+	int unused1;
+} xtensa_sysreg_internal, xtensa_state_internal;
+
+typedef struct { // Not referenced by xt-insn-gen.c.
+	const char *name;
+	int unused0;
+} xtensa_funcUnit_internal;
+
+typedef struct { // Not referenced by xt-insn-gen.c.
+	int id;
+	int unused0;
+} xtensa_funcUnit_use;
+
+typedef struct { // Not referenced by xt-insn-gen.c.
+	const char *name;
+	const char *unused0;
+	int id;
+	int unused1;
+	int unused2;
+} xtensa_regfile_internal;
+
+typedef struct { // Element type of xtensa_isa_internal.interfaces; not read by xt-insn-gen.c.
+	const char *name;
+	int unused0;
+	int unused1;
+	int unused2;
+	char io; // NOTE(review): presumably a direction marker - confirm against binutils
+} xtensa_interface_internal;
+
+typedef struct { // Element type of the operands[] and states[] lists of an instruction class.
+	int id[1]; // xt-insn-gen.c reads id[0] of an operand as an index into xtensa_modules.operands[]
+	char io; // not read by xt-insn-gen.c
+} xtensa_arg_internal;
+
+typedef struct { // Entry of xtensa_modules.iclasses[]; main() looks it up through xtensa_opcode_internal.iclass.
+	int n_operands; // loop bound for operands[] in opcode_variant()
+	xtensa_arg_internal *operands; // walked by opcode_variant() to emit the "args" array
+	int n_states; // the remaining members are not read by xt-insn-gen.c
+	xtensa_arg_internal *states;
+	int n_interfaces;
+	int *interfaces;
+} xtensa_iclass_internal;
+
+typedef struct { // Entry of xtensa_modules.operands[].
+	const char *name; // emitted as the "arg" string in the JSON output
+	int field; // used by opcode_variant() as an index into a slot's field_setters[]
+	int unused0;
+	int unused1;
+	int unused2;
+	void *encode_fn; // the four function slots are declared untyped and never called by xt-insn-gen.c
+	void *decode_fn;
+	void *ator_fn;
+	void *rtoa_fn;
+} xtensa_operand_internal;
+
+typedef void (*xtensa_opcode_encode_fn)(xtensa_insnbuf); // xt-insn-gen.c calls these on a cleared buffer and reads back the bits that got set
+typedef int (*xtensa_opcode_decode_fn)(const xtensa_insnbuf); // not called by xt-insn-gen.c
+typedef unsigned (*xtensa_get_field_fn)(const xtensa_insnbuf); // not called by xt-insn-gen.c
+typedef void (*xtensa_set_field_fn)(xtensa_insnbuf, unsigned int); // called as setter(buffer, value) by find_arg_bits()
+
+typedef void (*get_slot_fn) (const xtensa_insnbuf, xtensa_insnbuf); // not called by xt-insn-gen.c
+typedef void (*set_slot_fn) (xtensa_insnbuf, const xtensa_insnbuf); // xt-insn-gen.c passes (cleared destination, encoded source)
+
+typedef struct { // Entry of xtensa_modules.opcodes[].
+	const char *name; // emitted as the "opcode" string in the JSON output
+	int iclass; // index into xtensa_modules.iclasses[]
+	int unused0;
+	xtensa_opcode_encode_fn *encoders; // main() indexes this by slot index 0..n_slots-1 and skips null entries
+	int unused1;
+	void *unused2;
+} xtensa_opcode_internal;
+
+typedef struct { // Entry of xtensa_modules.slots[].
+	const char *name; // not read by xt-insn-gen.c
+	const char *format; // emitted as the "format" string in the JSON output
+	int slot; // emitted as the "slot" number in the JSON output
+	get_slot_fn get_slot; // not called by xt-insn-gen.c
+	set_slot_fn set_slot; // applied to each encoded opcode and field value before its bits are read back
+	xtensa_get_field_fn *field_getters; // not read by xt-insn-gen.c
+	xtensa_set_field_fn *field_setters; // indexed by xtensa_operand_internal.field in opcode_variant()
+	xtensa_opcode_decode_fn decoder; // not called by xt-insn-gen.c
+	const char *unused0;
+} xtensa_slot_internal;
+
+typedef struct { // Entry of xtensa_modules.formats[].
+	const char *name; // not read by xt-insn-gen.c (the format name printed comes from the slot entry)
+	int unused0;
+	xtensa_opcode_encode_fn encode; // called on a cleared buffer by opcode_variant() to obtain "format_bits"
+	int n_slots; // number of entries in slots[]
+	int *slots; // compared against indices into xtensa_modules.slots[] by find_format()
+} xtensa_format_internal;
+
+typedef struct { // Type of the single xtensa_modules object; members without a comment are not read by xt-insn-gen.c.
+	int bigendian;
+	int insn_size;
+	int unused0;
+	int n_formats; // number of entries in formats[]
+	xtensa_format_internal *formats; // searched by find_format()
+	int (*format_decoder)(const xtensa_insnbuf);
+	int (*length_decoder)(const unsigned char *);
+	int n_slots; // number of entries in slots[]; also the length main() assumes for each opcode's encoders[]
+	xtensa_slot_internal *slots;
+	int n_fields;
+	int n_operands;
+	xtensa_operand_internal *operands; // indexed by xtensa_arg_internal.id[0]
+	int n_iclasses;
+	xtensa_iclass_internal *iclasses; // indexed by xtensa_opcode_internal.iclass
+	int n_opcodes; // number of entries in opcodes[]
+	xtensa_opcode_internal *opcodes; // iterated by main(), one JSON object each
+	void *unused1;
+	int n_regfiles;
+	void *unused2;
+	int n_states;
+	void *states;
+	void *unused3;
+	int n_sysregs;
+	void *sysregs;
+	void *unused4;
+	int sysreg_max[2];
+	int unused5[2];
+	int n_interfaces;
+	xtensa_interface_internal *interfaces;
+	void *unused6;
+	int n_units;
+	void *units;
+	void *unused7;
+} xtensa_isa_internal;
+
+#define ATTRIBUTE_UNUSED /**/ // expands to nothing
+
+#define XTENSA_OPERAND_IS_REGISTER       1 // none of these constants is referenced by xt-insn-gen.c; presumably used by the xtensa-modules.c tables - confirm
+#define XTENSA_OPERAND_IS_PCRELATIVE     2
+#define XTENSA_OPERAND_IS_INVISIBLE      4
+#define XTENSA_OPERAND_IS_UNKNOWN        8
+#define XTENSA_OPCODE_IS_BRANCH          1
+#define XTENSA_OPCODE_IS_JUMP            2
+#define XTENSA_OPCODE_IS_LOOP            4
+#define XTENSA_OPCODE_IS_CALL            8
+#define XTENSA_STATE_IS_EXPORTED         1
+#define XTENSA_STATE_IS_SHARED_OR        2
+#define XTENSA_INTERFACE_HAS_SIDE_EFFECT 1
+#define XTENSA_UNDEFINED -1
+
+typedef int xtensa_interface; // not referenced by xt-insn-gen.c
+
+extern xtensa_isa_internal xtensa_modules; // only declared here; expected to be defined by the overlay's xtensa-modules.c, which the Makefile links in as tmpmod-*.o
+
+#endif /* _XT_INSN_MOD_H */