diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 000000000..056cfb47a --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# format all code +59387eb56421aca501f92dd99cd147114e8fc19a \ No newline at end of file diff --git a/.ocamlformat b/.ocamlformat new file mode 100644 index 000000000..912bea406 --- /dev/null +++ b/.ocamlformat @@ -0,0 +1,2 @@ +profile = default +version = 0.27.0 diff --git a/bap-abi.opam b/bap-abi.opam index f9b888bcd..b50d84387 100644 --- a/bap-abi.opam +++ b/bap-abi.opam @@ -12,7 +12,7 @@ depends: [ "dune" {>= "3.1"} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "regular" {= version} diff --git a/bap-analyze.opam b/bap-analyze.opam index 46ecdfa53..c670131a2 100644 --- a/bap-analyze.opam +++ b/bap-analyze.opam @@ -15,7 +15,7 @@ depends: [ "bap-main" {= version} "bap-std" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "linenoise" {>= "1.1.0" & < "2.0.0"} "monads" {= version} "bap-common" {= version} diff --git a/bap-api.opam b/bap-api.opam index 646e14c57..025a79fb3 100644 --- a/bap-api.opam +++ b/bap-api.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "fileutils" "ppx_bap" {= version} "regular" {= version} diff --git a/bap-arm.opam b/bap-arm.opam index b83285d8f..ffd73ae45 100644 --- a/bap-arm.opam +++ b/bap-arm.opam @@ -21,7 +21,7 @@ depends: [ "bap-traces" {= version} "bitvec-order" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "bap-common" {= version} "ogre" {= version} diff --git a/bap-beagle-strings.opam b/bap-beagle-strings.opam index 9a6d57979..f4156a8cd 100644 --- a/bap-beagle-strings.opam +++ b/bap-beagle-strings.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "bap-beagle" {= version} "ppx_bap" {= version} diff --git a/bap-beagle.opam b/bap-beagle.opam index 7270ca63c..2e94dfb72 100644 --- a/bap-beagle.opam +++ b/bap-beagle.opam @@ -15,7 +15,7 @@ depends: [ "bap-primus" {= version} "bap-std" {= version} "bap-strings" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "bap-common" {= version} "ppx_bap" {= version} diff --git a/bap-bil.opam b/bap-bil.opam index e83bc7991..e4f7e2aba 100644 --- a/bap-bil.opam +++ b/bap-bil.opam @@ -17,7 +17,7 @@ depends: [ "bap-std" {= version} "bitvec-order" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "bap-common" {= version} "ogre" {= version} diff --git a/bap-bml.opam b/bap-bml.opam index e8b751a24..59165a15e 100644 --- a/bap-bml.opam +++ b/bap-bml.opam @@ -11,7 +11,7 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "odoc" {with-doc} diff --git a/bap-build.opam b/bap-build.opam index 91519beba..a05df8a80 100644 --- a/bap-build.opam +++ b/bap-build.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ocamlbuild" "ocamlfind" diff --git a/bap-bundle.opam b/bap-bundle.opam index ac9cc7adf..87bee0fae 100644 --- a/bap-bundle.opam +++ b/bap-bundle.opam @@ -11,7 +11,8 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "camlzip" {>= "1.0" & < "2.0"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" + "core_kernel" "fileutils" "bap-common" {= version} "ppx_bap" {= version} diff --git a/bap-byteweight-frontend.opam b/bap-byteweight-frontend.opam index 82ce3b459..f6bcea1b8 100644 --- a/bap-byteweight-frontend.opam +++ b/bap-byteweight-frontend.opam @@ -13,7 +13,7 @@ depends: [ "bap-byteweight" {= version} "bap-std" {= version} "cmdliner" {>= "1.0" & < "2.0"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "fileutils" "bap-common" {= version} "ocurl" {>= "0.9.0"} diff --git a/bap-byteweight.opam b/bap-byteweight.opam index 30eebb787..673f22c54 100644 --- a/bap-byteweight.opam +++ b/bap-byteweight.opam @@ -13,7 +13,7 @@ depends: [ "bap-signatures" {= version} "bap-std" {= version} "camlzip" {>= "1.0" & < "2.0"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "regular" {= version} diff --git a/bap-c.opam b/bap-c.opam index 03065f4c5..abb8466ff 100644 --- a/bap-c.opam +++ b/bap-c.opam @@ -15,7 +15,7 @@ depends: [ "bap-core-theory" {= version} "bap-knowledge" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "odoc" {with-doc} diff --git a/bap-cache.opam b/bap-cache.opam index 5091474c2..d3f4533a7 100644 --- a/bap-cache.opam +++ b/bap-cache.opam @@ -12,7 +12,7 @@ depends: [ "dune" {>= "3.1"} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "fileutils" "mmap" "bap-common" {= version} diff --git a/bap-callgraph-collator.opam b/bap-callgraph-collator.opam index 818d1bba6..86efdf7b4 100644 --- a/bap-callgraph-collator.opam +++ b/bap-callgraph-collator.opam @@ -12,7 +12,7 @@ depends: [ "dune" {>= "3.1"} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "graphlib" {= version} "bap-common" {= version} "ppx_bap" {= version} diff --git a/bap-callsites.opam b/bap-callsites.opam index f3ab5be24..bde157f22 100644 --- a/bap-callsites.opam +++ b/bap-callsites.opam @@ -11,7 +11,7 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "odoc" {with-doc} ] diff --git a/bap-common.opam b/bap-common.opam index 38b5e8d38..e7fb81885 100644 --- a/bap-common.opam +++ b/bap-common.opam @@ -9,12 +9,12 @@ tags: ["bap" "meta"] homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ - "base" {>= "v0.14" & < "v0.16"} + "base" "dune" {>= "3.1"} "dune-configurator" "dune-site" - "ocaml" {> "4.08.0"} - "stdio" {>= "v0.14" & < "v0.16"} + "ocaml" + "stdio" "odoc" {with-doc} ] build: [ diff --git a/bap-constant-tracker.opam b/bap-constant-tracker.opam index 5694ea248..f4200c061 100644 --- a/bap-constant-tracker.opam +++ b/bap-constant-tracker.opam @@ -12,7 +12,7 @@ depends: [ "dune" {>= "3.1"} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "odoc" {with-doc} ] diff --git a/bap-core-theory.opam b/bap-core-theory.opam index 23dba49f1..8bb657188 100644 --- a/bap-core-theory.opam +++ b/bap-core-theory.opam @@ -18,7 +18,7 @@ depends: [ "bitvec-binprot" {= version} "bitvec-order" {= version} "bitvec-sexp" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "odoc" {with-doc} diff --git a/bap-cxxfilt.opam b/bap-cxxfilt.opam index cacc15a35..3cdbf201e 100644 --- a/bap-cxxfilt.opam +++ b/bap-cxxfilt.opam @@ -13,7 +13,7 @@ depends: [ "bap-demangle" {= version} "bap-std" {= version} "conf-binutils" {>= "0.3"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "odoc" {with-doc} ] diff --git a/bap-demangle.opam b/bap-demangle.opam index 35563b8a5..8c8c336f2 100644 --- a/bap-demangle.opam +++ b/bap-demangle.opam @@ -13,7 +13,7 @@ depends: [ "bap-core-theory" {= version} "bap-knowledge" {= version} "bap-main" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "odoc" {with-doc} ] diff --git a/bap-dependencies.opam b/bap-dependencies.opam index d7410b4cf..fb18e4a01 100644 --- a/bap-dependencies.opam +++ b/bap-dependencies.opam @@ -12,7 +12,7 @@ depends: [ "dune" {>= "3.1"} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ogre" {= version} "ppx_bap" {= version} diff --git a/bap-disassemble.opam b/bap-disassemble.opam index 86181fbd0..fc92a0a56 100644 --- a/bap-disassemble.opam +++ b/bap-disassemble.opam @@ -19,7 +19,7 @@ depends: [ "bitvec-order" {= version} "bitvec-sexp" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "bap-common" {= version} "ogre" {= version} diff --git a/bap-dump-symbols.opam b/bap-dump-symbols.opam index f5e68a388..5a0708f98 100644 --- a/bap-dump-symbols.opam +++ b/bap-dump-symbols.opam @@ -11,7 +11,7 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "graphlib" {= version} "bap-common" {= version} "ppx_bap" {= version} diff --git a/bap-dwarf.opam b/bap-dwarf.opam index 04f466622..30094bc9a 100644 --- a/bap-dwarf.opam +++ b/bap-dwarf.opam @@ -11,7 +11,7 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "regular" {= version} diff --git a/bap-elementary.opam b/bap-elementary.opam index 23ada57ad..b5777c239 100644 --- a/bap-elementary.opam +++ b/bap-elementary.opam @@ -15,7 +15,7 @@ depends: [ "bap-knowledge" {= version} "bap-std" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-elf.opam b/bap-elf.opam index 000bfa8e4..287df7704 100644 --- a/bap-elf.opam +++ b/bap-elf.opam @@ -12,7 +12,7 @@ depends: [ "dune" {>= "3.1"} "bap-dwarf" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "ppx_bitstring" {>= "4.0.0" & < "5.0.0"} diff --git a/bap-flatten.opam b/bap-flatten.opam index bbd4f305a..658c8c480 100644 --- a/bap-flatten.opam +++ b/bap-flatten.opam @@ -11,7 +11,7 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "odoc" {with-doc} ] diff --git a/bap-frontc.opam b/bap-frontc.opam index aaa71bcb4..7adf5c6c8 100644 --- a/bap-frontc.opam +++ b/bap-frontc.opam @@ -12,7 +12,7 @@ depends: [ "dune" {>= "3.1"} "bap-c" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "FrontC" {>= "4.1.0"} "bap-common" {= version} "odoc" {with-doc} diff --git a/bap-frontend.opam b/bap-frontend.opam index 9352f4ce0..c72876b62 100644 --- a/bap-frontend.opam +++ b/bap-frontend.opam @@ -14,7 +14,7 @@ depends: [ "bap-knowledge" {= version} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ocamlfind" "regular" {= version} diff --git a/bap-future.opam b/bap-future.opam index f3de322c7..53ef614e4 100644 --- a/bap-future.opam +++ b/bap-future.opam @@ -12,7 +12,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "bap-common" {= version} "odoc" {with-doc} diff --git a/bap-ghidra.opam b/bap-ghidra.opam index eead1e0f5..b8bc4f001 100644 --- a/bap-ghidra.opam +++ b/bap-ghidra.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ppx_bap" {= version} "bap-common" {= version} "bap-std" {= version} diff --git a/bap-glibc-runtime.opam b/bap-glibc-runtime.opam index e45ad10b0..9c6e0668f 100644 --- a/bap-glibc-runtime.opam +++ b/bap-glibc-runtime.opam @@ -14,7 +14,7 @@ depends: [ "bap-c" {= version} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ogre" {= version} "odoc" {with-doc} diff --git a/bap-ida-plugin.opam b/bap-ida-plugin.opam index 64e0120f6..2a7326d6b 100644 --- a/bap-ida-plugin.opam +++ b/bap-ida-plugin.opam @@ -11,7 +11,7 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "regular" {= version} diff --git a/bap-ida.opam b/bap-ida.opam index 3e7a7c1c8..d1c27b87c 100644 --- a/bap-ida.opam +++ b/bap-ida.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "regular" {= version} "fileutils" "bap-common" {= version} diff --git a/bap-knowledge.opam b/bap-knowledge.opam index 0d7ace58d..b691366e9 100644 --- a/bap-knowledge.opam +++ b/bap-knowledge.opam @@ -10,7 +10,8 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" + "core_unix" "ppx_bap" {= version} "bap-common" {= version} "monads" {= version} diff --git a/bap-llvm.opam b/bap-llvm.opam index fbcfcc1a2..22e6d6c48 100644 --- a/bap-llvm.opam +++ b/bap-llvm.opam @@ -11,8 +11,7 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-std" {= version} - "conf-bap-llvm" {>= "1.8"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "mmap" "monads" {= version} "bap-common" {= version} @@ -25,7 +24,6 @@ build: [ ["dune" "subst"] {dev} [ "ocaml" "tools/configure.ml" - "--with-llvm-config=%{conf-bap-llvm:config}%" "--%{llvm-shared?disable:enable}%-llvm-static" ] [ diff --git a/bap-llvm.opam.template b/bap-llvm.opam.template index 8b7096511..c4ae960eb 100644 --- a/bap-llvm.opam.template +++ b/bap-llvm.opam.template @@ -2,7 +2,6 @@ build: [ ["dune" "subst"] {dev} [ "ocaml" "tools/configure.ml" - "--with-llvm-config=%{conf-bap-llvm:config}%" "--%{llvm-shared?disable:enable}%-llvm-static" ] [ diff --git a/bap-main.opam b/bap-main.opam index 1384328d1..3cce2c4f0 100644 --- a/bap-main.opam +++ b/bap-main.opam @@ -14,10 +14,11 @@ depends: [ "bap-future" {= version} "bap-plugins" {= version} "bap-recipe" {= version} - "base" {>= "v0.14" & < "v0.16"} + "base" + "core_kernel" "cmdliner" {>= "1.0" & < "2.0"} "bap-common" {= version} - "stdio" {>= "v0.14" & < "v0.16"} + "stdio" "odoc" {with-doc} ] build: [ diff --git a/bap-mc.opam b/bap-mc.opam index 40a9a4496..f19bff597 100644 --- a/bap-mc.opam +++ b/bap-mc.opam @@ -15,7 +15,7 @@ depends: [ "bap-main" {= version} "bap-std" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ogre" {= version} "ppx_bap" {= version} diff --git a/bap-microx.opam b/bap-microx.opam index 5ca6452db..099f114d6 100644 --- a/bap-microx.opam +++ b/bap-microx.opam @@ -11,7 +11,7 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "bap-common" {= version} "ppx_bap" {= version} diff --git a/bap-mips.opam b/bap-mips.opam index f1ea9e606..43dd1e2da 100644 --- a/bap-mips.opam +++ b/bap-mips.opam @@ -17,7 +17,7 @@ depends: [ "bap-knowledge" {= version} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ogre" {= version} "ppx_bap" {= version} diff --git a/bap-objdump.opam b/bap-objdump.opam index 0aebe19d9..70d0bb1f0 100644 --- a/bap-objdump.opam +++ b/bap-objdump.opam @@ -19,7 +19,7 @@ depends: [ "bitvec-sexp" {= version} "bitvec" {= version} "conf-binutils" {>= "0.3"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "re" {>= "1.0" & < "2.0"} diff --git a/bap-optimization.opam b/bap-optimization.opam index 3a7cc8a3c..02caffdab 100644 --- a/bap-optimization.opam +++ b/bap-optimization.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "bap-std" {= version} diff --git a/bap-patterns.opam b/bap-patterns.opam index 7b9676e8b..f41195dd3 100644 --- a/bap-patterns.opam +++ b/bap-patterns.opam @@ -20,7 +20,7 @@ depends: [ "bitvec-order" {= version} "bitvec-sexp" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "fileutils" "bap-common" {= version} "ppx_bap" {= version} diff --git a/bap-phoenix.opam b/bap-phoenix.opam index d8abb8ba0..b5805921a 100644 --- a/bap-phoenix.opam +++ b/bap-phoenix.opam @@ -14,7 +14,7 @@ depends: [ "bap-std" {= version} "cmdliner" "cmdliner" {>= "1.0" & < "2.0"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ezjsonm" "graphlib" {= version} "ppx_bap" {= version} diff --git a/bap-piqi.opam b/bap-piqi.opam deleted file mode 100644 index 61b3b62ab..000000000 --- a/bap-piqi.opam +++ /dev/null @@ -1,35 +0,0 @@ -# This file is generated by dune, edit dune-project instead -opam-version: "2.0" -version: "dev" -synopsis: "Serializes BAP Project in various formats using piqi" -maintainer: ["Ivan Gotovchits "] -authors: ["The BAP Team"] -license: "MIT" -tags: ["bap" "bap-plugin"] -homepage: "https://github.com/BinaryAnalysisPlatform/bap" -bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" -depends: [ - "dune" {>= "3.1"} - "bap-common" {= version} - "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} - "piqi" {>= "0.7.8"} - "odoc" {with-doc} -] -build: [ - ["dune" "subst"] {dev} - [ - "dune" - "build" - "-p" - name - "-j" - jobs - "--promote-install-files=false" - "@install" - "@runtest" {with-test} - "@doc" {with-doc} - ] - ["dune" "install" "-p" name "--create-install-files" name] -] -dev-repo: "git+https://github.com/BinaryAnalysisPlatform/bap.git" diff --git a/bap-plugins.opam b/bap-plugins.opam index 784ef9f73..d88a663be 100644 --- a/bap-plugins.opam +++ b/bap-plugins.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-bundle" {= version} "bap-future" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "fileutils" "ocamlfind" "ppx_bap" {= version} diff --git a/bap-powerpc.opam b/bap-powerpc.opam index ac5cee521..0eecde311 100644 --- a/bap-powerpc.opam +++ b/bap-powerpc.opam @@ -18,7 +18,7 @@ depends: [ "bap-knowledge" {= version} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "ogre" {= version} "ppx_bap" {= version} diff --git a/bap-primus-dictionary.opam b/bap-primus-dictionary.opam index 450a6485a..fea93a697 100644 --- a/bap-primus-dictionary.opam +++ b/bap-primus-dictionary.opam @@ -14,7 +14,7 @@ depends: [ "bap-core-theory" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-primus-exploring-scheduler.opam b/bap-primus-exploring-scheduler.opam index e8d0b2bf0..1afee1ee5 100644 --- a/bap-primus-exploring-scheduler.opam +++ b/bap-primus-exploring-scheduler.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-greedy-scheduler.opam b/bap-primus-greedy-scheduler.opam index e8d0b2bf0..1afee1ee5 100644 --- a/bap-primus-greedy-scheduler.opam +++ b/bap-primus-greedy-scheduler.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-limit.opam b/bap-primus-limit.opam index b461ad79a..a058069d3 100644 --- a/bap-primus-limit.opam +++ b/bap-primus-limit.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-primus-lisp.opam b/bap-primus-lisp.opam index 1b2adc595..1583a2e05 100644 --- a/bap-primus-lisp.opam +++ b/bap-primus-lisp.opam @@ -17,7 +17,7 @@ depends: [ "bap-primus" {= version} "bap-std" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "regular" {= version} "odoc" {with-doc} diff --git a/bap-primus-loader.opam b/bap-primus-loader.opam index d8072e954..5ee59bf80 100644 --- a/bap-primus-loader.opam +++ b/bap-primus-loader.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ogre" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-mark-visited.opam b/bap-primus-mark-visited.opam index f0dbd8429..13f4e9af8 100644 --- a/bap-primus-mark-visited.opam +++ b/bap-primus-mark-visited.opam @@ -14,7 +14,7 @@ depends: [ "bap-primus-track-visited" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-primus-powerpc.opam b/bap-primus-powerpc.opam index ca01881cc..2f4ae9d16 100644 --- a/bap-primus-powerpc.opam +++ b/bap-primus-powerpc.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-primus-print.opam b/bap-primus-print.opam index 9c7a5e889..3502e198e 100644 --- a/bap-primus-print.opam +++ b/bap-primus-print.opam @@ -16,7 +16,7 @@ depends: [ "bap-primus" {= version} "bap-std" {= version} "bare" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-promiscuous.opam b/bap-primus-promiscuous.opam index b98fbb057..219b65da5 100644 --- a/bap-primus-promiscuous.opam +++ b/bap-primus-promiscuous.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-propagate-taint.opam b/bap-primus-propagate-taint.opam index 849dcf30f..7701b63ab 100644 --- a/bap-primus-propagate-taint.opam +++ b/bap-primus-propagate-taint.opam @@ -14,7 +14,7 @@ depends: [ "bap-primus" {= version} "bap-std" {= version} "bap-taint" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-random.opam b/bap-primus-random.opam index 6c2fe76a2..38cf1f632 100644 --- a/bap-primus-random.opam +++ b/bap-primus-random.opam @@ -16,7 +16,7 @@ depends: [ "bap-std" {= version} "bitvec-sexp" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "zarith" "odoc" {with-doc} diff --git a/bap-primus-region.opam b/bap-primus-region.opam index c331c980c..3e8753aa9 100644 --- a/bap-primus-region.opam +++ b/bap-primus-region.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-round-robin-scheduler.opam b/bap-primus-round-robin-scheduler.opam index e8d0b2bf0..1afee1ee5 100644 --- a/bap-primus-round-robin-scheduler.opam +++ b/bap-primus-round-robin-scheduler.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-symbolic-executor.opam b/bap-primus-symbolic-executor.opam index c8bae944b..6965c37ca 100644 --- a/bap-primus-symbolic-executor.opam +++ b/bap-primus-symbolic-executor.opam @@ -15,12 +15,12 @@ depends: [ "bap-primus-track-visited" {= version} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "regular" {= version} "bitvec" {= version} "zarith" - "z3" {>= "4.8.8-1"} + "z3" "odoc" {with-doc} ] build: [ diff --git a/bap-primus-systems.opam b/bap-primus-systems.opam index a5ea180a2..b83dc1846 100644 --- a/bap-primus-systems.opam +++ b/bap-primus-systems.opam @@ -15,7 +15,7 @@ depends: [ "bap-main" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-taint.opam b/bap-primus-taint.opam index 26e77081b..f870d682f 100644 --- a/bap-primus-taint.opam +++ b/bap-primus-taint.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-taint" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-test.opam b/bap-primus-test.opam index 93c8c67ad..b89e8c220 100644 --- a/bap-primus-test.opam +++ b/bap-primus-test.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-primus-track-visited.opam b/bap-primus-track-visited.opam index b9b4d4b4e..4e250ff91 100644 --- a/bap-primus-track-visited.opam +++ b/bap-primus-track-visited.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-primus-wandering-scheduler.opam b/bap-primus-wandering-scheduler.opam index e8d0b2bf0..1afee1ee5 100644 --- a/bap-primus-wandering-scheduler.opam +++ b/bap-primus-wandering-scheduler.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "odoc" {with-doc} ] diff --git a/bap-primus-x86.opam b/bap-primus-x86.opam index 8287f7ff6..04dc8c01e 100644 --- a/bap-primus-x86.opam +++ b/bap-primus-x86.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-x86" {= version} "odoc" {with-doc} ] diff --git a/bap-primus.opam b/bap-primus.opam index c5d395e63..548d35cfe 100644 --- a/bap-primus.opam +++ b/bap-primus.opam @@ -20,10 +20,10 @@ depends: [ "bap-strings" {= version} "bitvec-binprot" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "graphlib" {= version} "monads" {= version} - "parsexp" {>= "v0.14" & < "v0.16"} + "parsexp" "ppx_bap" {= version} "regular" {= version} "uuidm" {>= "0.9.7"} diff --git a/bap-print.opam b/bap-print.opam index e96015b10..f68758360 100644 --- a/bap-print.opam +++ b/bap-print.opam @@ -14,7 +14,7 @@ depends: [ "bap-demangle" {= version} "bap-knowledge" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "graphlib" {= version} "ogre" {= version} "ppx_bap" {= version} diff --git a/bap-radare2.opam b/bap-radare2.opam index 6dec2df3f..608975d0e 100644 --- a/bap-radare2.opam +++ b/bap-radare2.opam @@ -21,7 +21,7 @@ depends: [ "bap-std" {= version} "bitvec" {= version} "conf-radare2" - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ogre" {= version} "ppx_bap" {= version} "re" {>= "1.0" & < "2.0"} diff --git a/bap-raw.opam b/bap-raw.opam index 8d8a4935d..da4c606c7 100644 --- a/bap-raw.opam +++ b/bap-raw.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "bap-std" {= version} diff --git a/bap-recipe-command.opam b/bap-recipe-command.opam index 1c83d1688..bdd65ca61 100644 --- a/bap-recipe-command.opam +++ b/bap-recipe-command.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-main" {= version} "bap-recipe" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-recipe.opam b/bap-recipe.opam index 30bfdf7d2..81151df6a 100644 --- a/bap-recipe.opam +++ b/bap-recipe.opam @@ -13,8 +13,8 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-common" {= version} - "parsexp" {>= "v0.14" & < "v0.16"} - "stdio" {>= "v0.14" & < "v0.16"} + "parsexp" + "stdio" "stdlib-shims" "uuidm" {>= "0.9.7"} "odoc" {with-doc} diff --git a/bap-relocatable.opam b/bap-relocatable.opam index 5ca8913cd..00f50878c 100644 --- a/bap-relocatable.opam +++ b/bap-relocatable.opam @@ -23,7 +23,7 @@ depends: [ "bitvec-order" {= version} "bitvec-sexp" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "ogre" {= version} "ppx_bap" {= version} diff --git a/bap-report.opam b/bap-report.opam index bd1decb2c..507356be8 100644 --- a/bap-report.opam +++ b/bap-report.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "bap-std" {= version} "odoc" {with-doc} diff --git a/bap-riscv.opam b/bap-riscv.opam index ba986487e..7ef586460 100644 --- a/bap-riscv.opam +++ b/bap-riscv.opam @@ -17,7 +17,7 @@ depends: [ "bap-c" {= version} "bap-knowledge" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "ogre" {= version} "ppx_bap" {= version} diff --git a/bap-run.opam b/bap-run.opam index 38acb0bfb..b9022b07e 100644 --- a/bap-run.opam +++ b/bap-run.opam @@ -15,7 +15,7 @@ depends: [ "bap-knowledge" {= version} "bap-primus" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "graphlib" {= version} "monads" {= version} "regular" {= version} diff --git a/bap-specification.opam b/bap-specification.opam index 74e4dff1d..7c1943035 100644 --- a/bap-specification.opam +++ b/bap-specification.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ogre" {= version} "odoc" {with-doc} ] diff --git a/bap-ssa.opam b/bap-ssa.opam index bbf6c0638..fd210f8ce 100644 --- a/bap-ssa.opam +++ b/bap-ssa.opam @@ -12,7 +12,7 @@ depends: [ "dune" {>= "3.1"} "bap-common" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-std.opam b/bap-std.opam index 91c0c275b..5e244f942 100644 --- a/bap-std.opam +++ b/bap-std.opam @@ -23,7 +23,7 @@ depends: [ "bitvec" {= version} "camlzip" {>= "1.07" & < "2.0"} "cmdliner" - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "fileutils" "graphlib" {= version} "mmap" diff --git a/bap-strings.opam b/bap-strings.opam index 2a1970abd..596ff82fb 100644 --- a/bap-strings.opam +++ b/bap-strings.opam @@ -22,7 +22,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "ppx_bap" {= version} "odoc" {with-doc} diff --git a/bap-stub-resolver.opam b/bap-stub-resolver.opam index d1dd3c727..7f1972a49 100644 --- a/bap-stub-resolver.opam +++ b/bap-stub-resolver.opam @@ -19,7 +19,7 @@ depends: [ "bitvec-order" {= version} "bitvec-sexp" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ogre" {= version} "ppx_bap" {= version} "odoc" {with-doc} diff --git a/bap-symbol-reader.opam b/bap-symbol-reader.opam index f4a6d0124..c33540207 100644 --- a/bap-symbol-reader.opam +++ b/bap-symbol-reader.opam @@ -16,7 +16,7 @@ depends: [ "bap-main" {= version} "bap-relation" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "odoc" {with-doc} ] build: [ diff --git a/bap-systemz.opam b/bap-systemz.opam index 46f2949d4..f5e970b7a 100644 --- a/bap-systemz.opam +++ b/bap-systemz.opam @@ -16,7 +16,7 @@ depends: [ "bap-main" {= version} "bap-std" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ogre" {= version} "ppx_bap" {= version} "odoc" {with-doc} diff --git a/bap-taint-propagator.opam b/bap-taint-propagator.opam index b7918b6cc..a2685d805 100644 --- a/bap-taint-propagator.opam +++ b/bap-taint-propagator.opam @@ -13,7 +13,7 @@ depends: [ "bap-common" {= version} "bap-core-theory" {= version} "bap-microx" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "graphlib" {= version} "monads" {= version} "regular" {= version} diff --git a/bap-taint.opam b/bap-taint.opam index 4fc2a1581..b04ce88b7 100644 --- a/bap-taint.opam +++ b/bap-taint.opam @@ -14,7 +14,7 @@ depends: [ "bap-primus" {= version} "bap-std" {= version} "bap-strings" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "monads" {= version} "ppx_bap" {= version} "regular" {= version} diff --git a/bap-term-mapper.opam b/bap-term-mapper.opam index b57b3d56c..8c297e11e 100644 --- a/bap-term-mapper.opam +++ b/bap-term-mapper.opam @@ -14,7 +14,7 @@ depends: [ "bap-common" {= version} "bap-main" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ppx_bap" {= version} "regular" {= version} "odoc" {with-doc} diff --git a/bap-thumb.opam b/bap-thumb.opam index 96c77c4bc..5bd2c92ab 100644 --- a/bap-thumb.opam +++ b/bap-thumb.opam @@ -17,7 +17,7 @@ depends: [ "bap-main" {= version} "bap-std" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ogre" {= version} "ppx_bap" {= version} "odoc" {with-doc} diff --git a/bap-toplevel.opam b/bap-toplevel.opam index 3c4179888..a4740b0e0 100644 --- a/bap-toplevel.opam +++ b/bap-toplevel.opam @@ -14,7 +14,7 @@ depends: [ "dune" {>= "3.1"} "bap-common" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ocamlfind" "odoc" {with-doc} ] diff --git a/bap-trace.opam b/bap-trace.opam index 3d18922c2..487e26d44 100644 --- a/bap-trace.opam +++ b/bap-trace.opam @@ -15,7 +15,7 @@ depends: [ "bap-plugins" {= version} "bap-std" {= version} "bap-traces" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ppx_bap" {= version} "regular" {= version} "uri" diff --git a/bap-traces.opam b/bap-traces.opam index fd3a34b41..8349705c6 100644 --- a/bap-traces.opam +++ b/bap-traces.opam @@ -12,7 +12,7 @@ depends: [ "dune" {>= "3.1"} "bap-common" {= version} "bap-std" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ppx_bap" {= version} "regular" {= version} "uri" {>= "1.9.0"} diff --git a/bap-trivial-condition-form.opam b/bap-trivial-condition-form.opam index c4ae59347..d95fb4de1 100644 --- a/bap-trivial-condition-form.opam +++ b/bap-trivial-condition-form.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "bap-std" {= version} "odoc" {with-doc} diff --git a/bap-warn-unused.opam b/bap-warn-unused.opam index 0bb27692c..8f2c4d0ad 100644 --- a/bap-warn-unused.opam +++ b/bap-warn-unused.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "bap-common" {= version} "bap-std" {= version} "odoc" {with-doc} diff --git a/bap-x86.opam b/bap-x86.opam index 4305b3a1a..c72fc9332 100644 --- a/bap-x86.opam +++ b/bap-x86.opam @@ -23,7 +23,7 @@ depends: [ "bap-primus" {= version} "bap-std" {= version} "bitvec" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ogre" {= version} "ppx_bap" {= version} "regular" {= version} diff --git a/bare.opam b/bare.opam index 3cd1fc74d..ed85ad484 100644 --- a/bare.opam +++ b/bare.opam @@ -13,8 +13,8 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-common" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} - "parsexp" {>= "v0.14" & < "v0.16"} + "core" + "parsexp" "odoc" {with-doc} ] build: [ diff --git a/benchmarks/bench_dom.ml b/benchmarks/bench_dom.ml index defb1ac5f..e09eb021c 100644 --- a/benchmarks/bench_dom.ml +++ b/benchmarks/bench_dom.ml @@ -7,84 +7,73 @@ let filename = "arm-binaries/coreutils/coreutils_O1_ls" let sizes_to_test = 10 module Cfg = Graphlib.Ir -module CFG = Graphlib.To_ocamlgraph(Cfg) -module DOM = Graph.Dominator.Make(CFG) -module SCC = Graph.Components.Make(CFG) +module CFG = Graphlib.To_ocamlgraph (Cfg) +module DOM = Graph.Dominator.Make (CFG) +module SCC = Graph.Components.Make (CFG) let proj = Project.from_file filename |> ok_exn - let syms = Project.symbols proj -(** [scale_linear ~input:(x1,x2) ~output:(y1,y2)] - will return a linear function $f(x) = ax + b$ such that: +(** [scale_linear ~input:(x1,x2) ~output:(y1,y2)] will return a linear function + $f(x) = ax + b$ such that: {[ f x1 = y1; - f x2 = y2; + f x2 = y2 ]} *) -let scale_linear ~input:(x1,x2) ~output:(y1,y2) = +let scale_linear ~input:(x1, x2) ~output:(y1, y2) = let open Float in - let x1,x2,y1,y2 = float x1, float x2, float y1, float y2 in + let x1, x2, y1, y2 = (float x1, float x2, float y1, float y2) in let a = (y1 - y2) / (x1 - x2) in - let b = y1 - x1 * (y1 - y2) / (x1-x2) in - fun x -> to_int (round (a * (float x) + b)) - - -let functions,sizes,index = - let functions = Term.enum sub_t (Project.program proj) |> - Seq.map ~f:Cfg.of_sub |> Seq.to_array in + let b = y1 - (x1 * (y1 - y2) / (x1 - x2)) in + fun x -> to_int (round ((a * float x) + b)) + +let functions, sizes, index = + let functions = + Term.enum sub_t (Project.program proj) + |> Seq.map ~f:Cfg.of_sub |> Seq.to_array + in let size g = Cfg.number_of_nodes g + Cfg.number_of_edges g in Array.sort functions ~cmp:(fun x y -> Int.compare (size x) (size y)); let max_idx = Array.length functions - 1 in let min_size = size functions.(0) in let max_size = min 1000 @@ size functions.(max_idx) in let size = - scale_linear ~input:(0,sizes_to_test-1) ~output:(min_size,max_size) in - let index = - scale_linear ~input:(min_size,max_size) ~output:(0,max_idx) in + scale_linear ~input:(0, sizes_to_test - 1) ~output:(min_size, max_size) + in + let index = scale_linear ~input:(min_size, max_size) ~output:(0, max_idx) in let sizes = List.init sizes_to_test ~f:size in - functions,List.rev sizes,index + (functions, List.rev sizes, index) -let ocamlgraph cfg entry = - DOM.compute_idom cfg entry +let ocamlgraph cfg entry = DOM.compute_idom cfg entry let graphlib cfg entry = Tree.parent (Graphlib.dominators (module Cfg) cfg entry) -let dom algo cfg = - algo cfg (Seq.hd_exn @@ Cfg.nodes cfg) - -let run f size = - stage (fun () -> f functions.(index size)) - +let dom algo cfg = algo cfg (Seq.hd_exn @@ Cfg.nodes cfg) +let run f size = stage (fun () -> f functions.(index size)) let graphlib_scc cfg = - Graphlib.strong_components (module Cfg) cfg |> - Partition.equiv + Graphlib.strong_components (module Cfg) cfg |> Partition.equiv let ocamlgraph_scc cfg = - let _,get = SCC.scc cfg in + let _, get = SCC.scc cfg in fun x y -> get x = get y let indexed = Bench.Test.create_indexed ~args:sizes - let dom_test = - Bench.Test.create_group ~name:"dom" [ - indexed ~name:"ocamlgraph" (run (dom ocamlgraph)); - indexed ~name:"graphlib" (run (dom graphlib)); - ] - - + Bench.Test.create_group ~name:"dom" + [ + indexed ~name:"ocamlgraph" (run (dom ocamlgraph)); + indexed ~name:"graphlib" (run (dom graphlib)); + ] let scc_test = - Bench.Test.create_group ~name:"scc" [ - indexed ~name:"graphlib" (run graphlib_scc); - indexed ~name:"ocamlgraph" (run ocamlgraph_scc); - ] - + Bench.Test.create_group ~name:"scc" + [ + indexed ~name:"graphlib" (run graphlib_scc); + indexed ~name:"ocamlgraph" (run ocamlgraph_scc); + ] -let tests = [ - dom_test; - scc_test; -] +let tests = [ dom_test; scc_test ] diff --git a/benchmarks/bench_image.ml b/benchmarks/bench_image.ml index f4e4de3dd..3ce21aa00 100644 --- a/benchmarks/bench_image.ml +++ b/benchmarks/bench_image.ml @@ -4,53 +4,62 @@ open Or_error open Bap.Std let n = 100_000 - let data = Bigstring.init n ~f:(fun _ -> '\x00') let null = Addr.of_int ~width:32 0 let zero = Word.of_int ~width:8 0 let mem = ok_exn (Memory.create BigEndian null data) + let tab = Memory.With_error.foldi mem ~init:Table.empty ~f:(fun addr word tab -> - Memory.view ~from:addr ~words:1 mem >>= fun mem -> - Table.add tab mem word) -let tab = match tab with + Memory.view ~from:addr ~words:1 mem >>= fun mem -> Table.add tab mem word) + +let tab = + match tab with | Ok tab -> tab | Error err -> - eprintf "Failing: %s" (Error.to_string_hum err); - invalid_arg "invalid memory" + eprintf "Failing: %s" (Error.to_string_hum err); + invalid_arg "invalid memory" let sum_while () = let sum = ref zero in for i = 0 to n - 1 do - sum := Word.Int_exn.(!sum + Word.of_int ~width:8 (Char.to_int data.{i})); + sum := Word.Int_exn.(!sum + Word.of_int ~width:8 (Char.to_int data.{i})) done let sum_foldi () = let (_ : word) = Memory.foldi mem ~f:(fun _ w1 w2 -> Word.Int_exn.(w1 + w2)) ~init:zero - in () + in + () let sum_fold () = let (_ : word) = Memory.fold mem ~f:(fun w1 w2 -> Word.Int_exn.(w1 + w2)) ~init:zero - in () + in + () let sum_foldm () = let (_ : word Or_error.t) = - Memory.With_error.fold mem ~f:(fun w1 w2 -> Word.Int_err.(!$w1 + !$w2)) ~init:zero - in () + Memory.With_error.fold mem + ~f:(fun w1 w2 -> Word.Int_err.(!$w1 + !$w2)) + ~init:zero + in + () let sum_fold_tab () = let (_ : word) = Table.fold tab ~f:(fun w1 w2 -> Word.Int_exn.(w1 + w2)) ~init:zero - in () + in + () -let test = Bench.Test.create_group ~name:"image" [ - Bench.Test.create ~name:"Bap_image.map_sum_direct" sum_while; - Bench.Test.create ~name:"Bap_image.map_sum_fold" sum_fold; - Bench.Test.create ~name:"Bap_image.map_sum_foldi" sum_foldi; - Bench.Test.create ~name:"Bap_image.map_sum_foldm" sum_foldm; - Bench.Test.create ~name:"Bap_image.map_sum_table" sum_fold_tab; - ] +let test = + Bench.Test.create_group ~name:"image" + [ + Bench.Test.create ~name:"Bap_image.map_sum_direct" sum_while; + Bench.Test.create ~name:"Bap_image.map_sum_fold" sum_fold; + Bench.Test.create ~name:"Bap_image.map_sum_foldi" sum_foldi; + Bench.Test.create ~name:"Bap_image.map_sum_foldm" sum_foldm; + Bench.Test.create ~name:"Bap_image.map_sum_table" sum_fold_tab; + ] -let tests = [test] +let tests = [ test ] diff --git a/benchmarks/run_benchmarks.ml b/benchmarks/run_benchmarks.ml index e49585834..836649aeb 100644 --- a/benchmarks/run_benchmarks.ml +++ b/benchmarks/run_benchmarks.ml @@ -2,10 +2,7 @@ open Core open Core_bench.Std open Bap.Std -let benchmarks = Bench.make_command @@ List.concat [ - Bench_dom.tests; - Bench_image.tests; - ] - +let benchmarks = + Bench.make_command @@ List.concat [ Bench_dom.tests; Bench_image.tests ] let () = Command.run benchmarks diff --git a/bitvec-binprot.opam b/bitvec-binprot.opam index 3d7b3f5fa..f660654de 100644 --- a/bitvec-binprot.opam +++ b/bitvec-binprot.opam @@ -10,7 +10,7 @@ homepage: "https://github.com/BinaryAnalysisPlatform/bap" bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} - "bin_prot" {>= "v0.14" & < "v0.16"} + "bin_prot" "bitvec" {= version} "ppx_bap" {= version} "odoc" {with-doc} diff --git a/dune b/dune index 7aba364fb..4189c38fd 100644 --- a/dune +++ b/dune @@ -1,49 +1,54 @@ (env (dev - (flags (:standard - -warn-error -A - -w -6-9-27-32..34-37-50-58)) - (ocamlopt_flags (:standard -O3)))) - + (flags + (:standard -warn-error -A -w -6-9-27-32..34-37-50-58)) + (ocamlopt_flags + (:standard -O3)))) (rule (target config.status.in) (mode fallback) - (action (with-stdout-to %{target} (echo "")))) + (action + (with-stdout-to + %{target} + (echo "")))) (rule (alias config) (target config.status) (deps config.status.in) (action - (with-stdin-from %{deps} - (with-stdout-to %{target} - (chdir %{workspace_root} + (with-stdin-from + %{deps} + (with-stdout-to + %{target} + (chdir + %{workspace_root} (run ./tools/rewrite.exe -init)))))) - (library (name ppx_bap) (public_name ppx_bap) (kind ppx_rewriter) (libraries - ppx_assert - ppx_bench - ppx_bin_prot - ppx_compare - ppx_enumerate - ppx_fields_conv - ppx_hash - ppx_here - ppxlib - ppx_optcomp - ppx_sexp_conv - ppx_sexp_value - ppx_variants_conv - ppx_expect - ppx_inline_test) + ppx_assert + ppx_bench + ppx_bin_prot + ppx_compare + ppx_enumerate + ppx_fields_conv + ppx_hash + ppx_here + ppxlib + ppx_optcomp + ppx_sexp_conv + ppx_sexp_value + ppx_variants_conv + ppx_expect + ppx_inline_test) (preprocess no_preprocessing)) (alias (name default) - (deps (alias_rec install))) + (deps + (alias_rec install))) diff --git a/dune-project b/dune-project index 52e68ced4..26d27591f 100644 --- a/dune-project +++ b/dune-project @@ -9,12 +9,6 @@ (maintainers "Ivan Gotovchits ") (source (github BinaryAnalysisPlatform/bap)) - -(explicit_js_mode) -(formatting disabled) - -(use_standard_c_and_cxx_flags false) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Package Descriptions ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -136,12 +130,12 @@ (share signatures) (share site_lisp)) (depends - (base (and (>= v0.14) (< v0.16))) - dune - dune-configurator - dune-site - (ocaml (> 4.08.0)) - (stdio (and (>= v0.14) (< v0.16))))) + base + dune + dune-configurator + dune-site + ocaml + stdio)) (package (name bap-primus-support) @@ -176,7 +170,7 @@ (depends (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)) (regular (= :version)))) @@ -191,7 +185,7 @@ (bap-main (= :version)) (bap-std (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (linenoise (and (>= 1.1.0) (< 2.0.0))) (monads (= :version)) (bap-common (= :version)) @@ -204,7 +198,7 @@ (depends (bap-common (= :version)) (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core fileutils (ppx_bap (= :version)) (regular (= :version)))) @@ -225,7 +219,7 @@ (bap-traces (= :version)) (bitvec-order (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (bap-common (= :version)) (ogre (= :version)) @@ -243,7 +237,7 @@ (bap-primus (= :version)) (bap-std (= :version)) (bap-strings (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (bap-common (= :version)) (ppx_bap (= :version)) @@ -261,7 +255,7 @@ (bap-std (= :version)) (bitvec-order (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (bap-common (= :version)) (ogre (= :version)) @@ -273,7 +267,7 @@ (tags (bap bap-library)) (depends (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)))) @@ -285,7 +279,7 @@ (bap-signatures (= :version)) (bap-std (= :version)) (camlzip (and (>= 1.0) (< 2.0))) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)) (regular (= :version)) @@ -299,7 +293,7 @@ (bap-byteweight (= :version)) (bap-std (= :version)) (cmdliner (and (>= 1.0) (< 2.0))) - (core_kernel (and (>= v0.14) (< v0.16))) + core fileutils (bap-common (= :version)) (ocurl (>= 0.9.0)) @@ -311,7 +305,7 @@ (synopsis "BAP Build Tool (ocamlbuild+bap)") (tags (bap bap-tool ocamlbuild)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) ocamlbuild ocamlfind @@ -323,7 +317,8 @@ (tags (bap bap-tool)) (depends (camlzip (and (>= 1.0) (< 2.0))) - (core_kernel (and (>= v0.14) (< v0.16))) + core + core_kernel fileutils (bap-common (= :version)) (ppx_bap (= :version)) @@ -339,7 +334,7 @@ (bap-core-theory (= :version)) (bap-knowledge (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)))) @@ -350,7 +345,7 @@ (depends (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core fileutils mmap (bap-common (= :version)) @@ -365,7 +360,7 @@ (depends (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (graphlib (= :version)) (bap-common (= :version)) (ppx_bap (= :version)) @@ -378,7 +373,7 @@ (tags (bap bap-plugin bap-pass)) (depends (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)))) (package @@ -398,7 +393,7 @@ details of the program behavior.") (bitvec-binprot (= :version)) (bitvec-order (= :version)) (bitvec-sexp (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)))) @@ -409,7 +404,7 @@ details of the program behavior.") (depends (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)))) (package @@ -420,7 +415,7 @@ details of the program behavior.") (bap-demangle (= :version)) (bap-std (= :version)) (conf-binutils (>= 0.3)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)))) (package @@ -431,7 +426,7 @@ details of the program behavior.") (bap-core-theory (= :version)) (bap-knowledge (= :version)) (bap-main (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)))) (package @@ -441,7 +436,7 @@ details of the program behavior.") (depends (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ogre (= :version)) (ppx_bap (= :version)) @@ -461,7 +456,7 @@ details of the program behavior.") (bitvec-order (= :version)) (bitvec-sexp (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (bap-common (= :version)) (ogre (= :version)) @@ -474,7 +469,7 @@ details of the program behavior.") (tags (bap bap-plugin)) (depends (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (graphlib (= :version)) (bap-common (= :version)) (ppx_bap (= :version)) @@ -486,7 +481,7 @@ details of the program behavior.") (tags (bap bap-library dwarf)) (depends (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)) (regular (= :version)))) @@ -501,7 +496,7 @@ details of the program behavior.") (bap-knowledge (= :version)) (bap-std (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-elf) @@ -510,7 +505,7 @@ details of the program behavior.") (depends (bap-dwarf (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)) (ppx_bitstring (and (>= 4.0.0) (< 5.0.0))) @@ -522,7 +517,7 @@ details of the program behavior.") (tags (bap bap-plugin)) (depends (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)))) (package @@ -534,7 +529,7 @@ details of the program behavior.") (bap-knowledge (= :version)) (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) ocamlfind (regular (= :version)))) @@ -546,7 +541,7 @@ details of the program behavior.") (depends (bap-c (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (FrontC (>= 4.1.0)) (bap-common (= :version)))) @@ -558,7 +553,7 @@ A library for reasoning about state based dynamic systems. This can \ be seen as a common denominator between Lwt and Async libraries.") (tags (bap future)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (bap-common (= :version)))) @@ -568,7 +563,7 @@ be seen as a common denominator between Lwt and Async libraries.") (tags (bap ghidra disassembler)) (allow_empty) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (ppx_bap (= :version)) (bap-common (= :version)) (bap-std (= :version)) @@ -583,7 +578,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-c (= :version)) (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ogre (= :version)))) @@ -591,7 +586,7 @@ be seen as a common denominator between Lwt and Async libraries.") (synopsis "BAP IDA Pro integration") (tags (bap bap-library ida-pro)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (regular (= :version)) fileutils (bap-common (= :version)) @@ -604,7 +599,7 @@ be seen as a common denominator between Lwt and Async libraries.") (tags (bap bap-plugin bap-ida ida-pro)) (depends (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)) (regular (= :version)))) @@ -614,7 +609,8 @@ be seen as a common denominator between Lwt and Async libraries.") (synopsis "Knowledge Reasoning and Representation Framework") (tags (bap bap-library knowledge)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core + core_unix (ppx_bap (= :version)) (bap-common (= :version)) (monads (= :version)))) @@ -625,9 +621,8 @@ be seen as a common denominator between Lwt and Async libraries.") (tags (bap bap-plugin llvm)) (depends (bap-std (= :version)) - (conf-bap-llvm (>= 1.8)) - (core_kernel (and (>= v0.14) (< v0.16))) - mmap + core + mmap (monads (= :version)) (bap-common (= :version)) (ogre (= :version)) @@ -642,10 +637,11 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-future (= :version)) (bap-plugins (= :version)) (bap-recipe (= :version)) - (base (and (>= v0.14) (< v0.16))) + base + core_kernel (cmdliner (and (>= 1.0) (< 2.0))) (bap-common (= :version)) - (stdio (and (>= v0.14) (< v0.16))))) + stdio)) (package (name bap-mc) @@ -657,7 +653,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-main (= :version)) (bap-std (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ogre (= :version)) (ppx_bap (= :version)) @@ -669,7 +665,7 @@ be seen as a common denominator between Lwt and Async libraries.") (tags (bap bap-plugin)) (depends (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (bap-common (= :version)) (ppx_bap (= :version)))) @@ -686,7 +682,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-knowledge (= :version)) (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ogre (= :version)) (ppx_bap (= :version)) @@ -707,7 +703,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bitvec-sexp (= :version)) (bitvec (= :version)) (conf-binutils (>= 0.3)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)) (re (and (>= 1.0) (< 2.0))))) @@ -717,7 +713,7 @@ be seen as a common denominator between Lwt and Async libraries.") (synopsis "A BAP IR optimization pass") (tags (bap bap-plugin bap-pass)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)) (bap-std (= :version)) @@ -739,7 +735,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bitvec-order (= :version)) (bitvec-sexp (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core fileutils (bap-common (= :version)) (ppx_bap (= :version)) @@ -755,22 +751,22 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-std (= :version)) cmdliner (cmdliner (and (>= 1.0) (< 2.0))) - (core_kernel (and (>= v0.14) (< v0.16))) + core ezjsonm (graphlib (= :version)) (ppx_bap (= :version)) (regular (= :version)) (text-tags (= :version)))) -(package - (name bap-piqi) - (synopsis "Serializes BAP Project in various formats using piqi") - (tags (bap bap-plugin)) - (depends - (bap-common (= :version)) - (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) - (piqi (>= 0.7.8)))) +; (package +; (name bap-piqi) +; (synopsis "Serializes BAP Project in various formats using piqi") +; (tags (bap bap-plugin)) +; (depends +; (bap-common (= :version)) +; (bap-std (= :version)) +; core +; (piqi (>= 0.7.8)))) (package (name bap-plugins) @@ -780,7 +776,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-bundle (= :version)) (bap-future (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core fileutils ocamlfind (ppx_bap (= :version)) @@ -799,7 +795,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-knowledge (= :version)) (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (ogre (= :version)) (ppx_bap (= :version)) @@ -821,10 +817,10 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-strings (= :version)) (bitvec-binprot (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (graphlib (= :version)) (monads (= :version)) - (parsexp (and (>= v0.14) (< v0.16))) + parsexp (ppx_bap (= :version)) (regular (= :version)) (uuidm (>= 0.9.7)))) @@ -838,7 +834,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-core-theory (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-primus-exploring-scheduler) @@ -848,7 +844,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)))) (package @@ -859,7 +855,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)))) (package @@ -870,7 +866,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-primus-lisp) @@ -884,7 +880,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-primus (= :version)) (bap-std (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (regular (= :version)))) @@ -896,7 +892,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (ogre (= :version)))) (package @@ -908,7 +904,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-primus-track-visited (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-primus-powerpc) @@ -918,7 +914,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-primus-print) @@ -931,7 +927,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-primus (= :version)) (bap-std (= :version)) (bare (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)))) (package @@ -942,7 +938,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)))) (package @@ -954,7 +950,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-primus (= :version)) (bap-std (= :version)) (bap-taint (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)))) (package @@ -968,7 +964,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-std (= :version)) (bitvec-sexp (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) zarith)) @@ -980,7 +976,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)))) (package @@ -991,7 +987,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)))) (package @@ -1005,12 +1001,12 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-primus-track-visited (= :version)) (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (regular (= :version)) (bitvec (= :version)) - zarith - (z3 (>= 4.8.8-1)))) + zarith + z3)) (package (name bap-primus-systems) @@ -1022,7 +1018,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-main (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)))) (package @@ -1033,7 +1029,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-taint (= :version)))) (package @@ -1044,7 +1040,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-primus-track-visited) @@ -1054,7 +1050,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-primus-wandering-scheduler) @@ -1064,7 +1060,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)))) (package @@ -1075,7 +1071,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-common (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-x86 (= :version)))) (package @@ -1087,7 +1083,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-demangle (= :version)) (bap-knowledge (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (graphlib (= :version)) (ogre (= :version)) (ppx_bap (= :version)) @@ -1112,7 +1108,7 @@ be seen as a common denominator between Lwt and Async libraries.") (bap-std (= :version)) (bitvec (= :version)) conf-radare2 - (core_kernel (and (>= v0.14) (< v0.16))) + core (ogre (= :version)) (ppx_bap (= :version)) (re (and (>= 1.0) (< 2.0))) @@ -1124,7 +1120,7 @@ be seen as a common denominator between Lwt and Async libraries.") (synopsis "BAP raw files loader") (tags (bap bap-plugin)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)) (bap-std (= :version)) @@ -1140,9 +1136,9 @@ and support files if necessary.") (tags (bap bap-tool command-line)) (depends (bap-common (= :version)) - (parsexp (and (>= v0.14) (< v0.16))) - (stdio (and (>= v0.14) (< v0.16))) - stdlib-shims + parsexp + stdio + stdlib-shims (uuidm (>= 0.9.7)))) (package @@ -1153,7 +1149,7 @@ and support files if necessary.") (bap-common (= :version)) (bap-main (= :version)) (bap-recipe (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-relation) @@ -1184,7 +1180,7 @@ between the sets.") (bitvec-order (= :version)) (bitvec-sexp (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (ogre (= :version)) (ppx_bap (= :version)))) @@ -1194,7 +1190,7 @@ between the sets.") (synopsis "BAP plugin for reporting progress bars and statuses") (tags (bap bap-plugin)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (bap-std (= :version)))) @@ -1210,7 +1206,7 @@ between the sets.") (bap-c (= :version)) (bap-knowledge (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (ogre (= :version)) (ppx_bap (= :version)))) @@ -1225,7 +1221,7 @@ between the sets.") (bap-knowledge (= :version)) (bap-primus (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (graphlib (= :version)) (monads (= :version)) (regular (= :version)))) @@ -1238,7 +1234,7 @@ between the sets.") (bap-common (= :version)) (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (ogre (= :version)))) (package @@ -1248,7 +1244,7 @@ between the sets.") (depends (bap-common (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-std) @@ -1268,7 +1264,7 @@ between the sets.") (bitvec (= :version)) (camlzip (and (>= 1.07) (< 2.0))) cmdliner - (core_kernel (and (>= v0.14) (< v0.16))) + core fileutils (graphlib (= :version)) mmap @@ -1301,7 +1297,7 @@ The library provides several algorithms: - Scanner - a generic algorithm for finding strings of characters (a library variant of strings tool)") (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)))) @@ -1310,7 +1306,7 @@ The library provides several algorithms: (synopsis "Finds strings in binaries using microexecution") (tags (bap bap-plugin)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (bap-beagle (= :version)) (ppx_bap (= :version)) @@ -1332,7 +1328,7 @@ The library provides several algorithms: (bitvec-order (= :version)) (bitvec-sexp (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (ogre (= :version)) (ppx_bap (= :version)))) @@ -1347,7 +1343,7 @@ The library provides several algorithms: (bap-main (= :version)) (bap-relation (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))))) + core)) (package (name bap-systemz) @@ -1360,7 +1356,7 @@ The library provides several algorithms: (bap-main (= :version)) (bap-std (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (ogre (= :version)) (ppx_bap (= :version)))) @@ -1373,7 +1369,7 @@ The library provides several algorithms: (bap-primus (= :version)) (bap-std (= :version)) (bap-strings (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (monads (= :version)) (ppx_bap (= :version)) (regular (= :version)))) @@ -1386,7 +1382,7 @@ The library provides several algorithms: (bap-common (= :version)) (bap-core-theory (= :version)) (bap-microx (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (graphlib (= :version)) (monads (= :version)) (regular (= :version)))) @@ -1400,7 +1396,7 @@ The library provides several algorithms: (bap-common (= :version)) (bap-main (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (ppx_bap (= :version)) (regular (= :version)))) @@ -1416,7 +1412,7 @@ The library provides several algorithms: (bap-main (= :version)) (bap-std (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (ogre (= :version)) (ppx_bap (= :version)))) @@ -1432,7 +1428,7 @@ as the interpreter for your BAP scripts, e.g., `baptop myprog.ml`") (depends (bap-common (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core ocamlfind)) (package @@ -1445,7 +1441,7 @@ as the interpreter for your BAP scripts, e.g., `baptop myprog.ml`") (bap-plugins (= :version)) (bap-std (= :version)) (bap-traces (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (ppx_bap (= :version)) (regular (= :version)) uri)) @@ -1457,7 +1453,7 @@ as the interpreter for your BAP scripts, e.g., `baptop myprog.ml`") (depends (bap-common (= :version)) (bap-std (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (ppx_bap (= :version)) (regular (= :version)) (uri (>= 1.9.0)) @@ -1468,7 +1464,7 @@ as the interpreter for your BAP scripts, e.g., `baptop myprog.ml`") (synopsis "Eliminates complex branch conditions") (tags (bap bap-plugin)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (bap-std (= :version)))) @@ -1477,7 +1473,7 @@ as the interpreter for your BAP scripts, e.g., `baptop myprog.ml`") (synopsis "Warns if an unused argument may cause an issue") (tags (bap bap-plugin)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (bap-std (= :version)))) @@ -1500,7 +1496,7 @@ as the interpreter for your BAP scripts, e.g., `baptop myprog.ml`") (bap-primus (= :version)) (bap-std (= :version)) (bitvec (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core (ogre (= :version)) (ppx_bap (= :version)) (regular (= :version)) @@ -1515,8 +1511,8 @@ of facts that are represented as s-expressions.") (tags (bap bap-library)) (depends (bap-common (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) - (parsexp (and (>= v0.14) (< v0.16))))) + core + parsexp)) (package (name bitvec) @@ -1529,7 +1525,7 @@ of facts that are represented as s-expressions.") (synopsis "Janestreet's Binprot serialization for Bitvec") (tags (bap)) (depends - (bin_prot (and (>= v0.14) (< v0.16))) + bin_prot (bitvec (= :version)) (ppx_bap (= :version)))) @@ -1564,7 +1560,7 @@ these functors, any algorithm written for OCamlGraph can be used on \ (tags (bap graph)) (depends (bap-common (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core ocamlgraph (ppx_bap (= :version)) (regular (= :version)))) @@ -1576,7 +1572,8 @@ these functors, any algorithm written for OCamlGraph can be used on \ (tags (bap monad)) (depends (bap-common (= :version)) - (core_kernel (and (>= v0.14) (< v0.16))) + core + jane_rope (ppx_bap (= :version)))) (package @@ -1588,7 +1585,7 @@ representation and provides a type safe monadic interface for quering \ and updating documents") (tags (bap sexp)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (monads (= :version)))) @@ -1597,22 +1594,22 @@ and updating documents") (synopsis "BAP-blessed ppx rewriters") (tags (bap ppx)) (depends - (ppx_assert (and (>= v0.14) (< v0.16))) - (ppx_bench (and (>= v0.14) (< v0.16))) - (ppx_bin_prot (and (>= v0.14) (< v0.16))) - (ppx_cold (and (>= v0.14) (< v0.16))) - (ppx_compare (and (>= v0.14) (< v0.16))) - (ppx_enumerate (and (>= v0.14) (< v0.16))) - (ppx_fields_conv (and (>= v0.14) (< v0.16))) - (ppx_hash (and (>= v0.14) (< v0.16))) - (ppx_here (and (>= v0.14) (< v0.16))) + ppx_assert + ppx_bench + ppx_bin_prot + ppx_cold + ppx_compare + ppx_enumerate + ppx_fields_conv + ppx_hash + ppx_here (ppxlib (>= 0.15.0)) - (ppx_optcomp (and (>= v0.14) (< v0.16))) - (ppx_expect (and (>= v0.14) (< v0.16))) - (ppx_inline_test (and (>= v0.14) (< v0.16))) - (ppx_sexp_conv (and (>= v0.14) (< v0.16))) - (ppx_sexp_value (and (>= v0.14) (< v0.16))) - (ppx_variants_conv (and (>= v0.14) (< v0.16))))) + ppx_optcomp + ppx_expect + ppx_inline_test + ppx_sexp_conv + ppx_sexp_value + ppx_variants_conv)) (package (name regular) @@ -1628,10 +1625,10 @@ In particular, the library includes: \ - module Cache that adds caching service for data types \ - module Regular that glues everything together \ - module Opaque for regular but opaque data types \ -- module Seq that extends Core_kernel's sequence module \ +- module Seq that extends Core's sequence module \ - module Bytes that provides a rich core-like interface for Bytes data type.") (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core (bap-common (= :version)) (ppx_bap (= :version)))) @@ -1640,7 +1637,8 @@ In particular, the library includes: \ (synopsis "A library for rich formatting using semantic tags") (tags (bap)) (depends - (core_kernel (and (>= v0.14) (< v0.16))) + core + core_kernel (bap-common (= :version)))) (generate_opam_files true) diff --git a/graphlib.opam b/graphlib.opam index db28362d1..5889466cc 100644 --- a/graphlib.opam +++ b/graphlib.opam @@ -13,7 +13,7 @@ bug-reports: "https://github.com/BinaryAnalysisPlatform/bap/issues" depends: [ "dune" {>= "3.1"} "bap-common" {= version} - "core_kernel" {>= "v0.14" & < "v0.16"} + "core" "ocamlgraph" "ppx_bap" {= version} "regular" {= version} diff --git a/lib/arm/ARM.ml b/lib/arm/ARM.ml index 9c5336410..1fec364ee 100644 --- a/lib/arm/ARM.ml +++ b/lib/arm/ARM.ml @@ -3,4 +3,4 @@ include Arm_lifter module Insn = Arm_insn module Cond = Arm_cond module Reg = Arm_reg -module Op = Arm_op +module Op = Arm_op diff --git a/lib/arm/arm_bit.ml b/lib/arm/arm_bit.ml index b825415d9..74b52f766 100644 --- a/lib/arm/arm_bit.ml +++ b/lib/arm/arm_bit.ml @@ -1,78 +1,76 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Or_error - open Arm_types open Arm_utils - module Env = Arm_env module Shift = Arm_shift -let bits_of_size = function - | `H -> 16 - | `B -> 8 - - +let bits_of_size = function `H -> 16 | `B -> 8 let wordm x = Ok (Word.of_int x ~width:32) let extend ~dest ~src ?src2 sign size ~rot cond = let rot = assert_imm [%here] rot in let dest = assert_reg [%here] dest in - let amount = match Word.Int_err.((!$rot * wordm 8)) with + let amount = + match Word.Int_err.(!$rot * wordm 8) with | Ok amount -> amount - | Error err -> fail [%here] "failed to obtain amount" in + | Error err -> fail [%here] "failed to obtain amount" + in let rotated, (_ : exp) = - if Word.is_zero amount then - exp_of_op src, Bil.int (Word.zero 32) - else - Shift.lift_c ~src:(exp_of_op src) - `ROR ~shift:(Bil.int amount) reg32_t in - let extracted = - Bil.(cast low (bits_of_size size) rotated) in + if Word.is_zero amount then (exp_of_op src, Bil.int (Word.zero 32)) + else Shift.lift_c ~src:(exp_of_op src) `ROR ~shift:(Bil.int amount) reg32_t + in + let extracted = Bil.(cast low (bits_of_size size) rotated) in let extent = cast_of_sign sign 32 extracted in - let final = match src2 with - | Some s2 -> Bil.(exp_of_op s2 + extent) - | None -> extent in - exec [assn (Env.of_reg dest) final] cond + let final = + match src2 with Some s2 -> Bil.(exp_of_op s2 + extent) | None -> extent + in + exec [ assn (Env.of_reg dest) final ] cond let bit_extract ~dest ~src sign ~lsb ~widthminus1 cond = let dest = assert_reg [%here] dest in let lsb = assert_imm [%here] lsb in let widthminus1 = assert_imm [%here] widthminus1 in - let int_of_imm imm = match Word.to_int imm with + let int_of_imm imm = + match Word.to_int imm with | Ok imm -> imm - | Error err -> fail [%here] "can't cast word to int: %s" @@ - Error.to_string_hum err in + | Error err -> + fail [%here] "can't cast word to int: %s" @@ Error.to_string_hum err + in let low = int_of_imm lsb in - let high = low + (int_of_imm widthminus1) in - let extracted = Bil.extract high low (exp_of_op src) in + let high = low + int_of_imm widthminus1 in + let extracted = Bil.extract ~hi:high ~lo:low (exp_of_op src) in let final = cast_of_sign sign 32 extracted in - exec [assn (Env.of_reg dest) final] cond + exec [ assn (Env.of_reg dest) final ] cond let get_lsb_width instr : int * int = let open Word.Int_exn in let width = Word.bitwidth instr in - let (!$) = Word.of_int ~width in + let ( !$ ) = Word.of_int ~width in let lsb = (instr lsr !$7) land !$0x1f in let msb = (instr lsr !$16) land !$0x1f in let width = abs (msb - lsb + !$1) in match Word.(to_int lsb, to_int width) with - | Ok lsb, Ok width -> lsb,width + | Ok lsb, Ok width -> (lsb, width) | _ -> fail [%here] "failed to get_lsb_width" let bit_field_insert ~dest ~src raw cond = let dest = assert_reg [%here] dest in - let d = Env.of_reg dest in + let d = Env.of_reg dest in let d_e = Bil.var d in let lsb, width = get_lsb_width raw in - let extracted = Bil.extract (width - 1) 0 (exp_of_op src) in - let ext_h b s = Bil.extract 31 b s in - let ext_l b s = Bil.extract b 0 s in - let inst = match lsb + width - 1, lsb with + let extracted = Bil.extract ~hi:(width - 1) ~lo:0 (exp_of_op src) in + let ext_h b s = Bil.extract ~hi:31 ~lo:b s in + let ext_l b s = Bil.extract ~hi:b ~lo:0 s in + let inst = + match (lsb + width - 1, lsb) with | 31, 0 -> extracted | 31, l -> Bil.concat extracted (ext_l (l - 1) d_e) - | m, 0 -> Bil.concat (ext_h (m + 1) d_e) extracted - | m, l -> Bil.concat (Bil.concat - (ext_h (m + 1) d_e) extracted) - (ext_l (l - 1) d_e) in - exec [Bil.move d inst] cond + | m, 0 -> Bil.concat (ext_h (m + 1) d_e) extracted + | m, l -> + Bil.concat + (Bil.concat (ext_h (m + 1) d_e) extracted) + (ext_l (l - 1) d_e) + in + exec [ Bil.move d inst ] cond diff --git a/lib/arm/arm_bit.mli b/lib/arm/arm_bit.mli index 915b67471..bccb75855 100644 --- a/lib/arm/arm_bit.mli +++ b/lib/arm/arm_bit.mli @@ -1,12 +1,18 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Arm_types - - -val extend : dest:op -> src:op -> ?src2:op -> sign -> [< `B | `H ] -> rot:op -> op -> stmt list +val extend : + dest:op -> + src:op -> + ?src2:op -> + sign -> + [< `B | `H ] -> + rot:op -> + op -> + stmt list val bit_field_insert : dest:op -> src:op -> Word.t -> op -> stmt list - -val bit_extract : dest:op -> src:op -> sign -> lsb:op -> widthminus1:op -> op -> stmt list +val bit_extract : + dest:op -> src:op -> sign -> lsb:op -> widthminus1:op -> op -> stmt list diff --git a/lib/arm/arm_branch.ml b/lib/arm/arm_branch.ml index a01f30ced..eeb8fa7b2 100644 --- a/lib/arm/arm_branch.ml +++ b/lib/arm/arm_branch.ml @@ -1,13 +1,11 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Or_error - open Arm_types open Arm_utils - module Env = Arm_env -let pc_offset = Word.(of_int 8 ~width:32) (* PC is ahead by some bytes in ARM *) +let pc_offset = Word.(of_int 8 ~width:32) (* PC is ahead by some bytes in ARM *) let word = Word.of_int ~width:32 let lift operand ?link ?x:_ ?cond addr = @@ -15,20 +13,22 @@ let lift operand ?link ?x:_ ?cond addr = match operand with | `Reg r -> Bil.var (Env.of_reg r) | `Imm offset -> - let width = Word.bitwidth offset in - let _1 = Word.one 32 in - let min_32 = Word.Int_exn.(_1 lsl Word.of_int 31 ~width) in - let offset = if Word.equal offset min_32 then Word.zero 32 else offset in - let r = Word.Int_exn.(addr + pc_offset + offset) in - Bil.int r in + let width = Word.bitwidth offset in + let _1 = Word.one 32 in + let min_32 = Word.Int_exn.(_1 lsl Word.of_int 31 ~width) in + let offset = + if Word.equal offset min_32 then Word.zero 32 else offset + in + let r = Word.Int_exn.(addr + pc_offset + offset) in + Bil.int r + in (* TODO detect change to thumb in `x` *) - let jump_instr = [Bil.jmp target] in + let jump_instr = [ Bil.jmp target ] in let link_instr = let next_addr = Word.Int_exn.(addr + pc_offset - word 4) in match link with - | Some true -> [Bil.move Env.lr Bil.(int next_addr)] - | _ -> [] in + | Some true -> [ Bil.move Env.lr Bil.(int next_addr) ] + | _ -> [] + in let stmts = link_instr @ jump_instr in - match cond with - | Some c -> exec stmts c - | None -> stmts + match cond with Some c -> exec stmts c | None -> stmts diff --git a/lib/arm/arm_branch.mli b/lib/arm/arm_branch.mli index 0eadef0c6..6744df3a7 100644 --- a/lib/arm/arm_branch.mli +++ b/lib/arm/arm_branch.mli @@ -1,4 +1,4 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Arm_types diff --git a/lib/arm/arm_cond.ml b/lib/arm/arm_cond.ml index de49ff004..cf2bb027a 100644 --- a/lib/arm/arm_cond.ml +++ b/lib/arm/arm_cond.ml @@ -1,22 +1,21 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std - open Arm_types type t = cond [@@deriving bin_io, compare, sexp] let of_int_exn = function - | 0 -> `EQ - | 1 -> `NE - | 2 -> `CS - | 3 -> `CC - | 4 -> `MI - | 5 -> `PL - | 6 -> `VS - | 7 -> `VC - | 8 -> `HI - | 9 -> `LS + | 0 -> `EQ + | 1 -> `NE + | 2 -> `CS + | 3 -> `CC + | 4 -> `MI + | 5 -> `PL + | 6 -> `VS + | 7 -> `VC + | 8 -> `HI + | 9 -> `LS | 10 -> `GE | 11 -> `LT | 12 -> `GT @@ -26,14 +25,13 @@ let of_int_exn = function let create w = let open Or_error in - Word.to_int w >>= fun w -> - try_with ~backtrace:true (fun () -> of_int_exn w) + Word.to_int w >>= fun w -> try_with ~backtrace:true (fun () -> of_int_exn w) + +include Regular.Make (struct + type t = cond [@@deriving bin_io, compare, sexp] -include Regular.Make(struct - type t = cond [@@deriving bin_io, compare, sexp] - let hash (cond : t) = Hashtbl.hash cond - let module_name = Some "Arm.Cond" - let version = "1.0.0" - let pp fmt cond = - Format.fprintf fmt "%a" Sexp.pp (sexp_of_t cond) - end) + let hash (cond : t) = Hashtbl.hash cond + let module_name = Some "Arm.Cond" + let version = "1.0.0" + let pp fmt cond = Format.fprintf fmt "%a" Sexp.pp (sexp_of_t cond) +end) diff --git a/lib/arm/arm_cond.mli b/lib/arm/arm_cond.mli index 97b6bfc0d..4d1872226 100644 --- a/lib/arm/arm_cond.mli +++ b/lib/arm/arm_cond.mli @@ -1,11 +1,11 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std open Arm_types type t = cond [@@deriving bin_io, compare, sexp] -(** decodes condition value from a word *) val create : word -> cond Or_error.t +(** decodes condition value from a word *) include Regular.S with type t := t diff --git a/lib/arm/arm_env.ml b/lib/arm/arm_env.ml index 93b6de7ff..020436ca2 100644 --- a/lib/arm/arm_env.ml +++ b/lib/arm/arm_env.ml @@ -1,13 +1,10 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std open Arm_types -let (%:) name typ = Var.create name typ - - +let ( %: ) name typ = Var.create name typ let nil = Arm_reg.to_string `Nil %: reg32_t - let make_register reg ty = Arm_reg.to_string reg %: ty let reg32 reg = make_register reg reg32_t @@ -23,7 +20,6 @@ let vf = "VF" %: bool_t let qf = "QF" %: bool_t let ge = Array.init 4 ~f:(fun n -> sprintf "GE%d" n %: bool_t) - (* Thumb if-then state register *) let itstate = Arm_reg.to_string `ITSTATE %: reg8_t @@ -33,16 +29,16 @@ let pc = reg32 `PC let sp = reg32 `SP (* 32-bit general-purpose registers *) -let r0 = reg32 `R0 -let r1 = reg32 `R1 -let r2 = reg32 `R2 -let r3 = reg32 `R3 -let r4 = reg32 `R4 -let r5 = reg32 `R5 -let r6 = reg32 `R6 -let r7 = reg32 `R7 -let r8 = reg32 `R8 -let r9 = reg32 `R9 +let r0 = reg32 `R0 +let r1 = reg32 `R1 +let r2 = reg32 `R2 +let r3 = reg32 `R3 +let r4 = reg32 `R4 +let r5 = reg32 `R5 +let r6 = reg32 `R6 +let r7 = reg32 `R7 +let r8 = reg32 `R8 +let r9 = reg32 `R9 let r10 = reg32 `R10 let r11 = reg32 `R11 let r12 = reg32 `R12 @@ -53,22 +49,22 @@ let r14 = lr let r15 = pc let var_of_gpr : gpr_reg -> var = function - | `R0 -> r0 - | `R1 -> r1 - | `R2 -> r2 - | `R3 -> r3 - | `R4 -> r4 - | `R5 -> r5 - | `R6 -> r6 - | `R7 -> r7 - | `R8 -> r8 - | `R9 -> r9 + | `R0 -> r0 + | `R1 -> r1 + | `R2 -> r2 + | `R3 -> r3 + | `R4 -> r4 + | `R5 -> r5 + | `R6 -> r6 + | `R7 -> r7 + | `R8 -> r8 + | `R9 -> r9 | `R10 -> r10 | `R11 -> r11 | `R12 -> r12 - | `LR -> lr - | `PC -> pc - | `SP -> sp + | `LR -> lr + | `PC -> pc + | `SP -> sp let var_of_ccr : ccr_reg -> var = function | `CPSR -> cpsr diff --git a/lib/arm/arm_env.mli b/lib/arm/arm_env.mli index 46d52a84e..d2038ea15 100644 --- a/lib/arm/arm_env.mli +++ b/lib/arm/arm_env.mli @@ -1,101 +1,87 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Arm_types - -(** [spsr] Saved Processor Status Register *) val spsr : var +(** [spsr] Saved Processor Status Register *) - -(** [cpsr] Current Processor Status Register *) val cpsr : var +(** [cpsr] Current Processor Status Register *) - -(** [nf] Negative Flag *) val nf : var +(** [nf] Negative Flag *) -(** [zf] Zero Flag *) val zf : var +(** [zf] Zero Flag *) -(** [cf] Carry Flag *) val cf : var +(** [cf] Carry Flag *) -(** [vf] oVerfrlow Flag *) val vf : var +(** [vf] oVerfrlow Flag *) -(** [qf] underflow (saturation) Flag *) val qf : var +(** [qf] underflow (saturation) Flag *) - -(** [ge] array of general registers *) val ge : var array +(** [ge] array of general registers *) - -(** [itstate] ITSTATE register *) val itstate : var +(** [itstate] ITSTATE register *) - -(** [lr] Link Register *) val lr : var +(** [lr] Link Register *) - -(** [pc] Program Counter *) val pc : var +(** [pc] Program Counter *) -(** [sp] Stack Pointer *) val sp : var +(** [sp] Stack Pointer *) - -(** general purpose register *) val r0 : var +(** general purpose register *) -(** general purpose register *) val r1 : var +(** general purpose register *) -(** general purpose register *) val r2 : var +(** general purpose register *) -(** general purpose register *) val r3 : var +(** general purpose register *) -(** general purpose register *) val r4 : var +(** general purpose register *) -(** general purpose register *) val r5 : var +(** general purpose register *) -(** general purpose register *) val r6 : var +(** general purpose register *) - -(** general purpose register *) val r7 : var +(** general purpose register *) - -(** general purpose register *) val r8 : var +(** general purpose register *) - -(** general purpose register *) val r9 : var +(** general purpose register *) -(** general purpose register *) val r10 : var +(** general purpose register *) -(** general purpose register *) val r11 : var +(** general purpose register *) -(** general purpose register *) val r12 : var +(** general purpose register *) - -(** [of_reg arm_reg] lifts arm register into BIL variable *) val of_reg : reg -> var +(** [of_reg arm_reg] lifts arm register into BIL variable *) - -(** [new_var name] creates a freshly new variable prefixed with [name] *) val new_var : string -> var +(** [new_var name] creates a freshly new variable prefixed with [name] *) - -(** [mem] BIL variable that denotes the system memory. *) val mem : var +(** [mem] BIL variable that denotes the system memory. *) diff --git a/lib/arm/arm_flags.ml b/lib/arm/arm_flags.ml index 4563d9e59..73bee731f 100644 --- a/lib/arm/arm_flags.ml +++ b/lib/arm/arm_flags.ml @@ -1,34 +1,26 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std - open Arm_types open Arm_utils +module Env = Arm_env -module Env = Arm_env - -let set_nzf r t = [ - Bil.move Env.nf (msb r); - Bil.move Env.zf Bil.(r = zero t); -] +let set_nzf r t = [ Bil.move Env.nf (msb r); Bil.move Env.zf Bil.(r = zero t) ] let set_vnzf_add s1 s2 r t = - Bil.move Env.vf (msb Bil.((lnot (s1 lxor s2)) land (s1 lxor r))) - :: set_nzf r t + Bil.move Env.vf (msb Bil.(lnot (s1 lxor s2) land (s1 lxor r))) :: set_nzf r t -let set_add s1 s2 r t = - Bil.move Env.cf Bil.(r < s1) :: set_vnzf_add s1 s2 r t +let set_add s1 s2 r t = Bil.move Env.cf Bil.(r < s1) :: set_vnzf_add s1 s2 r t let set_vnzf_sub s1 s2 r t = - Bil.move Env.vf (msb Bil.((s1 lxor s2) land (s1 lxor r))) :: - set_nzf r t + Bil.move Env.vf (msb Bil.(s1 lxor s2 land (s1 lxor r))) :: set_nzf r t -let set_sub s1 s2 r t = - Bil.move Env.cf Bil.(s2 <= s1) :: set_vnzf_sub s1 s2 r t +let set_sub s1 s2 r t = Bil.move Env.cf Bil.(s2 <= s1) :: set_vnzf_sub s1 s2 r t let set_adc s1 s2 r t = let sum_with_carry = let extend = Bil.(cast unsigned) (bitlen t + 1) in - Bil.(extend s1 + extend s2 + extend (var Env.cf)) in + Bil.(extend s1 + extend s2 + extend (var Env.cf)) + in Bil.move Env.cf (msb sum_with_carry) :: set_vnzf_add s1 s2 r t let set_sbc s1 s2 r t = set_adc s1 Bil.(lnot s2) r t @@ -38,8 +30,9 @@ let set_cf_data ~imm ~data = let width = Word.bitwidth imm in if Word.(of_int ~width 255 >= imm && imm >= zero width) then let width = Word.bitwidth data in - if Word.(Int_exn.(data land of_int ~width 0xf00) = zero width) - then Bil.unknown "undefined" bool_t + if Word.(Int_exn.(data land of_int ~width 0xf00) = zero width) then + Bil.unknown "undefined" bool_t else Bil.int Word.b0 - else msb Bil.(int imm) in + else msb Bil.(int imm) + in Bil.move Env.cf value diff --git a/lib/arm/arm_flags.mli b/lib/arm/arm_flags.mli index af0cac0e0..e0419f687 100644 --- a/lib/arm/arm_flags.mli +++ b/lib/arm/arm_flags.mli @@ -1,19 +1,12 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Arm_types val set_nzf : exp -> typ -> stmt list - val set_vnzf_add : exp -> exp -> exp -> typ -> stmt list - val set_add : exp -> exp -> exp -> typ -> stmt list - val set_sub : exp -> exp -> exp -> typ -> stmt list - val set_vnzf_sub : exp -> exp -> exp -> typ -> stmt list - val set_adc : exp -> exp -> exp -> typ -> stmt list - val set_sbc : exp -> exp -> exp -> typ -> stmt list - val set_cf_data : imm:word -> data:word -> stmt diff --git a/lib/arm/arm_helpers.ml b/lib/arm/arm_helpers.ml index d27b7954d..16ea62543 100644 --- a/lib/arm/arm_helpers.ml +++ b/lib/arm/arm_helpers.ml @@ -1,4 +1,4 @@ -open Core_kernel[@@warning "-D"] +open Core let sexpable_of_string t_of_sexp name = try Some (t_of_sexp @@ Sexp.of_string name) diff --git a/lib/arm/arm_helpers.mli b/lib/arm/arm_helpers.mli index 94f78fb9b..28f001391 100644 --- a/lib/arm/arm_helpers.mli +++ b/lib/arm/arm_helpers.mli @@ -1,3 +1,3 @@ -open Core_kernel[@@warning "-D"] +open Core val sexpable_of_string : (Sexp.t -> 'a) -> string -> 'a option diff --git a/lib/arm/arm_insn.ml b/lib/arm/arm_insn.ml index 08a19a7ad..77dcabf33 100644 --- a/lib/arm/arm_insn.ml +++ b/lib/arm/arm_insn.ml @@ -1,4 +1,4 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std open Arm_helpers @@ -18,11 +18,11 @@ let of_name name = let of_basic insn = of_name (Disasm_expert.Basic.Insn.name insn) let create insn = of_name (Insn.name insn) -include Regular.Make(struct - type nonrec t = t [@@deriving bin_io, compare, sexp] - let module_name = Some "Arm.Insn" - let version = "1.0.0" - let pp fmt insn = - Format.fprintf fmt "%a" Sexp.pp (sexp_of_t insn) - let hash (insn : t) = Hashtbl.hash insn - end) +include Regular.Make (struct + type nonrec t = t [@@deriving bin_io, compare, sexp] + + let module_name = Some "Arm.Insn" + let version = "1.0.0" + let pp fmt insn = Format.fprintf fmt "%a" Sexp.pp (sexp_of_t insn) + let hash (insn : t) = Hashtbl.hash insn +end) diff --git a/lib/arm/arm_insn.mli b/lib/arm/arm_insn.mli index 6af0bd61a..c0d31f4c7 100644 --- a/lib/arm/arm_insn.mli +++ b/lib/arm/arm_insn.mli @@ -1,20 +1,16 @@ open Bap.Std open Regular.Std - -(** insn opcode. - - In contradicition with BAP insn, the ARM one is just an opcode, - without operands. (Possibly, opcode would be a much better - name). *) type t = Arm_types.insn [@@deriving bin_io, compare, sexp] +(** insn opcode. + In contradicition with BAP insn, the ARM one is just an opcode, without + operands. (Possibly, opcode would be a much better name). *) -(** [create insn] translate from BAP [insn] *) val create : insn -> t option +(** [create insn] translate from BAP [insn] *) - -(** [of_basic insn] translate from BAP basic [insn] *) -val of_basic : ('a,'b) Disasm_expert.Basic.insn -> t option +val of_basic : ('a, 'b) Disasm_expert.Basic.insn -> t option +(** [of_basic insn] translate from BAP basic [insn] *) include Regular.S with type t := t diff --git a/lib/arm/arm_lifter.ml b/lib/arm/arm_lifter.ml index 38f242c85..8e24f6b4d 100644 --- a/lib/arm/arm_lifter.ml +++ b/lib/arm/arm_lifter.ml @@ -1,1053 +1,859 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap_core_theory open Or_error open Bap.Std - open Arm_types open Arm_utils - -include Self() - -module Basic = Disasm_expert.Basic -module Bit = Arm_bit -module Branch = Arm_branch -module Env = Arm_env -module Mem = Arm_mem +include Self () +module Basic = Disasm_expert.Basic +module Bit = Arm_bit +module Branch = Arm_branch +module Env = Arm_env +module Mem = Arm_mem module Mem_shift = Arm_mem_shift -module Mov = Arm_mov -module Mul = Arm_mul -module Shift = Arm_shift -module Flags = Arm_flags - +module Mov = Arm_mov +module Mul = Arm_mul +module Shift = Arm_shift +module Flags = Arm_flags let word = Word.of_int ~width:32 let int32 x = Bil.int (word x) - let string_of_ops ops = Format.asprintf "%a" Sexp.pp (sexp_of_array sexp_of_op ops) let lift_move ~encoding mem ops (insn : move_insn) : stmt list = let open Mov in - match insn, ops with - | `MOVi, [|dest; src; cond; _; wflag|] - | `MOVr, [|dest; src; cond; _; wflag|] -> - lift ~encoding ~dest src `MOV mem cond ~wflag - | `MOVsr, [|dest; src; sreg; simm; cond; _; wflag|] -> - lift ~encoding ~dest src `MOV mem cond ~wflag ~sreg ~simm - | `MOVsi, [|dest; src; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src `MOV ~simm:shift_imm mem cond ~wflag - - | `MOVPCLR, [|cond; wflag|] -> - lift ~encoding ~dest:(`Reg `PC) (`Reg `LR) `MOV mem cond ~wflag - - | `MVNi, [|dest; src; cond; _; wflag|] - | `MVNr, [|dest; src; cond; _; wflag|] -> - lift ~encoding ~dest src `MVN mem cond ~wflag - - | `MVNsr, [|dest; src; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src `MVN ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `MVNsi, [|dest; src; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src `MVN ~simm:shift_imm mem cond ~wflag - - | `ANDri, [|dest; src1; src2; cond; _; wflag|] - | `ANDrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `AND mem cond ~wflag - - | `ANDrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `AND ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `ANDrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `AND ~simm:shift_imm - mem cond ~wflag - - | `BICri, [|dest; src1; src2; cond; _; wflag|] - | `BICrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `BIC mem cond ~wflag - - | `BICrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `BIC ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `BICrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `BIC ~simm:shift_imm - mem cond ~wflag - - | `EORri, [|dest; src1; src2; cond; _; wflag|] - | `EORrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `EOR mem cond ~wflag - - | `EORrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `EOR ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `EORrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `EOR ~simm:shift_imm - mem cond ~wflag - - | `ORRri, [|dest; src1; src2; cond; _; wflag|] - | `ORRrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `ORR mem cond ~wflag - - | `ORRrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `ORR ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `ORRrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `ORR ~simm:shift_imm - mem cond ~wflag - - | `TEQri, [|src1; src2; cond; _|] - | `TEQrr, [|src1; src2; cond; _|] -> - lift ~encoding src1 ~src2 `EOR mem cond ~wflag:(`Reg `CPSR) - - | `TEQrsr, [|src1; src2; shift_reg; shift_imm; cond; _|] -> - lift ~encoding src1 ~src2 `EOR ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag:(`Reg `CPSR) - - | `TEQrsi, [|_dest; src1; src2; shift_imm; cond; _|] -> - lift ~encoding src1 ~src2 `EOR ~simm:shift_imm - mem cond ~wflag:(`Reg `CPSR) - - | `TSTri, [|src1; src2; cond; _|] - | `TSTrr, [|src1; src2; cond; _|] -> - lift ~encoding src1 ~src2 `AND mem cond ~wflag:(`Reg `CPSR) - - | `TSTrsr, [|src1; src2; shift_reg; shift_imm; cond; _|] -> - lift ~encoding src1 ~src2 `AND ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag:(`Reg `CPSR) - - | `TSTrsi, [|src1; src2; shift_imm; cond; _|] -> - lift ~encoding src1 ~src2 `AND ~simm:shift_imm - mem cond ~wflag:(`Reg `CPSR) - - | `ADDri, [|dest; src1; src2; cond; _; wflag|] - | `ADDrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `ADD mem cond ~wflag - - | `ADDrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `ADD ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `ADDrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `ADD ~simm:shift_imm - mem cond ~wflag - - | `SUBri, [|dest; src1; src2; cond; _; wflag|] - | `SUBrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `SUB mem cond ~wflag - - | `SUBrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `SUB ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `SUBrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `SUB ~simm:shift_imm - mem cond ~wflag - - | `ADCri, [|dest; src1; src2; cond; _; wflag|] - | `ADCrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `ADC mem cond ~wflag - - | `ADCrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `ADC ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `ADCrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `ADC ~simm:shift_imm - mem cond ~wflag - - | `SBCri, [|dest; src1; src2; cond; _; wflag|] - | `SBCrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `SBC mem cond ~wflag - - | `SBCrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `SBC ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `SBCrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `SBC ~simm:shift_imm - mem cond ~wflag - - | `RSBri, [|dest; src1; src2; cond; _; wflag|] - | `RSBrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `RSB mem cond ~wflag - - | `RSBrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `RSB ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `RSBrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `RSB ~simm:shift_imm - mem cond ~wflag - - | `RSCri, [|dest; src1; src2; cond; _; wflag|] - | `RSCrr, [|dest; src1; src2; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `RSC mem cond ~wflag - - | `RSCrsr, [|dest; src1; src2; shift_reg; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `RSC ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag - - | `RSCrsi, [|dest; src1; src2; shift_imm; cond; _; wflag|] -> - lift ~encoding ~dest src1 ~src2 `RSC ~simm:shift_imm - mem cond ~wflag - - | `CMPri, [|src1; src2; cond; _|] - | `CMPrr, [|src1; src2; cond; _|] -> - lift ~encoding src1 ~src2 `SUB mem cond ~wflag:(`Reg `CPSR) - - | `CMPrsr, [|src1; src2; shift_reg; shift_imm; cond; _|] -> - lift ~encoding src1 ~src2 `SUB ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag:(`Reg `CPSR) - - | `CMPrsi, [|src1; src2; shift_imm; cond; _|] -> - lift ~encoding src1 ~src2 `SUB ~simm:shift_imm - mem cond ~wflag:(`Reg `CPSR) - - | `CMNri, [|src1; src2; cond; _|] - | `CMNzrr, [|src1; src2; cond; _|] -> - lift ~encoding src1 ~src2 `ADD mem cond ~wflag:(`Reg `CPSR) - - | `CMNzrsr, [|src1; src2; shift_reg; shift_imm; cond; _|] -> - lift ~encoding src1 ~src2 `ADD ~sreg:shift_reg ~simm:shift_imm - mem cond ~wflag:(`Reg `CPSR) - - | `CMNzrsi, [|src1; src2; shift_imm; cond; _|] -> - lift ~encoding src1 ~src2 `ADD ~simm:shift_imm - mem cond ~wflag:(`Reg `CPSR) - - (** Special Data Instructions *) - - | `MOVi16, [|`Reg dest; src; cond; _wflag|] -> - exec [Bil.move (Env.of_reg dest) (exp_of_op src)] cond - - | `MOVTi16, [|`Reg dest; _; src; cond; _wflag|] -> - let dest = Env.of_reg dest and src = exp_of_op src in - Bil.[dest := var dest land int32 0xFFFF lor src lsl int32 16] |> - fun ins -> exec ins cond - | insn,ops -> - fail [%here] "ops %s doesn't match move insn %s" - (string_of_ops ops) (Arm_insn.to_string (insn :> insn)) - - -let lift_bits mem ops (insn : bits_insn ) = + match (insn, ops) with + | `MOVi, [| dest; src; cond; _; wflag |] + | `MOVr, [| dest; src; cond; _; wflag |] -> + lift ~encoding ~dest src `MOV mem cond ~wflag + | `MOVsr, [| dest; src; sreg; simm; cond; _; wflag |] -> + lift ~encoding ~dest src `MOV mem cond ~wflag ~sreg ~simm + | `MOVsi, [| dest; src; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src `MOV ~simm:shift_imm mem cond ~wflag + | `MOVPCLR, [| cond; wflag |] -> + lift ~encoding ~dest:(`Reg `PC) (`Reg `LR) `MOV mem cond ~wflag + | `MVNi, [| dest; src; cond; _; wflag |] + | `MVNr, [| dest; src; cond; _; wflag |] -> + lift ~encoding ~dest src `MVN mem cond ~wflag + | `MVNsr, [| dest; src; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src `MVN ~sreg:shift_reg ~simm:shift_imm mem cond + ~wflag + | `MVNsi, [| dest; src; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src `MVN ~simm:shift_imm mem cond ~wflag + | `ANDri, [| dest; src1; src2; cond; _; wflag |] + | `ANDrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `AND mem cond ~wflag + | `ANDrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `AND ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `ANDrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `AND ~simm:shift_imm mem cond ~wflag + | `BICri, [| dest; src1; src2; cond; _; wflag |] + | `BICrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `BIC mem cond ~wflag + | `BICrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `BIC ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `BICrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `BIC ~simm:shift_imm mem cond ~wflag + | `EORri, [| dest; src1; src2; cond; _; wflag |] + | `EORrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `EOR mem cond ~wflag + | `EORrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `EOR ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `EORrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `EOR ~simm:shift_imm mem cond ~wflag + | `ORRri, [| dest; src1; src2; cond; _; wflag |] + | `ORRrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `ORR mem cond ~wflag + | `ORRrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `ORR ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `ORRrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `ORR ~simm:shift_imm mem cond ~wflag + | `TEQri, [| src1; src2; cond; _ |] | `TEQrr, [| src1; src2; cond; _ |] -> + lift ~encoding src1 ~src2 `EOR mem cond ~wflag:(`Reg `CPSR) + | `TEQrsr, [| src1; src2; shift_reg; shift_imm; cond; _ |] -> + lift ~encoding src1 ~src2 `EOR ~sreg:shift_reg ~simm:shift_imm mem cond + ~wflag:(`Reg `CPSR) + | `TEQrsi, [| _dest; src1; src2; shift_imm; cond; _ |] -> + lift ~encoding src1 ~src2 `EOR ~simm:shift_imm mem cond + ~wflag:(`Reg `CPSR) + | `TSTri, [| src1; src2; cond; _ |] | `TSTrr, [| src1; src2; cond; _ |] -> + lift ~encoding src1 ~src2 `AND mem cond ~wflag:(`Reg `CPSR) + | `TSTrsr, [| src1; src2; shift_reg; shift_imm; cond; _ |] -> + lift ~encoding src1 ~src2 `AND ~sreg:shift_reg ~simm:shift_imm mem cond + ~wflag:(`Reg `CPSR) + | `TSTrsi, [| src1; src2; shift_imm; cond; _ |] -> + lift ~encoding src1 ~src2 `AND ~simm:shift_imm mem cond + ~wflag:(`Reg `CPSR) + | `ADDri, [| dest; src1; src2; cond; _; wflag |] + | `ADDrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `ADD mem cond ~wflag + | `ADDrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `ADD ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `ADDrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `ADD ~simm:shift_imm mem cond ~wflag + | `SUBri, [| dest; src1; src2; cond; _; wflag |] + | `SUBrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `SUB mem cond ~wflag + | `SUBrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `SUB ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `SUBrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `SUB ~simm:shift_imm mem cond ~wflag + | `ADCri, [| dest; src1; src2; cond; _; wflag |] + | `ADCrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `ADC mem cond ~wflag + | `ADCrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `ADC ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `ADCrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `ADC ~simm:shift_imm mem cond ~wflag + | `SBCri, [| dest; src1; src2; cond; _; wflag |] + | `SBCrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `SBC mem cond ~wflag + | `SBCrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `SBC ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `SBCrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `SBC ~simm:shift_imm mem cond ~wflag + | `RSBri, [| dest; src1; src2; cond; _; wflag |] + | `RSBrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `RSB mem cond ~wflag + | `RSBrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `RSB ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `RSBrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `RSB ~simm:shift_imm mem cond ~wflag + | `RSCri, [| dest; src1; src2; cond; _; wflag |] + | `RSCrr, [| dest; src1; src2; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `RSC mem cond ~wflag + | `RSCrsr, [| dest; src1; src2; shift_reg; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `RSC ~sreg:shift_reg ~simm:shift_imm mem + cond ~wflag + | `RSCrsi, [| dest; src1; src2; shift_imm; cond; _; wflag |] -> + lift ~encoding ~dest src1 ~src2 `RSC ~simm:shift_imm mem cond ~wflag + | `CMPri, [| src1; src2; cond; _ |] | `CMPrr, [| src1; src2; cond; _ |] -> + lift ~encoding src1 ~src2 `SUB mem cond ~wflag:(`Reg `CPSR) + | `CMPrsr, [| src1; src2; shift_reg; shift_imm; cond; _ |] -> + lift ~encoding src1 ~src2 `SUB ~sreg:shift_reg ~simm:shift_imm mem cond + ~wflag:(`Reg `CPSR) + | `CMPrsi, [| src1; src2; shift_imm; cond; _ |] -> + lift ~encoding src1 ~src2 `SUB ~simm:shift_imm mem cond + ~wflag:(`Reg `CPSR) + | `CMNri, [| src1; src2; cond; _ |] | `CMNzrr, [| src1; src2; cond; _ |] -> + lift ~encoding src1 ~src2 `ADD mem cond ~wflag:(`Reg `CPSR) + | `CMNzrsr, [| src1; src2; shift_reg; shift_imm; cond; _ |] -> + lift ~encoding src1 ~src2 `ADD ~sreg:shift_reg ~simm:shift_imm mem cond + ~wflag:(`Reg `CPSR) + | `CMNzrsi, [| src1; src2; shift_imm; cond; _ |] -> + lift ~encoding src1 ~src2 `ADD ~simm:shift_imm mem cond + ~wflag:(`Reg `CPSR) + (* Special Data Instructions *) + | `MOVi16, [| `Reg dest; src; cond; _wflag |] -> + exec [ Bil.move (Env.of_reg dest) (exp_of_op src) ] cond + | `MOVTi16, [| `Reg dest; _; src; cond; _wflag |] -> + let dest = Env.of_reg dest and src = exp_of_op src in + Bil.[ dest := var dest land int32 0xFFFF lor (src lsl int32 16) ] + |> fun ins -> exec ins cond + | insn, ops -> + fail [%here] "ops %s doesn't match move insn %s" (string_of_ops ops) + (Arm_insn.to_string (insn :> insn)) + +let lift_bits mem ops (insn : bits_insn) = let open Bit in - match insn, ops with + match (insn, ops) with (* extends *) - | `UXTB, [|dest; src; rot; cond; _|] -> - extend ~dest ~src Unsigned `B ~rot cond - - | `UXTH, [|dest; src; rot; cond; _|] -> - extend ~dest ~src Unsigned `H ~rot cond - - | `SXTB, [|dest; src; rot; cond; _|] -> - extend ~dest ~src Signed `B ~rot cond - - | `SXTH, [|dest; src; rot; cond; _|] -> - extend ~dest ~src Signed `H ~rot cond - - | `UXTAB, [|dest; src; shift; rot; cond; _|] -> - extend ~dest ~src:shift ~src2:src Unsigned `B ~rot cond - - | `UXTAH, [|dest; src; shift; rot; cond; _|] -> - extend ~dest ~src:shift ~src2:src Unsigned `H ~rot cond - - | `SXTAB, [|dest; src; shift; rot; cond; _|] -> - extend ~dest ~src:shift ~src2:src Signed `B ~rot cond - - | `SXTAH, [|dest; src; shift; rot; cond; _|] -> - extend ~dest ~src:shift ~src2:src Signed `H ~rot cond - + | `UXTB, [| dest; src; rot; cond; _ |] -> + extend ~dest ~src Unsigned `B ~rot cond + | `UXTH, [| dest; src; rot; cond; _ |] -> + extend ~dest ~src Unsigned `H ~rot cond + | `SXTB, [| dest; src; rot; cond; _ |] -> + extend ~dest ~src Signed `B ~rot cond + | `SXTH, [| dest; src; rot; cond; _ |] -> + extend ~dest ~src Signed `H ~rot cond + | `UXTAB, [| dest; src; shift; rot; cond; _ |] -> + extend ~dest ~src:shift ~src2:src Unsigned `B ~rot cond + | `UXTAH, [| dest; src; shift; rot; cond; _ |] -> + extend ~dest ~src:shift ~src2:src Unsigned `H ~rot cond + | `SXTAB, [| dest; src; shift; rot; cond; _ |] -> + extend ~dest ~src:shift ~src2:src Signed `B ~rot cond + | `SXTAH, [| dest; src; shift; rot; cond; _ |] -> + extend ~dest ~src:shift ~src2:src Signed `H ~rot cond (* extracts *) - | `UBFX, [|dest; src; lsb; widthminus1; cond; _|] -> - bit_extract ~dest ~src Unsigned ~lsb ~widthminus1 cond - - | `SBFX, [|dest; src; lsb; widthminus1; cond; _|] -> - bit_extract ~dest ~src Signed ~lsb ~widthminus1 cond - - + | `UBFX, [| dest; src; lsb; widthminus1; cond; _ |] -> + bit_extract ~dest ~src Unsigned ~lsb ~widthminus1 cond + | `SBFX, [| dest; src; lsb; widthminus1; cond; _ |] -> + bit_extract ~dest ~src Signed ~lsb ~widthminus1 cond (* bit field *) - | `BFI, [|dest; _unknown; src; _bmask; cond; _|] -> - bit_field_insert ~dest ~src mem cond - - | `BFC, [|dest; _unknown; _bmask; cond; _|] -> - bit_field_insert ~dest ~src:(`Imm (word 0)) mem cond - + | `BFI, [| dest; _unknown; src; _bmask; cond; _ |] -> + bit_field_insert ~dest ~src mem cond + | `BFC, [| dest; _unknown; _bmask; cond; _ |] -> + bit_field_insert ~dest ~src:(`Imm (word 0)) mem cond (* bit reverse *) - | `RBIT, [|dest; src; cond; _|] -> - let dest = assert_reg [%here] dest in - let v = tmp ~name:"v" reg32_t in - let r = tmp ~name:"r" reg32_t in - let s = tmp ~name:"s" reg32_t in - exec Bil.([ - v := exp_of_op src lsr int32 1; - r := exp_of_op src; - s := int32 31; - while_ (var v <> int32 0) [ - r := var r lsl int32 1; - r := var r lor (var v land int32 1); - v := var v lsr int32 1; - s := var s - int32 1; - ]; - Env.of_reg dest := var r lsl var s; - ]) cond - + | `RBIT, [| dest; src; cond; _ |] -> + let dest = assert_reg [%here] dest in + let v = tmp ~name:"v" reg32_t in + let r = tmp ~name:"r" reg32_t in + let s = tmp ~name:"s" reg32_t in + exec + Bil. + [ + v := exp_of_op src lsr int32 1; + r := exp_of_op src; + s := int32 31; + while_ + (var v <> int32 0) + [ + r := var r lsl int32 1; + r := var r lor (var v land int32 1); + v := var v lsr int32 1; + s := var s - int32 1; + ]; + Env.of_reg dest := var r lsl var s; + ] + cond (* Swap bytes *) - | `SWPB, [|`Reg dest; `Reg src1; `Reg src2; cond; _|] -> - let temp = tmp reg8_t in - let dest = Env.of_reg dest in - let src1 = Env.of_reg src1 |> Bil.var in - let src2 = Env.of_reg src2 |> Bil.var in - exec Bil.([ - assn temp (load (var Env.mem) src2 LittleEndian `r8); - Env.mem := - store (var Env.mem) src2 (extract 7 0 src1) LittleEndian `r8; - assn dest (cast unsigned 32 (var temp)); - ]) cond - + | `SWPB, [| `Reg dest; `Reg src1; `Reg src2; cond; _ |] -> + let temp = tmp reg8_t in + let dest = Env.of_reg dest in + let src1 = Env.of_reg src1 |> Bil.var in + let src2 = Env.of_reg src2 |> Bil.var in + exec + Bil. + [ + assn temp (load ~mem:(var Env.mem) ~addr:src2 LittleEndian `r8); + Env.mem := + store ~mem:(var Env.mem) ~addr:src2 (extract ~hi:7 ~lo:0 src1) + LittleEndian `r8; + assn dest (cast unsigned 32 (var temp)); + ] + cond (* Pack half *) - | `PKHTB, [|`Reg dest; src1; src2; shift; cond; _|] -> - (* shift is always asr *) - let shifted, _ = - Shift.lift_c ~src:(exp_of_op src2) `ASR - ~shift:(exp_of_op shift) reg32_t in - exec [ - assn (Env.of_reg dest) - Bil.(extract 31 16 (exp_of_op src1) ^ - extract 15 0 shifted) - ] cond + | `PKHTB, [| `Reg dest; src1; src2; shift; cond; _ |] -> + (* shift is always asr *) + let shifted, _ = + Shift.lift_c ~src:(exp_of_op src2) `ASR ~shift:(exp_of_op shift) reg32_t + in + exec + [ + assn (Env.of_reg dest) + Bil.( + extract ~hi:31 ~lo:16 (exp_of_op src1) + ^ extract ~hi:15 ~lo:0 shifted); + ] + cond (* reverses *) - | `REV, [|`Reg dest; src; cond; _|] -> - let s = exp_of_op src in - let rev = Bil.(extract 7 0 s ^ - extract 15 8 s ^ - extract 23 16 s ^ - extract 31 24 s) in - exec [assn (Env.of_reg dest) rev] cond - | `REV16, [|`Reg dest; src; cond; _|] -> - let s = exp_of_op src in - let rev = Bil.(extract 23 16 s ^ - extract 31 24 s ^ - extract 7 0 s ^ - extract 15 8 s) in - exec [assn (Env.of_reg dest) rev] cond - | insn,ops -> - fail [%here] "ops %s doesn't match bits insn %s" - (string_of_ops ops) (Arm_insn.to_string (insn :> insn)) - - - + | `REV, [| `Reg dest; src; cond; _ |] -> + let s = exp_of_op src in + let rev = + Bil.( + extract ~hi:7 ~lo:0 s ^ extract ~hi:15 ~lo:8 s + ^ extract ~hi:23 ~lo:16 s ^ extract ~hi:31 ~lo:24 s) + in + exec [ assn (Env.of_reg dest) rev ] cond + | `REV16, [| `Reg dest; src; cond; _ |] -> + let s = exp_of_op src in + let rev = + Bil.( + extract ~hi:23 ~lo:16 s ^ extract ~hi:31 ~lo:24 s + ^ extract ~hi:7 ~lo:0 s ^ extract ~hi:15 ~lo:8 s) + in + exec [ assn (Env.of_reg dest) rev ] cond + | insn, ops -> + fail [%here] "ops %s doesn't match bits insn %s" (string_of_ops ops) + (Arm_insn.to_string (insn :> insn)) let lift_mult ops insn = let open Mul in - match insn,ops with - | `MUL, [|`Reg dest; src1; src2; cond; _; wflag|] - | `MUL, [|`Reg dest; src1; src2; cond; wflag|] -> - let flags = Flags.set_nzf Bil.(var (Env.of_reg dest)) reg32_t in - exec [ - assn (Env.of_reg dest) Bil.(exp_of_op src1 * exp_of_op src2) - ] ~flags ~wflag cond - - | `MLA, [|`Reg dest; src1; src2; addend; cond; _; wflag|] - | `MLA, [|`Reg dest; src1; src2; addend; cond; wflag|] -> - let flags = Flags.set_nzf Bil.(var Bil.(Env.of_reg dest)) reg32_t in - exec [ - assn (Env.of_reg dest) - Bil.(exp_of_op addend + exp_of_op src1 * exp_of_op src2) - ] ~flags ~wflag cond - - | `MLS, [|`Reg dest; src1; src2; addend; cond; _|] -> - exec [ - Bil.move (Env.of_reg dest) - Bil.(exp_of_op addend - exp_of_op src1 * exp_of_op src2) - ] cond - - | `UMULL, [|lodest; hidest; src1; src2; cond; _; wflag|] - | `UMULL, [|lodest; hidest; src1; src2; cond; wflag|] -> - lift_mull ~lodest ~hidest ~src1 ~src2 Unsigned ~wflag cond - - | `SMULL, [|lodest; hidest; src1; src2; cond; _; wflag|] - | `SMULL, [|lodest; hidest; src1; src2; cond; wflag|] -> - lift_mull ~lodest ~hidest ~src1 ~src2 Signed ~wflag cond - - | `UMLAL, [|lodest; hidest; src1; src2; - _loadd; _hiadd; cond; _; wflag|] - | `UMLAL, [|lodest; hidest; src1; src2; - _loadd; _hiadd; cond; wflag|] -> - lift_mull ~lodest ~hidest ~src1 ~src2 Unsigned ~addend:true ~wflag cond - - | `SMLAL, [|lodest; hidest; src1; src2; - _loadd; _hiadd; cond; _; wflag|] - | `SMLAL, [|lodest; hidest; src1; src2; - _loadd; _hiadd; cond; wflag|] -> - lift_mull ~lodest ~hidest ~src1 ~src2 Signed ~addend:true ~wflag cond - + match (insn, ops) with + | `MUL, [| `Reg dest; src1; src2; cond; _; wflag |] + | `MUL, [| `Reg dest; src1; src2; cond; wflag |] -> + let flags = Flags.set_nzf Bil.(var (Env.of_reg dest)) reg32_t in + exec + [ assn (Env.of_reg dest) Bil.(exp_of_op src1 * exp_of_op src2) ] + ~flags ~wflag cond + | `MLA, [| `Reg dest; src1; src2; addend; cond; _; wflag |] + | `MLA, [| `Reg dest; src1; src2; addend; cond; wflag |] -> + let flags = Flags.set_nzf Bil.(var Bil.(Env.of_reg dest)) reg32_t in + exec + [ + assn (Env.of_reg dest) + Bil.(exp_of_op addend + (exp_of_op src1 * exp_of_op src2)); + ] + ~flags ~wflag cond + | `MLS, [| `Reg dest; src1; src2; addend; cond; _ |] -> + exec + [ + Bil.move (Env.of_reg dest) + Bil.(exp_of_op addend - (exp_of_op src1 * exp_of_op src2)); + ] + cond + | `UMULL, [| lodest; hidest; src1; src2; cond; _; wflag |] + | `UMULL, [| lodest; hidest; src1; src2; cond; wflag |] -> + lift_mull ~lodest ~hidest ~src1 ~src2 Unsigned ~wflag cond + | `SMULL, [| lodest; hidest; src1; src2; cond; _; wflag |] + | `SMULL, [| lodest; hidest; src1; src2; cond; wflag |] -> + lift_mull ~lodest ~hidest ~src1 ~src2 Signed ~wflag cond + | `UMLAL, [| lodest; hidest; src1; src2; _loadd; _hiadd; cond; _; wflag |] + | `UMLAL, [| lodest; hidest; src1; src2; _loadd; _hiadd; cond; wflag |] -> + lift_mull ~lodest ~hidest ~src1 ~src2 Unsigned ~addend:true ~wflag cond + | `SMLAL, [| lodest; hidest; src1; src2; _loadd; _hiadd; cond; _; wflag |] + | `SMLAL, [| lodest; hidest; src1; src2; _loadd; _hiadd; cond; wflag |] -> + lift_mull ~lodest ~hidest ~src1 ~src2 Signed ~addend:true ~wflag cond (* signed 16bit mul plus a 32bit bit accum, Q *) - | `SMLABB, [|dest; src1; src2; accum; cond; _wflag|] -> - lift_smul ~dest ~src1 ~src2 ~accum ~q:true BB cond - + | `SMLABB, [| dest; src1; src2; accum; cond; _wflag |] -> + lift_smul ~dest ~src1 ~src2 ~accum ~q:true BB cond (* signed 16bit mul *) - | `SMULBB, [|dest; src1; src2; cond; _wflag|] -> - lift_smul ~dest ~src1 ~src2 BB cond - + | `SMULBB, [| dest; src1; src2; cond; _wflag |] -> + lift_smul ~dest ~src1 ~src2 BB cond (* two signed 16bit muls plus 32bit accum and optional xchg, Q*) - | `SMLAD, [|dest; src1; src2; accum; cond; _wflag|] -> - lift_smul ~dest ~src1 ~src2 ~accum ~q:true D cond - + | `SMLAD, [| dest; src1; src2; accum; cond; _wflag |] -> + lift_smul ~dest ~src1 ~src2 ~accum ~q:true D cond (* two signed 16bit muls and optional xchg, Q *) - | `SMUAD, [|dest; src1; src2; cond; _wflag|] -> - lift_smul ~dest ~src1 ~src2 ~q:true D cond - + | `SMUAD, [| dest; src1; src2; cond; _wflag |] -> + lift_smul ~dest ~src1 ~src2 ~q:true D cond (* signed 16bit times signed 32bit added to 32bit accum, Q *) - | `SMLAWB, [|dest; src1; src2; accum; cond; _wflag|] -> - lift_smul ~dest ~src1 ~src2 ~accum ~q:true WB cond - + | `SMLAWB, [| dest; src1; src2; accum; cond; _wflag |] -> + lift_smul ~dest ~src1 ~src2 ~accum ~q:true WB cond (* signed 16bit mul *) - | `SMULTB, [|dest; src1; src2; cond; _wflag|] -> - lift_smul ~dest ~src1 ~src2 TB cond - + | `SMULTB, [| dest; src1; src2; cond; _wflag |] -> + lift_smul ~dest ~src1 ~src2 TB cond (* signed 16bit mul plus 64bit accum *) - | `SMLALBT, [|dest; hidest; src1; src2; cond; _wflag|] -> - lift_smul ~dest ~hidest ~src1 ~src2 ~accum:dest ~hiaccum:hidest BT cond - - | insn,ops -> - fail [%here] "ops %s doesn't match mult insn %s" - (string_of_ops ops) (Arm_insn.to_string (insn :> insn)) - + | `SMLALBT, [| dest; hidest; src1; src2; cond; _wflag |] -> + lift_smul ~dest ~hidest ~src1 ~src2 ~accum:dest ~hiaccum:hidest BT cond + | insn, ops -> + fail [%here] "ops %s doesn't match mult insn %s" (string_of_ops ops) + (Arm_insn.to_string (insn :> insn)) let lift_mem_multi ops insn = - match insn, Array.to_list ops with + match (insn, Array.to_list ops) with | `STMDA, base :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m (List.rev dest_list) - base DA NoUpdate St in - exec insns cond - + let insns = Mem_shift.lift_m (List.rev dest_list) base DA NoUpdate St in + exec insns cond | `STMDA_UPD, base :: _unknown :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m (List.rev dest_list) - base DA Update St in - exec insns cond - + let insns = Mem_shift.lift_m (List.rev dest_list) base DA Update St in + exec insns cond | `LDMIB, base :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m dest_list base IB NoUpdate Ld in - exec insns cond - + let insns = Mem_shift.lift_m dest_list base IB NoUpdate Ld in + exec insns cond | `LDMIB_UPD, base :: _unknown :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m dest_list base IB Update Ld in - exec insns cond - + let insns = Mem_shift.lift_m dest_list base IB Update Ld in + exec insns cond | `STMIB, base :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m dest_list base IB NoUpdate St in - exec insns cond - + let insns = Mem_shift.lift_m dest_list base IB NoUpdate St in + exec insns cond | `STMIB_UPD, base :: _unknown :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m dest_list base IB Update St in - exec insns cond - + let insns = Mem_shift.lift_m dest_list base IB Update St in + exec insns cond | `LDMDB, base :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m (List.rev dest_list) - base DB NoUpdate Ld in - exec insns cond - + let insns = Mem_shift.lift_m (List.rev dest_list) base DB NoUpdate Ld in + exec insns cond | `LDMDB_UPD, base :: _unknown :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m (List.rev dest_list) - base DB Update Ld in - exec insns cond - + let insns = Mem_shift.lift_m (List.rev dest_list) base DB Update Ld in + exec insns cond | `STMDB, base :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m (List.rev dest_list) - base DB NoUpdate St in - exec insns cond - + let insns = Mem_shift.lift_m (List.rev dest_list) base DB NoUpdate St in + exec insns cond | `STMDB_UPD, base :: _unknown :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m (List.rev dest_list) - base DB Update St in - exec insns cond - - | `LDMIA, base :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m dest_list base IA NoUpdate Ld in - exec insns cond - - | `LDMIA_UPD, base :: _unknown :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m dest_list base IA Update Ld in - exec insns cond - + let insns = Mem_shift.lift_m (List.rev dest_list) base DB Update St in + exec insns cond + | `LDMIA, base :: cond :: _wr_flag :: dest_list -> + let insns = Mem_shift.lift_m dest_list base IA NoUpdate Ld in + exec insns cond + | `LDMIA_UPD, base :: _unknown :: cond :: _wr_flag :: dest_list -> + let insns = Mem_shift.lift_m dest_list base IA Update Ld in + exec insns cond | `STMIA, base :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m dest_list base IA NoUpdate St in - exec insns cond - + let insns = Mem_shift.lift_m dest_list base IA NoUpdate St in + exec insns cond | `STMIA_UPD, base :: _unknown :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m dest_list base IA Update St in - exec insns cond - + let insns = Mem_shift.lift_m dest_list base IA Update St in + exec insns cond | `LDMDA, base :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m (List.rev dest_list) - base DA NoUpdate Ld in - exec insns cond - + let insns = Mem_shift.lift_m (List.rev dest_list) base DA NoUpdate Ld in + exec insns cond | `LDMDA_UPD, base :: _unknown :: cond :: _wr_flag :: dest_list -> - let insns = Mem_shift.lift_m (List.rev dest_list) - base DA Update Ld in - exec insns cond - + let insns = Mem_shift.lift_m (List.rev dest_list) base DA Update Ld in + exec insns cond | _ -> - fail [%here] "ops %s doesn't match multi arg insn %s" - (string_of_ops ops) (Arm_insn.to_string (insn :> insn)) - + fail [%here] "ops %s doesn't match multi arg insn %s" (string_of_ops ops) + (Arm_insn.to_string (insn :> insn)) let lift_mem ops insn = let open Mem in - - match insn, ops with - | `STRD, [|dest1; dest2; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset - Offset Unsigned D St in - exec insns cond - - | `LDRD, [|dest1; dest2; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset - Offset Unsigned D Ld in - exec insns cond - - | `STRD_POST, [|dest1; dest2; base; _unknown; reg_off; `Imm imm_off; - cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset - PostIndex Unsigned D St in - exec insns cond - - | `LDRD_POST, [|dest1; dest2; base; _unknown; reg_off; `Imm imm_off; - cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset - PostIndex Unsigned D Ld - in - exec insns cond - - | `STRD_PRE, [|_unknown; dest1; dest2; base; reg_off; `Imm imm_off; - cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset - PreIndex Unsigned D St - in - exec insns cond - - | `LDRD_PRE, [|dest1; dest2; _unknown; base; reg_off; `Imm imm_off; - cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset - PreIndex Unsigned D Ld - in - exec insns cond - - | `STRH, [|dest1; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset - Offset Unsigned H St - in - exec insns cond - - | `LDRH, [|dest1; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset - Offset Unsigned H Ld - in - exec insns cond - - | `STRH_PRE, [|_unknown; dest1; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset - PreIndex Unsigned H St - in - exec insns cond - - | `LDRH_PRE, [|dest1; _unknown; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset - PreIndex Unsigned H Ld - in - exec insns cond - - | `STRH_POST, [|_unknown; dest1; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset - PostIndex Unsigned H St - in - exec insns cond - + match (insn, ops) with + | `STRD, [| dest1; dest2; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset Offset Unsigned D St + in + exec insns cond + | `LDRD, [| dest1; dest2; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset Offset Unsigned D Ld + in + exec insns cond + | ( `STRD_POST, + [| dest1; dest2; base; _unknown; reg_off; `Imm imm_off; cond; _ |] ) -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset PostIndex Unsigned D St + in + exec insns cond + | ( `LDRD_POST, + [| dest1; dest2; base; _unknown; reg_off; `Imm imm_off; cond; _ |] ) -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset PostIndex Unsigned D Ld + in + exec insns cond + | ( `STRD_PRE, + [| _unknown; dest1; dest2; base; reg_off; `Imm imm_off; cond; _ |] ) -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset PreIndex Unsigned D St + in + exec insns cond + | ( `LDRD_PRE, + [| dest1; dest2; _unknown; base; reg_off; `Imm imm_off; cond; _ |] ) -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~dest2 ~base ~offset PreIndex Unsigned D Ld + in + exec insns cond + | `STRH, [| dest1; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset Offset Unsigned H St + in + exec insns cond + | `LDRH, [| dest1; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset Offset Unsigned H Ld + in + exec insns cond + | `STRH_PRE, [| _unknown; dest1; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PreIndex Unsigned H St + in + exec insns cond + | `LDRH_PRE, [| dest1; _unknown; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PreIndex Unsigned H Ld + in + exec insns cond + | `STRH_POST, [| _unknown; dest1; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned H St + in + exec insns cond (* Unlike the convention of all other load and store instructions, for some * instructions the sign bit is set in the immediate when the operand * is POSITIVE. Insructions that are affected by this are marked with * "POS_SIGN_BIT" **) (* POS_SIGN_BIT *) - | `STRHTr, [|_unknown; dest1; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_pos reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset - PostIndex Unsigned H St - in - exec insns cond - - | `LDRH_POST, [|dest1; _unknown; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset - PostIndex Unsigned H Ld - in - exec insns cond - + | `STRHTr, [| _unknown; dest1; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_pos reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned H St + in + exec insns cond + | `LDRH_POST, [| dest1; _unknown; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned H Ld + in + exec insns cond (* POS_SIGN_BIT *) - | `LDRHTr, [|dest1; _unknown; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_pos reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned H Ld - in - exec insns cond - - | `LDRSH, [|dest1; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset Offset Signed H Ld - in - exec insns cond - - | `LDRSH_PRE, [|dest1; _unknown; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PreIndex Signed H Ld - in - exec insns cond - - | `LDRSH_POST, [|dest1; _unknown; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed H Ld - in - exec insns cond - + | `LDRHTr, [| dest1; _unknown; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_pos reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned H Ld + in + exec insns cond + | `LDRSH, [| dest1; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset Offset Signed H Ld + in + exec insns cond + | `LDRSH_PRE, [| dest1; _unknown; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PreIndex Signed H Ld + in + exec insns cond + | `LDRSH_POST, [| dest1; _unknown; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed H Ld + in + exec insns cond (* POS_SIGN_BIT *) - | `LDRSHTr, [|dest1; _unknown; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_pos reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed H Ld - in - exec insns cond - + | `LDRSHTr, [| dest1; _unknown; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_pos reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed H Ld + in + exec insns cond (* POS_SIGN_BIT *) - | `LDRSHTi, [|dest1; _unknown; base; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_pos (`Reg `Nil) imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed H Ld - in - exec insns cond - - | `LDRSB, [|dest1; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset Offset Signed B Ld - in - exec insns cond - - | `LDRSB_PRE, [|dest1; _unknown; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PreIndex Signed B Ld - in - exec insns cond - - | `LDRSB_POST, [|dest1; _unknown; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed B Ld - in - exec insns cond - + | `LDRSHTi, [| dest1; _unknown; base; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_pos (`Reg `Nil) imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed H Ld + in + exec insns cond + | `LDRSB, [| dest1; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset Offset Signed B Ld + in + exec insns cond + | `LDRSB_PRE, [| dest1; _unknown; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PreIndex Signed B Ld + in + exec insns cond + | `LDRSB_POST, [| dest1; _unknown; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_neg reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed B Ld + in + exec insns cond (* POS_SIGN_BIT *) - | `LDRSBTr, [|dest1; _unknown; base; reg_off; `Imm imm_off; cond; _|] -> - let offset = Mem_shift.mem_offset_reg_or_imm_pos reg_off imm_off in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed B Ld - in - exec insns cond - - | `STRi12, [|dest1; base; offset; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset Offset Unsigned W St - in - exec insns cond - - | `LDRi12, [|dest1; base; offset; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset Offset Unsigned W Ld - in - exec insns cond - - | `STRBi12, [|dest1; base; offset; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset Offset Unsigned B St - in - exec insns cond - - | `LDRBi12, [|dest1; base; offset; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset Offset Unsigned B Ld - in - exec insns cond - - | `STRrs, [|dest1; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - Offset Unsigned W St - in - exec insns cond - - | `LDRrs, [|dest1; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - Offset Unsigned W Ld - in - exec insns cond - - | `STRBrs, [|dest1; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - Offset Unsigned B St - in - exec insns cond - - | `LDRBrs, [|dest1; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - Offset Unsigned B Ld - in - exec insns cond - - | `STR_POST_IMM, [|_unknown; dest1; base; _invalid; `Imm offset; cond; _|] -> - let offset = - Mem_shift.repair_imm offset ~sign_mask:0x1000 ~imm_mask:0xfff `NEG - in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned W St - in - exec insns cond - - | `LDR_POST_IMM, [|dest1; _unknown; base; _invalid; `Imm offset; cond; _|] -> - let offset = - Mem_shift.repair_imm offset ~sign_mask:0x1000 ~imm_mask:0xfff `NEG - in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned W Ld - in - exec insns cond - - | `STRB_POST_IMM, [|_unknown; dest1; base; _invalid; `Imm offset; cond; _|] - | `STRBT_POST_IMM, [|_unknown; dest1; base; _invalid; `Imm offset; cond; _|] + | `LDRSBTr, [| dest1; _unknown; base; reg_off; `Imm imm_off; cond; _ |] -> + let offset = Mem_shift.mem_offset_reg_or_imm_pos reg_off imm_off in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Signed B Ld + in + exec insns cond + | `STRi12, [| dest1; base; offset; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset Offset Unsigned W St + in + exec insns cond + | `LDRi12, [| dest1; base; offset; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset Offset Unsigned W Ld + in + exec insns cond + | `STRBi12, [| dest1; base; offset; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset Offset Unsigned B St + in + exec insns cond + | `LDRBi12, [| dest1; base; offset; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset Offset Unsigned B Ld + in + exec insns cond + | `STRrs, [| dest1; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift Offset Unsigned W St + in + exec insns cond + | `LDRrs, [| dest1; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift Offset Unsigned W Ld + in + exec insns cond + | `STRBrs, [| dest1; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift Offset Unsigned B St + in + exec insns cond + | `LDRBrs, [| dest1; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift Offset Unsigned B Ld + in + exec insns cond + | `STR_POST_IMM, [| _unknown; dest1; base; _invalid; `Imm offset; cond; _ |] -> - let offset = - Mem_shift.repair_imm offset ~sign_mask:0x1000 ~imm_mask:0xfff `NEG - in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned B St - in - exec insns cond - - | `LDRB_POST_IMM, [|dest1; _unknown; base; _invalid; `Imm offset; cond; _|] - | `LDRBT_POST_IMM, [|dest1; _unknown; base; _invalid; `Imm offset; cond; _|] + let offset = + Mem_shift.repair_imm offset ~sign_mask:0x1000 ~imm_mask:0xfff `NEG + in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned W St + in + exec insns cond + | `LDR_POST_IMM, [| dest1; _unknown; base; _invalid; `Imm offset; cond; _ |] -> - let offset = - Mem_shift.repair_imm offset ~sign_mask:0x1000 ~imm_mask:0xfff `NEG - in - let insns = - Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned B Ld - in - exec insns cond - - | `STR_POST_REG, [|_unknown; dest1; base; offset; shift; cond; _|] - | `STRT_POST_REG, [|_unknown; dest1; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - PostIndex Unsigned W St - in - exec insns cond - - | `LDR_POST_REG, [|dest1; _unknown; base; offset; shift; cond; _|] - | `LDRT_POST_REG, [|dest1; _unknown; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - PostIndex Unsigned W Ld - in - exec insns cond - - | `STRB_POST_REG, [|_unknown; dest1; base; offset; shift; cond; _|] - | `STRBT_POST_REG, [|_unknown; dest1; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - PostIndex Unsigned B St - in - exec insns cond - - | `LDRB_POST_REG, [|dest1; _unknown; base; offset; shift; cond; _|] - | `LDRBT_POST_REG, [|dest1; _unknown; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - PostIndex Unsigned B Ld - in - exec insns cond - - | `STR_PRE_IMM, [|_unknown; dest1; base; offset; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset PreIndex Unsigned W St - in - exec insns cond - - | `LDR_PRE_IMM, [|dest1; _unknown; base; offset; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset PreIndex Unsigned W Ld - in - exec insns cond - - | `STRB_PRE_IMM, [|_unknown; dest1; base; offset; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset PreIndex Unsigned B St - in - exec insns cond - - | `LDRB_PRE_IMM, [|dest1; _unknown; base; offset; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset PreIndex Unsigned B Ld - in - exec insns cond - - | `STR_PRE_REG, [|_unknown; dest1; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - PreIndex Unsigned W St - in - exec insns cond - - | `LDR_PRE_REG, [|dest1; _unknown; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - PreIndex Unsigned W Ld - in - exec insns cond - - | `STRB_PRE_REG, [|_unknown; dest1; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - PreIndex Unsigned B St - in - exec insns cond - - | `LDRB_PRE_REG, [|dest1; _unknown; base; offset; shift; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift - PreIndex Unsigned B Ld - in - exec insns cond - + let offset = + Mem_shift.repair_imm offset ~sign_mask:0x1000 ~imm_mask:0xfff `NEG + in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned W Ld + in + exec insns cond + | `STRB_POST_IMM, [| _unknown; dest1; base; _invalid; `Imm offset; cond; _ |] + | `STRBT_POST_IMM, [| _unknown; dest1; base; _invalid; `Imm offset; cond; _ |] + -> + let offset = + Mem_shift.repair_imm offset ~sign_mask:0x1000 ~imm_mask:0xfff `NEG + in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned B St + in + exec insns cond + | `LDRB_POST_IMM, [| dest1; _unknown; base; _invalid; `Imm offset; cond; _ |] + | `LDRBT_POST_IMM, [| dest1; _unknown; base; _invalid; `Imm offset; cond; _ |] + -> + let offset = + Mem_shift.repair_imm offset ~sign_mask:0x1000 ~imm_mask:0xfff `NEG + in + let insns = + Mem_shift.lift_r_exp ~dest1 ~base ~offset PostIndex Unsigned B Ld + in + exec insns cond + | `STR_POST_REG, [| _unknown; dest1; base; offset; shift; cond; _ |] + | `STRT_POST_REG, [| _unknown; dest1; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift PostIndex Unsigned W St + in + exec insns cond + | `LDR_POST_REG, [| dest1; _unknown; base; offset; shift; cond; _ |] + | `LDRT_POST_REG, [| dest1; _unknown; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift PostIndex Unsigned W Ld + in + exec insns cond + | `STRB_POST_REG, [| _unknown; dest1; base; offset; shift; cond; _ |] + | `STRBT_POST_REG, [| _unknown; dest1; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift PostIndex Unsigned B St + in + exec insns cond + | `LDRB_POST_REG, [| dest1; _unknown; base; offset; shift; cond; _ |] + | `LDRBT_POST_REG, [| dest1; _unknown; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift PostIndex Unsigned B Ld + in + exec insns cond + | `STR_PRE_IMM, [| _unknown; dest1; base; offset; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset PreIndex Unsigned W St + in + exec insns cond + | `LDR_PRE_IMM, [| dest1; _unknown; base; offset; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset PreIndex Unsigned W Ld + in + exec insns cond + | `STRB_PRE_IMM, [| _unknown; dest1; base; offset; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset PreIndex Unsigned B St + in + exec insns cond + | `LDRB_PRE_IMM, [| dest1; _unknown; base; offset; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset PreIndex Unsigned B Ld + in + exec insns cond + | `STR_PRE_REG, [| _unknown; dest1; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift PreIndex Unsigned W St + in + exec insns cond + | `LDR_PRE_REG, [| dest1; _unknown; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift PreIndex Unsigned W Ld + in + exec insns cond + | `STRB_PRE_REG, [| _unknown; dest1; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift PreIndex Unsigned B St + in + exec insns cond + | `LDRB_PRE_REG, [| dest1; _unknown; base; offset; shift; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base ~offset ~shift PreIndex Unsigned B Ld + in + exec insns cond (* Exclusive access, we may later want to do something special to these *) - - | `LDREX, [|dest1; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset:(`Imm (word 0)) - Offset Unsigned W Ld - in - exec insns cond - - | `LDREX, [|dest1; `Reg _ as dest2; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~dest2 ~base ~offset:(`Imm (word 0)) - Offset Unsigned W Ld - in - exec insns cond - - | `LDREXB, [|dest1; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset:(`Imm (word 0)) - Offset Unsigned B Ld - in - exec insns cond - - | `LDREXH, [|dest1; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1 ~base ~offset:(`Imm (word 0)) - Offset Unsigned H Ld - in - exec insns cond - + | `LDREX, [| dest1; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base + ~offset:(`Imm (word 0)) + Offset Unsigned W Ld + in + exec insns cond + | `LDREX, [| dest1; `Reg _ as dest2; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~dest2 ~base + ~offset:(`Imm (word 0)) + Offset Unsigned W Ld + in + exec insns cond + | `LDREXB, [| dest1; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base + ~offset:(`Imm (word 0)) + Offset Unsigned B Ld + in + exec insns cond + | `LDREXH, [| dest1; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1 ~base + ~offset:(`Imm (word 0)) + Offset Unsigned H Ld + in + exec insns cond (* multidest is one of the multireg combinations *) - | `LDREXD, [|multidest; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1:multidest ~base ~offset:(`Imm (word 0)) - Offset Unsigned D Ld - in - exec insns cond - - | `STREX, [|`Reg dest1; src1; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1:src1 ~base ~offset:(`Imm (word 0)) - Offset Unsigned W St in - let result = [Bil.move (Env.of_reg dest1) (int32 0)] in - exec (insns @ result) cond - - | `STREX, [|`Reg dest1; src1; `Reg _ as src2; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1:src1 ~dest2:src2 ~base ~offset:(`Imm (word 0)) - Offset Unsigned W St in - let result = [Bil.move (Env.of_reg dest1) (int32 0)] in - exec (insns @ result) cond - - | `STREXB, [|`Reg dest1; src1; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1:src1 ~base ~offset:(`Imm (word 0)) - Offset Unsigned B St - in - let result = [Bil.move (Env.of_reg dest1) (int32 0)] in - exec (insns @ result) cond - - | `STREXH, [|`Reg dest1; src1; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1:src1 ~base ~offset:(`Imm (word 0)) - Offset Unsigned H St - in - let result = [Bil.move (Env.of_reg dest1) (int32 0)] in - exec (insns @ result) cond - + | `LDREXD, [| multidest; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1:multidest ~base + ~offset:(`Imm (word 0)) + Offset Unsigned D Ld + in + exec insns cond + | `STREX, [| `Reg dest1; src1; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1:src1 ~base + ~offset:(`Imm (word 0)) + Offset Unsigned W St + in + let result = [ Bil.move (Env.of_reg dest1) (int32 0) ] in + exec (insns @ result) cond + | `STREX, [| `Reg dest1; src1; `Reg _ as src2; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1:src1 ~dest2:src2 ~base + ~offset:(`Imm (word 0)) + Offset Unsigned W St + in + let result = [ Bil.move (Env.of_reg dest1) (int32 0) ] in + exec (insns @ result) cond + | `STREXB, [| `Reg dest1; src1; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1:src1 ~base + ~offset:(`Imm (word 0)) + Offset Unsigned B St + in + let result = [ Bil.move (Env.of_reg dest1) (int32 0) ] in + exec (insns @ result) cond + | `STREXH, [| `Reg dest1; src1; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1:src1 ~base + ~offset:(`Imm (word 0)) + Offset Unsigned H St + in + let result = [ Bil.move (Env.of_reg dest1) (int32 0) ] in + exec (insns @ result) cond (* multisrc is one of the multireg combinations *) - | `STREXD, [|`Reg dest1; multisrc; base; cond; _|] -> - let insns = - Mem_shift.lift_r_op ~dest1:multisrc ~base ~offset:(`Imm (word 0)) - Offset Unsigned D St - in - let result = [Bil.move (Env.of_reg dest1) (int32 0)] in - exec (insns @ result) cond - | #mem_multi_insn as insn, ops -> lift_mem_multi ops insn - | insn,ops -> - fail [%here] "ops %s doesn't match mem access insn %s" - (string_of_ops ops) (Arm_insn.to_string (insn :> insn)) - - -(** Branching instructions *) + | `STREXD, [| `Reg dest1; multisrc; base; cond; _ |] -> + let insns = + Mem_shift.lift_r_op ~dest1:multisrc ~base + ~offset:(`Imm (word 0)) + Offset Unsigned D St + in + let result = [ Bil.move (Env.of_reg dest1) (int32 0) ] in + exec (insns @ result) cond + | (#mem_multi_insn as insn), ops -> lift_mem_multi ops insn + | insn, ops -> + fail [%here] "ops %s doesn't match mem access insn %s" (string_of_ops ops) + (Arm_insn.to_string (insn :> insn)) + +(* Branching instructions *) let lift_branch mem ops insn = let addr = Memory.min_addr mem in - match insn, ops with - - | `Bcc, [|offset; cond; _|] -> - Branch.lift offset ~cond addr - - | `BL, [|offset; cond; _|] - | `BL_pred, [|offset; cond; _|] -> - Branch.lift offset ~cond ~link:true addr - | `BL, [|offset|] -> - Branch.lift offset ~link:true addr - | `BX_RET, [|cond; _|] -> - Branch.lift (`Reg `LR) ~cond ~x:true addr - - | `BX, [|target|] -> - Branch.lift target ~x:true addr - - | `BX_pred, [|target; cond; _|] -> - Branch.lift target ~cond ~x:true addr - - | `BLX, [|target|] -> - Branch.lift target ~link:true ~x:true addr - - | `BLX_pred, [|target; cond; _|] -> - Branch.lift target ~cond ~link:true ~x:true addr - - | `BLXi, [|offset|] -> - Branch.lift offset ~link:true ~x:true addr - - | insn,ops -> - fail [%here] "ops %s doesn't match branch insn %s" - (string_of_ops ops) (Arm_insn.to_string (insn :> insn)) - + match (insn, ops) with + | `Bcc, [| offset; cond; _ |] -> Branch.lift offset ~cond addr + | `BL, [| offset; cond; _ |] | `BL_pred, [| offset; cond; _ |] -> + Branch.lift offset ~cond ~link:true addr + | `BL, [| offset |] -> Branch.lift offset ~link:true addr + | `BX_RET, [| cond; _ |] -> Branch.lift (`Reg `LR) ~cond ~x:true addr + | `BX, [| target |] -> Branch.lift target ~x:true addr + | `BX_pred, [| target; cond; _ |] -> Branch.lift target ~cond ~x:true addr + | `BLX, [| target |] -> Branch.lift target ~link:true ~x:true addr + | `BLX_pred, [| target; cond; _ |] -> + Branch.lift target ~cond ~link:true ~x:true addr + | `BLXi, [| offset |] -> Branch.lift offset ~link:true ~x:true addr + | insn, ops -> + fail [%here] "ops %s doesn't match branch insn %s" (string_of_ops ops) + (Arm_insn.to_string (insn :> insn)) let lift_special ops insn = - match insn, ops with + match (insn, ops) with (* supervisor call *) - | `SVC, [|`Imm word; cond; _|] -> - let num = Word.extract_exn ~hi:23 word in - exec [ - let dst = Format.asprintf "__svc(%a)" Addr.pp num in - Bil.(encode call) dst - ] cond - - | `MRS, [|`Reg dest; cond; _|] -> - let get_bits flag src lsb = - Bil.(src lor (cast unsigned 32 (var flag) lsl int32 lsb)) in - let d = Env.of_reg dest in - let vd = Bil.var d in - exec [ - Bil.move d (int32 0); - Bil.move d (get_bits Env.nf vd 31); - Bil.move d (get_bits Env.zf vd 30); - Bil.move d (get_bits Env.cf vd 29); - Bil.move d (get_bits Env.vf vd 28); - Bil.move d (get_bits Env.qf vd 27); - Bil.move d (get_bits Env.ge.(3) vd 19); - Bil.move d (get_bits Env.ge.(2) vd 18); - Bil.move d (get_bits Env.ge.(1) vd 17); - Bil.move d (get_bits Env.ge.(0) vd 16); - ] cond - + | `SVC, [| `Imm word; cond; _ |] -> + let num = Word.extract_exn ~hi:23 word in + exec + [ + (let dst = Format.asprintf "__svc(%a)" Addr.pp num in + Bil.(encode call) dst); + ] + cond + | `MRS, [| `Reg dest; cond; _ |] -> + let get_bits flag src lsb = + Bil.(src lor (cast unsigned 32 (var flag) lsl int32 lsb)) + in + let d = Env.of_reg dest in + let vd = Bil.var d in + exec + [ + Bil.move d (int32 0); + Bil.move d (get_bits Env.nf vd 31); + Bil.move d (get_bits Env.zf vd 30); + Bil.move d (get_bits Env.cf vd 29); + Bil.move d (get_bits Env.vf vd 28); + Bil.move d (get_bits Env.qf vd 27); + Bil.move d (get_bits Env.ge.(3) vd 19); + Bil.move d (get_bits Env.ge.(2) vd 18); + Bil.move d (get_bits Env.ge.(1) vd 17); + Bil.move d (get_bits Env.ge.(0) vd 16); + ] + cond (* Move to special from register * For MSR an immediate with bit x set means: * bit 0 is CPSR_c (is not valid in ARMv7) * bit 1 is CPSR_x (is not valid in ARMv7) * bit 2 is APSR_g * bit 3 is APSR_nzcvq - **) - | `MSR, [|`Imm imm; `Reg src; cond; _|] -> - let src = Bil.var (Env.of_reg src) in - let (:=) flag bit = Bil.move flag (Bil.extract bit bit src) in - let s1 = - if Word.(Int_exn.(imm land word 0x8) = word 0x8) then [ - Env.nf := 31; - Env.zf := 30; - Env.cf := 29; - Env.vf := 28; - Env.qf := 27; - ] else [] in - let s2 = - if Word.(Int_exn.(imm land word 0x4) = word 0x4) then [ - Env.ge.(3) := 19; - Env.ge.(2) := 18; - Env.ge.(1) := 17; - Env.ge.(0) := 16; - ] else [] in - exec (s1 @ s2) cond + *) + | `MSR, [| `Imm imm; `Reg src; cond; _ |] -> + let src = Bil.var (Env.of_reg src) in + let ( := ) flag bit = Bil.move flag (Bil.extract ~hi:bit ~lo:bit src) in + let s1 = + if Word.(Int_exn.(imm land word 0x8) = word 0x8) then + [ + Env.nf := 31; Env.zf := 30; Env.cf := 29; Env.vf := 28; Env.qf := 27; + ] + else [] + in + let s2 = + if Word.(Int_exn.(imm land word 0x4) = word 0x4) then + [ + Env.ge.(3) := 19; + Env.ge.(2) := 18; + Env.ge.(1) := 17; + Env.ge.(0) := 16; + ] + else [] + in + exec (s1 @ s2) cond (* All of these are nops in User mode *) | `CPS2p, _ | `DMB, _ | `DSB, _ | `HINT, _ | `PLDi12, _ -> [] - - | insn,ops -> - fail [%here] "ops %s doesn't match special insn %s" - (string_of_ops ops) (Arm_insn.to_string (insn :> insn)) + | insn, ops -> + fail [%here] "ops %s doesn't match special insn %s" (string_of_ops ops) + (Arm_insn.to_string (insn :> insn)) let arm_ops_exn ops () = - Array.map (ops) ~f:(fun op -> - Option.value_exn - ~here:[%here] - ~error:(Error.create "unsupported operand" op Op.sexp_of_t ) + Array.map ops ~f:(fun op -> + Option.value_exn ~here:[%here] + ~error:(Error.create "unsupported operand" op Op.sexp_of_t) (Arm_op.create op)) let arm_ops ops = try_with ~backtrace:true (arm_ops_exn ops) @@ -1059,37 +865,40 @@ module CPU = struct let pc = pc let sp = sp - - let regs = Var.Set.of_list [ - r0; r1; r2; r3; r4; - r5; r6; r7; r8; r9; - r10; r11; r12; - pc; sp; lr; - spsr; cpsr; itstate; - ] + let regs = + Var.Set.of_list + [ + r0; + r1; + r2; + r3; + r4; + r5; + r6; + r7; + r8; + r9; + r10; + r11; + r12; + pc; + sp; + lr; + spsr; + cpsr; + itstate; + ] (* although PC is stricly speaking is GPR we will rule it out *) - let non_gpr = Var.Set.of_list [ - pc; spsr; cpsr; itstate; - ] - - let gpr = Var.Set.diff regs non_gpr - - let perms = Var.Set.of_list [ - r4; r5; r6; r7; r8; r9; r10; r11; - ] - - let flags = Var.Set.of_list @@ [ - nf; zf; cf; qf; vf; - ] @ Array.to_list ge - + let non_gpr = Var.Set.of_list [ pc; spsr; cpsr; itstate ] + let gpr = Set.diff regs non_gpr + let perms = Var.Set.of_list [ r4; r5; r6; r7; r8; r9; r10; r11 ] + let flags = Var.Set.of_list @@ [ nf; zf; cf; qf; vf ] @ Array.to_list ge let nf = nf let zf = zf let cf = cf let vf = vf - let is = Var.same - let is_reg r = Set.mem regs (Var.base r) let is_sp = is sp let is_bp = is r11 @@ -1100,40 +909,46 @@ module CPU = struct let is_cf = is cf let is_vf = is vf let is_nf = is nf - let is_mem = is mem end -(** Substitute PC with its value *) -let resolve_pc mem = Stmt.map (object - inherit Stmt.mapper as super - method! map_var var = - if Var.(equal var CPU.pc) then - Bil.int (CPU.addr_of_pc mem) - else super#map_var var +(** Substitute PC with its value *) +let resolve_pc mem = + Stmt.map + (object + inherit Stmt.mapper as super + + method! map_var var = + if Var.(equal var CPU.pc) then Bil.int (CPU.addr_of_pc mem) + else super#map_var var - method! map_move v x = - if Var.(equal v CPU.pc) - then super#map_jmp x - else super#map_move v x - end) + method! map_move v x = + if Var.(equal v CPU.pc) then super#map_jmp x else super#map_move v x + end) let insn_exn mem insn : bil Or_error.t = - let encoding = Theory.Language.read ~package:"bap" (Basic.Insn.encoding insn) in - Memory.(Addr.Int_err.(!$(max_addr mem) - !$(min_addr mem))) - >>= Word.to_int >>= fun s -> Size.of_int ((s+1) * 8) >>= fun scale -> + let encoding = + Theory.Language.read ~package:"bap" (Basic.Insn.encoding insn) + in + Memory.(Addr.Int_err.(!$(max_addr mem) - !$(min_addr mem))) >>= Word.to_int + >>= fun s -> + Size.of_int ((s + 1) * 8) >>= fun scale -> Memory.get ~scale mem >>= fun word -> match Arm_insn.of_basic insn with | None -> Ok [] - | Some arm_insn -> match arm_ops (Basic.Insn.ops insn) with - | Error _ as fail -> fail - | Ok ops -> Result.return @@ match arm_insn with - | #move_insn as op -> lift_move ~encoding word ops op - | #bits_insn as op -> lift_bits word ops op - | #mult_insn as op -> lift_mult ops op - | #mem_insn as op -> lift_mem ops op - | #branch_insn as op -> lift_branch mem ops op - | #special_insn as op -> lift_special ops op + | Some arm_insn -> ( + match arm_ops (Basic.Insn.ops insn) with + | Error _ as fail -> fail + | Ok ops -> ( + Result.return + @@ + match arm_insn with + | #move_insn as op -> lift_move ~encoding word ops op + | #bits_insn as op -> lift_bits word ops op + | #mult_insn as op -> lift_mult ops op + | #mem_insn as op -> lift_mem ops op + | #branch_insn as op -> lift_branch mem ops op + | #special_insn as op -> lift_special ops op)) let lift mem insn = try insn_exn mem insn >>| resolve_pc mem with diff --git a/lib/arm/arm_lifter.mli b/lib/arm/arm_lifter.mli index 98288c7b4..6c222fd48 100644 --- a/lib/arm/arm_lifter.mli +++ b/lib/arm/arm_lifter.mli @@ -1,7 +1,7 @@ open Bap.Std -(** [lift mem insn] lifts instruction. *) val lift : lifter +(** [lift mem insn] lifts instruction. *) module CPU : sig include module type of Arm_env diff --git a/lib/arm/arm_mem.ml b/lib/arm/arm_mem.ml index 051db2c42..aee0b9b8b 100644 --- a/lib/arm/arm_mem.ml +++ b/lib/arm/arm_mem.ml @@ -1,7 +1,6 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std - open Arm_types open Arm_utils module Env = Arm_env @@ -10,136 +9,144 @@ module Env = Arm_env let is_pc v = Var.equal v Env.pc - (* Doug TODO check for misaligned access *) (* Single-register memory access *) -let lift_r ~(dst1 : Var.t) ?(dst2 : Var.t option) ~(base : Var.t) +let lift_r ~(dst1 : Var.t) ?(dst2 : Var.t option) ~(base : Var.t) ~(offset : exp) mode sign size operation = let o_base = tmp reg32_t in (* If this load is a jump (only valid for 4-byte load) * We need to do the write_back before the load so we * Use the originals - **) - let address = match mode, operation, size, dst1 with + *) + let address = + match (mode, operation, size, dst1) with | PostIndex, Ld, W, d when is_pc d -> Bil.var o_base - | PreIndex, Ld, W, d when is_pc d -> Bil.(var o_base + offset) - | PostIndex, _, _, _ -> Bil.var base - | PreIndex, _, _, _ | Offset, _, _, _ -> Bil.(var base + offset) in + | PreIndex, Ld, W, d when is_pc d -> Bil.(var o_base + offset) + | PostIndex, _, _, _ -> Bil.var base + | PreIndex, _, _, _ | Offset, _, _, _ -> Bil.(var base + offset) + in (* Create temps for original if this is a jump *) - let pre_write_back = match mode, operation, size, dst1 with - | PreIndex, Ld, W, d when is_pc d -> [ - Bil.move o_base Bil.(var base); - Bil.move base Bil.(var base + offset) - ] - | PostIndex, Ld, W, d when is_pc d -> [ - Bil.move o_base Bil.(var base); - Bil.move base Bil.(var base + offset) - ] + let pre_write_back = + match (mode, operation, size, dst1) with + | PreIndex, Ld, W, d when is_pc d -> + [ + Bil.move o_base Bil.(var base); Bil.move base Bil.(var base + offset); + ] + | PostIndex, Ld, W, d when is_pc d -> + [ + Bil.move o_base Bil.(var base); Bil.move base Bil.(var base + offset); + ] | Offset, _, _, _ -> [] - | _ -> [] in + | _ -> [] + in - let write_back = match mode, operation, size, dst1 with - | PreIndex, Ld, W, d when is_pc d -> [] + let write_back = + match (mode, operation, size, dst1) with + | PreIndex, Ld, W, d when is_pc d -> [] | PostIndex, Ld, W, d when is_pc d -> [] - | Offset, _, _, _ -> [] - | _ -> [Bil.move base Bil.(var base + offset)] in + | Offset, _, _, _ -> [] + | _ -> [ Bil.move base Bil.(var base + offset) ] + in - let typ = match size with - | B -> `r8 - | H -> `r16 - | W | D -> `r32 in + let typ = match size with B -> `r8 | H -> `r16 | W | D -> `r32 in - let store m n v = Bil.(store m n v LittleEndian typ) in - let load m n = Bil.(load m n LittleEndian typ) in + let store mem addr v = Bil.(store ~mem ~addr v LittleEndian typ) in + let load mem addr = Bil.(load ~mem ~addr LittleEndian typ) in - let temp = match size with - | B -> tmp reg8_t - | H -> tmp reg16_t - | _ -> dst1 in + let temp = match size with B -> tmp reg8_t | H -> tmp reg16_t | _ -> dst1 in let four = Bil.int (Word.of_int 4 ~width:32) in match operation with | Ld -> - let rhs = cast_of_sign sign 32 Bil.(var temp) in - let extend = match size with - | B | H -> [Bil.move dst1 rhs] - | W | D -> [] in - let loads = - let mem = Bil.var (Env.mem) in - if [%compare.equal: size] size D then [ - Bil.move dst1 (load mem address); - Bil.move (Option.value_exn dst2) (load mem Bil.(address + four)); - ] else [ - assn temp (load mem address); - ] in - List.concat [ - pre_write_back; - loads; - extend; (* sign/zero extend if necessary *) - write_back; - ] - | St -> - (* truncate the value if necessary *) - let trunc = match size with - | B | H -> - let n = if [%compare.equal: size] size B then 8 else 16 in - [Bil.move temp Bil.(cast low n (var dst1))] - | W | D -> [] in - let stores = - let m = Env.mem in - let v = Bil.var m in - match size with - | D -> [ - Bil.move m (store v address Bil.(var dst1)); - Bil.move m (store v - Bil.(address + four) Bil.(var (Option.value_exn dst2))); + let rhs = cast_of_sign sign 32 Bil.(var temp) in + let extend = + match size with B | H -> [ Bil.move dst1 rhs ] | W | D -> [] + in + let loads = + let mem = Bil.var Env.mem in + if [%compare.equal: size] size D then + [ + Bil.move dst1 (load mem address); + Bil.move (Option.value_exn dst2) (load mem Bil.(address + four)); + ] + else [ assn temp (load mem address) ] + in + List.concat + [ + pre_write_back; + loads; + extend; + (* sign/zero extend if necessary *) + write_back; ] - | B | H | W -> [ - Bil.move m (store v address Bil.(var temp)); - ] in - List.concat [ - trunc; (* truncate the value if necessary *) - stores; - write_back - ] + | St -> + (* truncate the value if necessary *) + let trunc = + match size with + | B | H -> + let n = if [%compare.equal: size] size B then 8 else 16 in + [ Bil.move temp Bil.(cast low n (var dst1)) ] + | W | D -> [] + in + let stores = + let m = Env.mem in + let v = Bil.var m in + match size with + | D -> + [ + Bil.move m (store v address Bil.(var dst1)); + Bil.move m + (store v Bil.(address + four) Bil.(var (Option.value_exn dst2))); + ] + | B | H | W -> [ Bil.move m (store v address Bil.(var temp)) ] + in + List.concat + [ trunc; (* truncate the value if necessary *) stores; write_back ] let lift_m dest_list base mode update operation = let o_base = tmp reg32_t in - let calc_offset ith = match mode with - | IB -> 4 * (ith + 1) + let calc_offset ith = + match mode with + | IB -> 4 * (ith + 1) | DB -> -4 * (ith + 1) - | IA -> 4 * ith - | DA -> -4 * ith in + | IA -> 4 * ith + | DA -> -4 * ith + in let writeback = - let dest_len = - Word.of_int ~width:32 (4 * List.length dest_list) in + let dest_len = Word.of_int ~width:32 (4 * List.length dest_list) in match update with | NoUpdate -> [] | Update -> - let (+-) = match mode with - | IB | IA -> Bil.(+) - | DB | DA -> Bil.(-) - in [Bil.move base Bil.(var base +- int dest_len)] in + let ( +- ) = + match mode with IB | IA -> Bil.( + ) | DB | DA -> Bil.( - ) + in + [ Bil.move base Bil.(var base +- int dest_len) ] + in let create_access i dest = let offset_e = Word.of_int ~width:32 (calc_offset i) in let mem = Bil.var Env.mem in let addr = Bil.(var o_base + int offset_e) in match operation with - | Ld -> assn dest Bil.(load mem addr LittleEndian `r32) - | St -> Bil.move Env.mem - Bil.(store Env.(var mem) addr (var dest) LittleEndian `r32) in + | Ld -> assn dest Bil.(load ~mem ~addr LittleEndian `r32) + | St -> + Bil.move Env.mem + Bil.(store ~mem:Env.(var mem) ~addr (var dest) LittleEndian `r32) + in (* Jmps should always be the last statement *) let rec move_jump_to_end l = match l with - [] -> [] - | (stmt :: stmts) -> - match stmt with - | (Bil.Jmp exp) -> stmts @ [Bil.Jmp exp] - | _ -> stmt :: move_jump_to_end stmts in - move_jump_to_end (List.concat [ - Bil.([o_base := var base]); - List.mapi ~f:create_access dest_list; - writeback - ]) + | [] -> [] + | stmt :: stmts -> ( + match stmt with + | Bil.Jmp exp -> stmts @ [ Bil.Jmp exp ] + | _ -> stmt :: move_jump_to_end stmts) + in + move_jump_to_end + (List.concat + [ + Bil.[ o_base := var base ]; + List.mapi ~f:create_access dest_list; + writeback; + ]) diff --git a/lib/arm/arm_mem.mli b/lib/arm/arm_mem.mli index c64c8b524..e6f164e11 100644 --- a/lib/arm/arm_mem.mli +++ b/lib/arm/arm_mem.mli @@ -1,4 +1,4 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Arm_types @@ -10,6 +10,7 @@ val lift_r : mode_r -> sign -> size -> - operation -> stmt list + operation -> + stmt list val lift_m : var list -> var -> mode_m -> update_m -> operation -> stmt list diff --git a/lib/arm/arm_mem_shift.ml b/lib/arm/arm_mem_shift.ml index 5fd1bf9e5..2af28d778 100644 --- a/lib/arm/arm_mem_shift.ml +++ b/lib/arm/arm_mem_shift.ml @@ -1,13 +1,11 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std open Or_error - open Arm_types open Arm_utils - -module Mem = Arm_mem -module Env = Arm_env +module Mem = Arm_mem +module Env = Arm_env module Shift = Arm_shift let string_of_opt_op = function @@ -19,22 +17,21 @@ module Z = Word.Int_exn let word x = Word.of_int x ~width:32 let repair_imm (src : word) ~sign_mask ~imm_mask rtype : exp = - let bit_set = - Word.(Z.(word sign_mask land src) = word sign_mask) in + let bit_set = Word.(Z.(word sign_mask land src) = word sign_mask) in let negate = - (bit_set && [%compare.equal: repair] rtype `NEG) || - (not bit_set && [%compare.equal: repair] rtype `POS) in + (bit_set && [%compare.equal: repair] rtype `NEG) + || ((not bit_set) && [%compare.equal: repair] rtype `POS) + in let offset = Z.(src land word imm_mask) in Bil.int (if negate then Z.neg offset else offset) let repair_reg reg imm ~sign_mask rtype = - let bit_set = - Word.(Z.(word sign_mask land imm) = word sign_mask) in + let bit_set = Word.(Z.(word sign_mask land imm) = word sign_mask) in let negate = - (bit_set && [%compare.equal: repair] rtype `NEG) || - (not bit_set && [%compare.equal: repair] rtype `POS) + (bit_set && [%compare.equal: repair] rtype `NEG) + || ((not bit_set) && [%compare.equal: repair] rtype `POS) in - let m_one = Word.(ones (bitwidth imm)) in + let m_one = Word.(ones (bitwidth imm)) in if negate then Bil.(int m_one * reg) else reg let lift_r_op ~dest1 ?dest2 ?shift ~base ~offset mode sign size operation = @@ -43,63 +40,58 @@ let lift_r_op ~dest1 ?dest2 ?shift ~base ~offset mode sign size operation = match offset with | `Reg r -> Bil.(var (Env.of_reg r)) | `Imm w -> - let width = Word.bitwidth w in - let _1 = Word.one 32 in - let min_32 = Word.(_1 lsl Word.of_int 31 ~width) in - if Word.(w = min_32) - then Bil.(int Word.(zero width)) - else Bil.(int w) in + let width = Word.bitwidth w in + let _1 = Word.one 32 in + let min_32 = Word.(_1 lsl Word.of_int 31 ~width) in + if Word.(w = min_32) then Bil.(int Word.(zero width)) else Bil.(int w) + in - let offset = match shift with + let offset = + match shift with | Some s -> Shift.lift_mem ~src:offset s reg32_t - | None -> offset in - match dest1, dest2 with - | (`Reg (#gpr_reg as d1), Some (`Reg (#gpr_reg as d2))) -> - Mem.lift_r ~dst1:(Env.of_reg d1) ~dst2:(Env.of_reg d2) - ~base ~offset mode sign size operation - | `Reg (#gpr_reg as d), None -> - Mem.lift_r ~dst1:(Env.of_reg d) ~base ~offset mode sign size - operation - | op1,op2 -> fail [%here] "Unexpected arguments: %s, %s" - (Arm_op.to_string (op1 : Arm_op.t)) - (string_of_opt_op op2) + | None -> offset + in + match (dest1, dest2) with + | `Reg (#gpr_reg as d1), Some (`Reg (#gpr_reg as d2)) -> + Mem.lift_r ~dst1:(Env.of_reg d1) ~dst2:(Env.of_reg d2) ~base ~offset mode + sign size operation + | `Reg (#gpr_reg as d), None -> + Mem.lift_r ~dst1:(Env.of_reg d) ~base ~offset mode sign size operation + | op1, op2 -> + fail [%here] "Unexpected arguments: %s, %s" + (Arm_op.to_string (op1 : Arm_op.t)) + (string_of_opt_op op2) let lift_r_exp ~dest1 ?dest2 ~base ~offset mode sign size operation = let dest1 = assert_reg [%here] dest1 |> Env.of_reg in let base = assert_reg [%here] base |> Env.of_reg in match dest2 with | Some dest2 -> - let dest2 = assert_reg [%here] dest2 |> Env.of_reg in - Mem.lift_r ~dst1:dest1 ~dst2:dest2 - ~base ~offset mode sign size operation - | None -> - Mem.lift_r ~dst1:dest1 - ~base ~offset mode sign size operation - + let dest2 = assert_reg [%here] dest2 |> Env.of_reg in + Mem.lift_r ~dst1:dest1 ~dst2:dest2 ~base ~offset mode sign size operation + | None -> Mem.lift_r ~dst1:dest1 ~base ~offset mode sign size operation let lift_m dest_list base mode update operation = let base = assert_reg [%here] base in - let dest_list = List.map dest_list - ~f:(fun d -> assert_reg [%here] d |> Env.of_reg) in + let dest_list = + List.map dest_list ~f:(fun d -> assert_reg [%here] d |> Env.of_reg) + in let base = Env.of_reg base in Mem.lift_m dest_list base mode update operation - (* Decides whether to use the register or immediate as the offset value * Also performs conversion to remove the negative bit and the - **) + *) let mem_offset_reg_or_imm_neg reg_off imm_off = match reg_off with - | `Reg #nil_reg -> - repair_imm imm_off ~sign_mask:0x100 ~imm_mask:0xff `NEG + | `Reg #nil_reg -> repair_imm imm_off ~sign_mask:0x100 ~imm_mask:0xff `NEG | `Reg (#gpr_reg as reg) -> - repair_reg Bil.(var (Env.of_reg reg)) imm_off ~sign_mask:0x100 `NEG + repair_reg Bil.(var (Env.of_reg reg)) imm_off ~sign_mask:0x100 `NEG | op -> fail [%here] "unexpected operand: %s" (Arm_op.to_string op) let mem_offset_reg_or_imm_pos reg_off imm_off = match reg_off with - | `Reg #nil_reg -> - repair_imm imm_off ~sign_mask:0x100 ~imm_mask:0xff `POS + | `Reg #nil_reg -> repair_imm imm_off ~sign_mask:0x100 ~imm_mask:0xff `POS | `Reg (#gpr_reg as reg) -> - repair_reg Bil.(var (Env.of_reg reg)) imm_off ~sign_mask:0x1 `POS + repair_reg Bil.(var (Env.of_reg reg)) imm_off ~sign_mask:0x1 `POS | op -> fail [%here] "unexpected operand: %s" (Arm_op.to_string op) diff --git a/lib/arm/arm_mem_shift.mli b/lib/arm/arm_mem_shift.mli index b219add37..d91688643 100644 --- a/lib/arm/arm_mem_shift.mli +++ b/lib/arm/arm_mem_shift.mli @@ -1,11 +1,9 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Arm_types (** Combine Mem and Shift *) - - val lift_r_exp : dest1:op -> ?dest2:op -> @@ -14,8 +12,8 @@ val lift_r_exp : mode_r -> sign -> size -> - operation -> stmt list - + operation -> + stmt list val lift_r_op : dest1:op -> @@ -26,34 +24,30 @@ val lift_r_op : mode_r -> sign -> size -> - operation -> stmt list - + operation -> + stmt list val lift_m : op list -> op -> mode_m -> update_m -> operation -> stmt list -(** takes a word and converts it to an exp that is the offset for some - memory instructions sign_mask - a bitmask that determines the bit - in src that is the repair bit imm_mask - a bitmask that determines - which bits in src are the immediate type - whether a set mask - indicates a positive or negative immediate. **) val repair_imm : word -> sign_mask:int -> imm_mask:int -> repair -> exp +(** takes a word and converts it to an exp that is the offset for some memory + instructions sign_mask - a bitmask that determines the bit in src that is + the repair bit imm_mask - a bitmask that determines which bits in src are + the immediate type - whether a set mask indicates a positive or negative + immediate. **) -(** takes a word and a register and converts it to an exp that is the - offset for some memory instructions sign_mask - a bitmask that - determines the bit in src that is the negative bit rtype - whether - a set mask indicates a positive or negative operand. *) +(** takes a word and a register and converts it to an exp that is the offset for + some memory instructions sign_mask - a bitmask that determines the bit in + src that is the negative bit rtype - whether a set mask indicates a positive + or negative operand. *) val repair_reg : exp -> word -> sign_mask:int -> repair -> exp +(** Decides whether to use the register or immediate as the offset value Also + performs conversion to remove the negative bit and the *) -(** Decides whether to use the register or immediate as the offset - value Also performs conversion to remove the negative bit and the *) - - -(** Decides whether to use the register or immediate as the offset - value Also performs conversion to remove the negative bit and the *) +(** Decides whether to use the register or immediate as the offset value Also + performs conversion to remove the negative bit and the *) val mem_offset_reg_or_imm_neg : op -> word -> exp - - val mem_offset_reg_or_imm_pos : op -> word -> exp diff --git a/lib/arm/arm_mov.ml b/lib/arm/arm_mov.ml index 77ae91791..1a0c6a5eb 100644 --- a/lib/arm/arm_mov.ml +++ b/lib/arm/arm_mov.ml @@ -1,19 +1,16 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap_core_theory open Regular.Std open Bap.Std - open Arm_types open Arm_utils open Arm_flags - module Env = Arm_env module Shift = Arm_shift let width = 32 - -(** Modified Immediate Constants *) +(** Modified Immediate Constants *) module MIC : sig val decode : Theory.language -> exp -> exp end = struct @@ -34,73 +31,87 @@ end = struct let decode encoding = function | Bil.Int x as v -> - if Theory.Language.equal encoding Arm_target.llvm_t32 then v - else Bil.Int (mic x) + if Theory.Language.equal encoding Arm_target.llvm_t32 then v + else Bil.Int (mic x) | other -> other end -let lift ?(encoding=Theory.Language.unknown) - ?dest src1 ?src2 (itype ) ?sreg ?simm raw ~wflag cond = - let dest : var = match dest with - | None -> tmp reg32_t +let lift ?(encoding = Theory.Language.unknown) ?dest src1 ?src2 itype ?sreg + ?simm raw ~wflag cond = + let dest : var = + match dest with + | None -> tmp reg32_t | Some (`Reg reg) -> Env.of_reg reg - | Some (`Imm _) -> fail [%here] "dest is not a reg" in + | Some (`Imm _) -> fail [%here] "dest is not a reg" + in let s1 : exp = MIC.decode encoding @@ exp_of_op src1 in - let s2 : exp = match src2 with + let s2 : exp = + match src2 with | Some src -> MIC.decode encoding @@ exp_of_op src - | None -> zero reg32_t in + | None -> zero reg32_t + in let unshifted = tmp reg32_t in (* Do the register shift *) let s1, s2, stmts, carry = - match itype, sreg, simm with - | `MOV, Some sreg, Some simm - | `MVN, Some sreg, Some simm -> - let shifted, carry = Shift.lift_r - ~src:Bil.(var unshifted) simm - ~shift:(exp_of_op sreg) reg32_t in - shifted, s2, [Bil.move unshifted s1], carry + match (itype, sreg, simm) with + | `MOV, Some sreg, Some simm | `MVN, Some sreg, Some simm -> + let shifted, carry = + Shift.lift_r + ~src:Bil.(var unshifted) + simm ~shift:(exp_of_op sreg) reg32_t + in + (shifted, s2, [ Bil.move unshifted s1 ], carry) | _, Some sreg, Some simm -> - let shifted, carry = Shift.lift_r - ~src:Bil.(var unshifted) simm - ~shift:(exp_of_op sreg) reg32_t in - s1, shifted, [Bil.move unshifted s2], carry - | `MOV, None, Some simm - | `MVN, None, Some simm -> - let shifted, carry = Shift.lift_i - ~src:Bil.(var unshifted) simm reg32_t in - shifted, s2, [Bil.move unshifted s1], carry + let shifted, carry = + Shift.lift_r + ~src:Bil.(var unshifted) + simm ~shift:(exp_of_op sreg) reg32_t + in + (s1, shifted, [ Bil.move unshifted s2 ], carry) + | `MOV, None, Some simm | `MVN, None, Some simm -> + let shifted, carry = + Shift.lift_i ~src:Bil.(var unshifted) simm reg32_t + in + (shifted, s2, [ Bil.move unshifted s1 ], carry) | _, None, Some simm -> - let shifted, carry = Shift.lift_i - ~src:Bil.(var unshifted) simm reg32_t in - s1, shifted, [Bil.move unshifted s2], carry - | _ -> s1, s2, [], Bil.var Env.cf in + let shifted, carry = + Shift.lift_i ~src:Bil.(var unshifted) simm reg32_t + in + (s1, shifted, [ Bil.move unshifted s2 ], carry) + | _ -> (s1, s2, [], Bil.var Env.cf) + in - let stmts, flags = match itype, src1, src2 with + let stmts, flags = + match (itype, src1, src2) with | `MOV, `Imm i64, _ | `MVN, `Imm i64, _ - | `AND, _, Some (`Imm i64) - | `BIC, _, Some (`Imm i64) - | `EOR, _, Some (`Imm i64) - | `ORR, _, Some (`Imm i64) -> - stmts, set_cf_data i64 raw :: set_nzf Bil.(var dest) reg32_t + | `AND, _, Some (`Imm i64) + | `BIC, _, Some (`Imm i64) + | `EOR, _, Some (`Imm i64) + | `ORR, _, Some (`Imm i64) -> + (stmts, set_cf_data ~imm:i64 ~data:raw :: set_nzf Bil.(var dest) reg32_t) | #move, _, _ -> - stmts, Bil.move Env.cf carry :: set_nzf Bil.(var dest) reg32_t - | #arth as itype1, _, _ -> - let orig1 = tmp reg32_t in - let orig2 = tmp reg32_t in - let v1,v2,vd = Bil.(var orig1, var orig2, var dest) in - let flags = match itype1 with - | `SUB -> set_sub v1 v2 vd reg32_t - | `RSB -> set_sub v2 v1 vd reg32_t - | `ADD -> set_add v1 v2 vd reg32_t - | `ADC -> set_adc v1 v2 vd reg32_t - | `SBC -> set_sbc v1 v2 vd reg32_t - | `RSC -> set_sbc v2 v1 vd reg32_t in - stmts @ [Bil.move orig1 s1; Bil.move orig2 s2], flags in + (stmts, Bil.move Env.cf carry :: set_nzf Bil.(var dest) reg32_t) + | (#arth as itype1), _, _ -> + let orig1 = tmp reg32_t in + let orig2 = tmp reg32_t in + let v1, v2, vd = Bil.(var orig1, var orig2, var dest) in + let flags = + match itype1 with + | `SUB -> set_sub v1 v2 vd reg32_t + | `RSB -> set_sub v2 v1 vd reg32_t + | `ADD -> set_add v1 v2 vd reg32_t + | `ADC -> set_adc v1 v2 vd reg32_t + | `SBC -> set_sbc v1 v2 vd reg32_t + | `RSC -> set_sbc v2 v1 vd reg32_t + in + (stmts @ [ Bil.move orig1 s1; Bil.move orig2 s2 ], flags) + in let vcf = Bil.var Env.cf in - let oper = match itype with + let oper = + match itype with | `AND -> Bil.(s1 land s2) | `BIC -> Bil.(s1 land lnot s2) | `EOR -> Bil.(s1 lxor s2) @@ -112,5 +123,6 @@ let lift ?(encoding=Theory.Language.unknown) | `ADD -> Bil.(s1 + s2) | `ADC -> Bil.(s1 + s2 + cast unsigned 32 vcf) | `SBC -> Bil.(s1 + lnot s2 + cast unsigned 32 vcf) - | `RSC -> Bil.(lnot s1 + s2 + cast unsigned 32 vcf) in - exec (stmts @ [assn dest oper]) ~flags ~wflag cond + | `RSC -> Bil.(lnot s1 + s2 + cast unsigned 32 vcf) + in + exec (stmts @ [ assn dest oper ]) ~flags ~wflag cond diff --git a/lib/arm/arm_mov.mli b/lib/arm/arm_mov.mli index a786fd032..a6eab2435 100644 --- a/lib/arm/arm_mov.mli +++ b/lib/arm/arm_mov.mli @@ -1,4 +1,4 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap_core_theory open Bap.Std open Arm_types diff --git a/lib/arm/arm_mul.ml b/lib/arm/arm_mul.ml index e597fb395..707490740 100644 --- a/lib/arm/arm_mul.ml +++ b/lib/arm/arm_mul.ml @@ -1,12 +1,10 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std - open Arm_types open Arm_utils - -module Mov = Arm_mov -module Env = Arm_env +module Mov = Arm_mov +module Env = Arm_env module Shift = Arm_shift module Flags = Arm_flags @@ -15,28 +13,33 @@ let lift_mull ~lodest ~hidest ~src1 ~src2 sign ?addend ~wflag cond = let hidest = assert_reg [%here] hidest in let s1_64, s2_64 = let cast src = cast_of_sign sign 64 (exp_of_op src) in - cast src1, cast src2 in + (cast src1, cast src2) + in let result = tmp reg64_t in - let eres = Bil.var result in + let eres = Bil.var result in let flags = Flags.set_nzf eres reg64_t in - let opn = match addend with - | Some _ -> Bil.(s1_64 * s2_64 + - concat (exp_of_reg hidest) (exp_of_reg lodest)) - | None -> Bil.(s1_64 * s2_64) in - let insns = [ - Bil.move result opn; - Bil.move (Env.of_reg lodest) Bil.(extract 31 0 eres); - Bil.move (Env.of_reg hidest) Bil.(extract 63 32 eres); - ] in + let opn = + match addend with + | Some _ -> + Bil.((s1_64 * s2_64) + concat (exp_of_reg hidest) (exp_of_reg lodest)) + | None -> Bil.(s1_64 * s2_64) + in + let insns = + [ + Bil.move result opn; + Bil.move (Env.of_reg lodest) Bil.(extract ~hi:31 ~lo:0 eres); + Bil.move (Env.of_reg hidest) Bil.(extract ~hi:63 ~lo:32 eres); + ] + in exec insns ~flags ~wflag cond let lift_smul ~dest ?hidest ~src1 ~src2 ?accum ?hiaccum ?q size cond = let dest = assert_reg [%here] dest in let src1 = exp_of_op src1 in let src2 = exp_of_op src2 in - let excast hi lo s = Bil.(cast signed 64 (extract hi lo s)) in - let top = excast 31 16 in - let bot = excast 15 0 in + let excast hi lo s = Bil.(cast signed 64 (extract ~hi ~lo s)) in + let top = excast 31 16 in + let bot = excast 15 0 in let top32 = excast 47 16 in let res = tmp reg64_t in let result = @@ -46,32 +49,37 @@ let lift_smul ~dest ?hidest ~src1 ~src2 ?accum ?hiaccum ?q size cond = | BT -> bot src1 * top src2 | TB -> top src1 * bot src2 | TT -> top src1 * top src2 - | D -> top src1 * top src2 + bot src1 * bot src2 - | DX -> top src1 * bot src2 + bot src1 * top src2 + | D -> (top src1 * top src2) + (bot src1 * bot src2) + | DX -> (top src1 * bot src2) + (bot src1 * top src2) | WB -> top32 (cast signed 64 (src1 * bot src2)) - | WT -> top32 (cast signed 64 (src1 * top src2)) in + | WT -> top32 (cast signed 64 (src1 * top src2)) + in let result = let open Bil in - match accum, hiaccum with - | None, None -> result - | Some a, None -> result + cast signed 64 (exp_of_op a) + match (accum, hiaccum) with + | None, None -> result + | Some a, None -> result + cast signed 64 (exp_of_op a) | Some a, Some hia -> result + concat (exp_of_op hia) (exp_of_op a) - | _ -> fail [%here] "Cannot specify only a hi accumulator" in + | _ -> fail [%here] "Cannot specify only a hi accumulator" + in let qflag = match q with - | Some true -> - [Bil.move Env.qf Bil.(excast 31 0 (var res) <> (var res))] - | _ -> [] in + | Some true -> [ Bil.move Env.qf Bil.(excast 31 0 (var res) <> var res) ] + | _ -> [] + in let instr = match hidest with - | Some (`Reg hid) -> [ - Bil.move res result; - Bil.move (Env.of_reg hid) Bil.(extract 63 32 (var res)); - Bil.move (Env.of_reg dest) Bil.(extract 31 0 (var res)); - ] - | None -> [ - Bil.move res result; - Bil.move (Env.of_reg dest) Bil.(extract 31 0 (var res)); - ] - | _ -> fail [%here] "unexpected operand type" in + | Some (`Reg hid) -> + [ + Bil.move res result; + Bil.move (Env.of_reg hid) Bil.(extract ~hi:63 ~lo:32 (var res)); + Bil.move (Env.of_reg dest) Bil.(extract ~hi:31 ~lo:0 (var res)); + ] + | None -> + [ + Bil.move res result; + Bil.move (Env.of_reg dest) Bil.(extract ~hi:31 ~lo:0 (var res)); + ] + | _ -> fail [%here] "unexpected operand type" + in exec (instr @ qflag) cond diff --git a/lib/arm/arm_mul.mli b/lib/arm/arm_mul.mli index b2f57a49b..36c24cb82 100644 --- a/lib/arm/arm_mul.mli +++ b/lib/arm/arm_mul.mli @@ -1,15 +1,26 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Arm_types val lift_mull : lodest:op -> hidest:op -> - src1:op -> src2:op -> sign -> ?addend:'a -> wflag:op -> op -> stmt list + src1:op -> + src2:op -> + sign -> + ?addend:'a -> + wflag:op -> + op -> + stmt list val lift_smul : dest:op -> ?hidest:op -> src1:op -> src2:op -> - ?accum:op -> ?hiaccum:op -> ?q:bool -> smul_size -> op -> stmt list + ?accum:op -> + ?hiaccum:op -> + ?q:bool -> + smul_size -> + op -> + stmt list diff --git a/lib/arm/arm_op.ml b/lib/arm/arm_op.ml index 3cec22182..d9118b576 100644 --- a/lib/arm/arm_op.ml +++ b/lib/arm/arm_op.ml @@ -1,25 +1,22 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std - module Arm = Arm_types type t = Arm.op [@@deriving bin_io, compare, sexp] - let create : op -> Arm.op option = let open Option.Monad_infix in function | Op.Fmm fmm -> None | Op.Reg reg -> Arm_reg.create reg >>| fun reg -> `Reg reg - | Op.Imm imm -> - Imm.to_word ~width:32 imm >>| fun imm -> `Imm imm + | Op.Imm imm -> Imm.to_word ~width:32 imm >>| fun imm -> `Imm imm + +include Regular.Make (struct + type t = Arm.op [@@deriving bin_io, compare, sexp] -include Regular.Make(struct - type t = Arm.op [@@deriving bin_io, compare, sexp] - let module_name = Some "Arm.Op" - let version = "1.0.0" - let pp fmt op = - Format.fprintf fmt "%a" Sexp.pp (sexp_of_t op) - let hash op = Hashtbl.hash op - end) + let module_name = Some "Arm.Op" + let version = "1.0.0" + let pp fmt op = Format.fprintf fmt "%a" Sexp.pp (sexp_of_t op) + let hash op = Hashtbl.hash op +end) diff --git a/lib/arm/arm_op.mli b/lib/arm/arm_op.mli index 294210764..86fae6f60 100644 --- a/lib/arm/arm_op.mli +++ b/lib/arm/arm_op.mli @@ -2,9 +2,9 @@ open Bap.Std open Regular.Std type t = Arm_types.op [@@deriving bin_io, compare, sexp] -include Regular.S with type t := t +include Regular.S with type t := t -(** [create op] projects bap generic operand into arm specific. - Floating point operands are currently ignored.*) val create : op -> t option +(** [create op] projects bap generic operand into arm specific. Floating point + operands are currently ignored.*) diff --git a/lib/arm/arm_reg.ml b/lib/arm/arm_reg.ml index c91ec0d8a..84b3af6b0 100644 --- a/lib/arm/arm_reg.ml +++ b/lib/arm/arm_reg.ml @@ -1,18 +1,17 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std open Arm_helpers type t = Arm_types.reg [@@deriving bin_io, compare, sexp] -let create reg : t option = - sexpable_of_string t_of_sexp (Reg.name reg) +let create reg : t option = sexpable_of_string t_of_sexp (Reg.name reg) -include Regular.Make(struct - type nonrec t = t [@@deriving bin_io, compare, sexp] - let hash (reg : t) = Hashtbl.hash reg - let module_name = Some "ARM.Reg" - let version = "1.0.0" - let pp fmt reg = - Format.fprintf fmt "%a" Sexp.pp (sexp_of_t reg) - end) +include Regular.Make (struct + type nonrec t = t [@@deriving bin_io, compare, sexp] + + let hash (reg : t) = Hashtbl.hash reg + let module_name = Some "ARM.Reg" + let version = "1.0.0" + let pp fmt reg = Format.fprintf fmt "%a" Sexp.pp (sexp_of_t reg) +end) diff --git a/lib/arm/arm_reg.mli b/lib/arm/arm_reg.mli index e9d8bda45..dfe9c7ae2 100644 --- a/lib/arm/arm_reg.mli +++ b/lib/arm/arm_reg.mli @@ -3,7 +3,7 @@ open Regular.Std type t = Arm_types.reg [@@deriving bin_io, compare, sexp] -(** lifts basic register to a ARM one *) val create : reg -> t option +(** lifts basic register to a ARM one *) include Regular.S with type t := t diff --git a/lib/arm/arm_shift.ml b/lib/arm/arm_shift.ml index 0eccbbe36..4ed7b8a72 100644 --- a/lib/arm/arm_shift.ml +++ b/lib/arm/arm_shift.ml @@ -1,21 +1,18 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Or_error - open Arm_types open Arm_utils - module Env = Arm_env - -let shift_of_word op = match Word.to_int op with +let shift_of_word op = + match Word.to_int op with | Ok 1 -> `ASR | Ok 2 -> `LSL | Ok 3 -> `LSR | Ok 4 -> `ROR | Ok 5 -> `RRX - | _ -> fail [%here] "Imm %s, doesn't stand for shift" - (Word.to_string op) + | _ -> fail [%here] "Imm %s, doesn't stand for shift" (Word.to_string op) let shift_c ~src shift_type ~shift t = let bits = bitlen t in @@ -24,31 +21,35 @@ let shift_c ~src shift_type ~shift t = let e1 = Bil.int (Word.one bits) in match shift_type with | `ASR -> - let shifted = Bil.(src asr shift) in - let carry = nth_bit Bil.(shift - e1) src in - shifted, carry + let shifted = Bil.(src asr shift) in + let carry = nth_bit Bil.(shift - e1) src in + (shifted, carry) | `LSL -> - let shifted = Bil.(src lsl shift) in - let carry = Bil.(ite (shift <> int (Word.zero bits)) - (nth_bit Bil.(bits_e - shift) src) - (var Env.cf)) in - shifted, carry + let shifted = Bil.(src lsl shift) in + let carry = + Bil.( + ite + ~if_:(shift <> int (Word.zero bits)) + ~then_:(nth_bit Bil.(bits_e - shift) src) + ~else_:(var Env.cf)) + in + (shifted, carry) | `LSR -> - let shifted = Bil.(src lsr shift) in - let carry = nth_bit Bil.(shift - e1) src in - shifted, carry + let shifted = Bil.(src lsr shift) in + let carry = nth_bit Bil.(shift - e1) src in + (shifted, carry) | `ROR -> - let ret1 = Bil.(src lsr shift) in - let ret2 = Bil.(src lsl (bits_e - shift)) in - let shifted = Bil.(ret1 lor ret2) in - let carry = nth_bit Bil.(shift - e1) src in - shifted, carry + let ret1 = Bil.(src lsr shift) in + let ret2 = Bil.(src lsl (bits_e - shift)) in + let shifted = Bil.(ret1 lor ret2) in + let carry = nth_bit Bil.(shift - e1) src in + (shifted, carry) | `RRX -> - let ret1 = Bil.(src lsr e1) in - let carryin = Bil.(cast unsigned bits (var Env.cf) lsl (bits_e - e1)) in - let shifted = Bil.(ret1 lor carryin) in - let carry = nth_bit Bil.(int (Word.zero 1)) src in - shifted, carry + let ret1 = Bil.(src lsr e1) in + let carryin = Bil.(cast unsigned bits (var Env.cf) lsl (bits_e - e1)) in + let shifted = Bil.(ret1 lor carryin) in + let carry = nth_bit Bil.(int (Word.zero 1)) src in + (shifted, carry) let r_shift ~src shift_type ~shift t = let shift_type = assert_imm [%here] shift_type in @@ -61,16 +62,14 @@ let i_shift ~src shift_type t = let three = Word.of_int 3 ~width in (* lower three bits are type*) let r = - Word.Int_err.(!$shift_type land !$mask) >>| shift_of_word >>= - fun shift_t -> + Word.Int_err.(!$shift_type land !$mask) >>| shift_of_word >>= fun shift_t -> (* other bits are immediate *) - Word.Int_err.((!$shift_type land (lnot !$mask)) lsr !$three) >>= - fun shift_amt -> - return (shift_t, shift_amt) in + Word.Int_err.((!$shift_type land lnot !$mask) lsr !$three) + >>= fun shift_amt -> return (shift_t, shift_amt) + in match r with | Error err -> fail [%here] "%s" Error.(to_string_hum err) - | Ok (shift_t, shift_amt) -> - shift_c ~src shift_t ~shift:Bil.(int shift_amt) t + | Ok (shift_t, shift_amt) -> shift_c ~src shift_t ~shift:Bil.(int shift_amt) t (* decodes a shifted operand for a memory operation * src - the operand to be shifted @@ -80,35 +79,34 @@ let i_shift ~src shift_type t = * bits 15 through 13 represent the shift type, valid shift types * are number 1 through 5 * typ - the type - **) + *) let mem_shift ~src shift typ = let shift = assert_imm [%here] shift in let width = bitlen typ in let word = Word.of_int ~width in let wordm n = Ok (word n) in let shift_typ w = - Word.Int_err.((!$w land wordm 0xE000) lsr wordm 13) >>| - shift_of_word in + Word.Int_err.((!$w land wordm 0xE000) lsr wordm 13) >>| shift_of_word + in (* Gets the shift amount from the immediate *) let shift_amt w = Word.Int_err.(!$w land wordm 0xFFF) >>| Bil.int in (* Converts the shift to a negative if the negative bit is set *) let to_neg w exp = match Word.Int_err.(wordm 0x1000 land !$w) with - | Ok x when Word.equal x (word 0x1000) -> - Bil.(int (Word.ones width) * exp) - | _ -> exp in - let r = shift_typ shift >>= fun t -> shift_amt shift >>= fun amt -> - return (t,amt) in + | Ok x when Word.equal x (word 0x1000) -> Bil.(int (Word.ones width) * exp) + | _ -> exp + in + let r = + shift_typ shift >>= fun t -> + shift_amt shift >>= fun amt -> return (t, amt) + in match r with | Error err -> fail [%here] "%s" Error.(to_string_hum err) - | Ok (t,amount) -> - let exp, _ = shift_c ~src t ~shift:amount typ in - to_neg shift exp + | Ok (t, amount) -> + let exp, _ = shift_c ~src t ~shift:amount typ in + to_neg shift exp let lift_c = shift_c - let lift_i = i_shift - let lift_r = r_shift - let lift_mem = mem_shift diff --git a/lib/arm/arm_shift.mli b/lib/arm/arm_shift.mli index f9200d514..2639ad211 100644 --- a/lib/arm/arm_shift.mli +++ b/lib/arm/arm_shift.mli @@ -1,49 +1,34 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Arm_types -(** Need the operand and the carry flag value src - the source, if you - intend to use the carry bit, this must not be the destination of the - shift expression. This means it must be a temp that contains the - value of the source not the actual source itself shift_type - the type - of shift shift - must be a exp that is the amount of the shift - (ignored for rrx) -*) val lift_c : src:exp -> shift -> shift:exp -> typ -> exp * exp - - -(** decodes a register shifted operand - * src - the operand to be shifted, cannot be the destination - * in practice this means it must be a temp variable. - * shift_type - an int64, bits 2 through 0 represent the shift type - * valid shift types are number 1 through 5 - * shift - the value to shift by - * t - the type - **) +(** Need the operand and the carry flag value src - the source, if you intend to + use the carry bit, this must not be the destination of the shift expression. + This means it must be a temp that contains the value of the source not the + actual source itself shift_type - the type of shift shift - must be a exp + that is the amount of the shift (ignored for rrx) *) + +(** decodes a register shifted operand * src - the operand to be shifted, cannot + be the destination * in practice this means it must be a temp variable. * + shift_type - an int64, bits 2 through 0 represent the shift type * valid + shift types are number 1 through 5 * shift - the value to shift by * t - the + type **) val lift_r : src:exp -> op -> shift:exp -> typ -> exp * exp - -(** decodes an immediate shifted operand - * src - the operand to be shifted, cannot be the destination - * in practice this means it must be a temp variable. - * shift_type - an int64, bits 2 through 0 represent the shift type - * valid shift types are number 1 through 5 - * bits 3 and higher represent the shift amount if this is an - * immediate shift. For register shifts these upper bits are 0. - * If the shift type is RRX, a shift amount of 1 is implied. - * t - the type - **) val lift_i : src:exp -> op -> typ -> exp * exp +(** decodes an immediate shifted operand * src - the operand to be shifted, + cannot be the destination * in practice this means it must be a temp + variable. * shift_type - an int64, bits 2 through 0 represent the shift type + * valid shift types are number 1 through 5 * bits 3 and higher represent the + shift amount if this is an * immediate shift. For register shifts these + upper bits are 0. * If the shift type is RRX, a shift amount of 1 is + implied. * t - the type **) - -(** decodes a shifted operand for a memory operation - * src - the operand to be shifted - * shift - an int64, - * bits 11 through 0 represent the shift amount - * bits 12 represents whether the expression is added or subtracted - * bits 15 through 13 represent the shift type, valid shift types - * are number 1 through 5 - * typ - the type - **) val lift_mem : src:exp -> op -> typ -> exp +(** decodes a shifted operand for a memory operation * src - the operand to be + shifted * shift - an int64, * bits 11 through 0 represent the shift amount * + bits 12 represents whether the expression is added or subtracted * bits 15 + through 13 represent the shift type, valid shift types * are number 1 + through 5 * typ - the type **) diff --git a/lib/arm/arm_target.ml b/lib/arm/arm_target.ml index 1c2f793f8..79763c8c1 100644 --- a/lib/arm/arm_target.ml +++ b/lib/arm/arm_target.ml @@ -1,15 +1,19 @@ let package = "bap" -open Core_kernel[@@warning "-D"] +open Core open Bap_core_theory open Bap.Std open KB.Syntax open Poly open Bap_traces.Std - module CT = Theory -type r128 and r80 and r64 and r32 and r16 and r8 +type r128 +and r80 +and r64 +and r32 +and r16 +and r8 type 'a bitv = 'a CT.Bitv.t CT.Value.sort @@ -18,41 +22,58 @@ let r80 : r80 bitv = CT.Bitv.define 80 let r64 : r64 bitv = CT.Bitv.define 64 let r32 : r32 bitv = CT.Bitv.define 32 let r16 : r16 bitv = CT.Bitv.define 16 -let r8 : r8 bitv = CT.Bitv.define 8 +let r8 : r8 bitv = CT.Bitv.define 8 let bool = CT.Bool.t - - let reg t n = CT.Var.define t n let untyped = List.map ~f:CT.Var.forget -let (@<) xs ys = untyped xs @ untyped ys +let ( @< ) xs ys = untyped xs @ untyped ys -let array ?(index=string_of_int) t pref size = +let array ?(index = string_of_int) t pref size = List.init size ~f:(fun i -> reg t (pref ^ index i)) let mems = CT.Mem.define r32 r8 let data = CT.Var.define mems (Var.name Arm_env.mem) - -let of_bil v = - CT.Var.define (Var.sort v) (Var.name v) - -let regs xs = untyped@@List.map ~f:of_bil xs - -let vars32 = regs Arm_env.[ - r0; r1; r2; r3; r4; r5; r6; r7; r8; r9; - r10; r11; r12; sp; lr; mem; - nf; zf; cf; vf; qf; - ] - +let of_bil v = CT.Var.define (Var.sort v) (Var.name v) +let regs xs = untyped @@ List.map ~f:of_bil xs + +let vars32 = + regs + Arm_env. + [ + r0; + r1; + r2; + r3; + r4; + r5; + r6; + r7; + r8; + r9; + r10; + r11; + r12; + sp; + lr; + mem; + nf; + zf; + cf; + vf; + qf; + ] let thumb = Theory.Role.declare ~package:"arm" "thumb" -let status_regs = Theory.Role.Register.[ - [status; integer], regs Arm_env.[nf; zf; cf; vf; qf]; - [carry_flag], regs Arm_env.[cf]; - [sign_flag], regs Arm_env.[nf]; - [zero_flag], regs Arm_env.[zf]; - [overflow_flag], regs Arm_env.[vf]; - ] +let status_regs = + Theory.Role.Register. + [ + ([ status; integer ], regs Arm_env.[ nf; zf; cf; vf; qf ]); + ([ carry_flag ], regs Arm_env.[ cf ]); + ([ sign_flag ], regs Arm_env.[ nf ]); + ([ zero_flag ], regs Arm_env.[ zf ]); + ([ overflow_flag ], regs Arm_env.[ vf ]); + ] (* AACPS, §5.1.1 Core registers: @@ -81,37 +102,31 @@ let status_regs = Theory.Role.Register.[ r4-r8, r10, r11 and SP (and r9 in PCS variants that designate r9 as v6). *) -let regs32 = Theory.Role.Register.[ - [general; integer], regs Arm_env.[ - r0; r1; r2; r3; r4; r5; r6; r7; r8; r9; r10; r11; r12; - sp; lr; - ]; - [stack_pointer], regs Arm_env.[sp]; - [frame_pointer], regs Arm_env.[r11]; - [link], regs Arm_env.[lr]; - [thumb], regs Arm_env.[ - r0; r1; r2; r3; r4; r5; r6; r7; sp; lr; - ]; - [function_argument], regs Arm_env.[r0; r1; r2; r3]; - [function_return], regs Arm_env.[r0; r1]; - [caller_saved], regs Arm_env.[ - r0; r1; r2; r3; r12; lr; - ]; - [callee_saved], regs Arm_env.[ - r4; r5; r6; r7; r8; r9; r10; r11; sp; - ]; - ] @ status_regs - -let vfp2regs = Theory.Role.Register.[ - [general; floating], untyped@@array r64 "D" 16; - ] - -let vfp3regs = Theory.Role.Register.[ - [general; floating], untyped@@array r64 "D" 32; - ] +let regs32 = + Theory.Role.Register. + [ + ( [ general; integer ], + regs + Arm_env. + [ r0; r1; r2; r3; r4; r5; r6; r7; r8; r9; r10; r11; r12; sp; lr ] ); + ([ stack_pointer ], regs Arm_env.[ sp ]); + ([ frame_pointer ], regs Arm_env.[ r11 ]); + ([ link ], regs Arm_env.[ lr ]); + ([ thumb ], regs Arm_env.[ r0; r1; r2; r3; r4; r5; r6; r7; sp; lr ]); + ([ function_argument ], regs Arm_env.[ r0; r1; r2; r3 ]); + ([ function_return ], regs Arm_env.[ r0; r1 ]); + ([ caller_saved ], regs Arm_env.[ r0; r1; r2; r3; r12; lr ]); + ([ callee_saved ], regs Arm_env.[ r4; r5; r6; r7; r8; r9; r10; r11; sp ]); + ] + @ status_regs + +let vfp2regs = + Theory.Role.Register.[ ([ general; floating ], untyped @@ array r64 "D" 16) ] + +let vfp3regs = + Theory.Role.Register.[ ([ general; floating ], untyped @@ array r64 "D" 32) ] let vars32_fp = vars32 @ untyped @@ array r64 "D" 16 - let rs = array r64 "R" 32 let xs = array r64 "X" 32 let ws = array r32 "W" 32 @@ -121,70 +136,68 @@ let ds = array r64 "D" 32 let ss = array r32 "S" 32 let hs = array r16 "H" 32 let bs = array r8 "B" 32 -let fp64 = reg r64 "FP" (* X29 *) -let lr64 = reg r64 "LR" (* X30 *) -let sp64 = reg r64 "SP" (* X31 *) -let sp32 = reg r32 "WSP" (* W31 *) +let fp64 = reg r64 "FP" (* X29 *) +let lr64 = reg r64 "LR" (* X30 *) +let sp64 = reg r64 "SP" (* X31 *) +let sp32 = reg r32 "WSP" (* W31 *) let zr = reg r64 "ZR" let zr64 = reg r64 "XZR" let zr32 = reg r32 "WZR" let memsv8 = CT.Mem.define r64 r8 let datav8 = CT.Var.define memsv8 "mem" -let flagsv8 = [ - reg bool "NF"; - reg bool "ZF"; - reg bool "CF"; - reg bool "VF"; -] - -let (.$()) = List.nth_exn +let flagsv8 = [ reg bool "NF"; reg bool "ZF"; reg bool "CF"; reg bool "VF" ] +let ( .$() ) = List.nth_exn let aliases = - xs @< ws @< qs @< ds @< ss @< hs @< bs - @<[fp64; lr64; sp64; zr64] - @<[sp32; zr32] - -let varsv8 = rs @< flagsv8 @< [datav8] - -let regsv8 = Theory.Role.Register.[ - [general; integer], untyped rs; - [general; floating], untyped xs; - [stack_pointer], untyped [reg r64 "R31"]; - [frame_pointer], untyped [reg r64 "R29"]; - [function_argument; function_return], - array r64 "R" 8 @< array r64 "V" 8; - [constant; zero; pseudo], untyped [reg r64 "XZR"; reg r64 "ZR"]; - [constant; zero; pseudo], untyped [reg r32 "WZR"]; - [link], untyped [reg r64 "R30"]; - [alias], aliases; - ] @ status_regs + xs @< ws @< qs @< ds @< ss @< hs @< bs @< [ fp64; lr64; sp64; zr64 ] + @< [ sp32; zr32 ] + +let varsv8 = rs @< flagsv8 @< [ datav8 ] + +let regsv8 = + Theory.Role.Register. + [ + ([ general; integer ], untyped rs); + ([ general; floating ], untyped xs); + ([ stack_pointer ], untyped [ reg r64 "R31" ]); + ([ frame_pointer ], untyped [ reg r64 "R29" ]); + ( [ function_argument; function_return ], + array r64 "R" 8 @< array r64 "V" 8 ); + ([ constant; zero; pseudo ], untyped [ reg r64 "XZR"; reg r64 "ZR" ]); + ([ constant; zero; pseudo ], untyped [ reg r32 "WZR" ]); + ([ link ], untyped [ reg r64 "R30" ]); + ([ alias ], aliases); + ] + @ status_regs let equal xs ys = - List.map2_exn xs ys ~f:Theory.Alias.(fun x y -> def x [reg y]) + List.map2_exn xs ys ~f:Theory.Alias.(fun x y -> def x [ reg y ]) let lower xs _ ys = - List.map2_exn xs ys ~f:Theory.Alias.(fun x y -> def x [unk; reg y]) + List.map2_exn xs ys ~f:Theory.Alias.(fun x y -> def x [ unk; reg y ]) let are f x y = f x y -let aliasing = Theory.Alias.[ +let aliasing = + Theory.Alias. [ - def fp64 [reg xs.$(29)]; - def lr64 [reg xs.$(30)]; - def sp64 [reg xs.$(31)]; - def sp64 [unk; reg sp32]; - def zr [reg zr64]; - def zr [unk; reg zr32]; - ]; - are equal rs xs; - lower rs are ws; - are equal vs qs; - lower qs are ds; - lower ds are ss; - lower ss are hs; - lower hs are bs; - ] |> List.concat - + [ + def fp64 [ reg xs.$(29) ]; + def lr64 [ reg xs.$(30) ]; + def sp64 [ reg xs.$(31) ]; + def sp64 [ unk; reg sp32 ]; + def zr [ reg zr64 ]; + def zr [ unk; reg zr32 ]; + ]; + are equal rs xs; + lower rs are ws; + are equal vs qs; + lower qs are ds; + lower ds are ss; + lower ss are hs; + lower hs are bs; + ] + |> List.concat let parent = CT.Target.declare ~package "arm-family" let is_arm = CT.Target.belongs parent @@ -222,7 +235,10 @@ module type ARM = sig val v86a : CT.Target.t end -module type Endianness = sig val endianness : CT.endianness end +module type Endianness = sig + val endianness : CT.endianness +end + module Family (Order : Endianness) = struct include Order @@ -234,67 +250,50 @@ module Family (Order : Endianness) = struct name ^ "+" ^ KB.Name.unqualified order let def ?code_alignment ~parent name = - if CT.Target.is_unknown parent - then CT.Target.unknown - else CT.Target.declare ~package (ordered name) ~parent - ?code_alignment - ~nicknames:[name] - - let (<:) parent name = def ~parent name + if CT.Target.is_unknown parent then CT.Target.unknown + else + CT.Target.declare ~package (ordered name) ~parent ?code_alignment + ~nicknames:[ name ] + let ( <: ) parent name = def ~parent name let v4 = - if is_bi_endian - then CT.Target.unknown - else CT.Target.declare ~package (ordered "armv4") - ~parent - ~code_alignment:32 - ~nicknames:["armv4"] - ~bits:32 - ~byte:8 - ~endianness - ~code:data - ~data:data - ~vars:vars32 - ~regs:regs32 + if is_bi_endian then CT.Target.unknown + else + CT.Target.declare ~package (ordered "armv4") ~parent ~code_alignment:32 + ~nicknames:[ "armv4" ] ~bits:32 ~byte:8 ~endianness ~code:data ~data + ~vars:vars32 ~regs:regs32 let v4t = def "armv4t" ~parent:v4 ~code_alignment:16 - let v5 = v4 <: "armv5" - let v5t = v5 <: "armv5t" - let v5te = v5t <: "armv5te" + let v5 = v4 <: "armv5" + let v5t = v5 <: "armv5t" + let v5te = v5t <: "armv5te" let v5tej = v5te <: "armv5tej" - let v6 = v5tej <: "armv6" - let v6t2 = v6 <: "armv6t2" - let v6z = v6 <: "armv6z" - let v6k = v6z <: "armv6k" - let v6m = v6 <: "armv6-m" - - let v7 = if not is_bi_endian then v6t2 <: "armv7" - else CT.Target.declare ~package (ordered "armv7") - ~parent - ~nicknames:["armv7"] - ~code_alignment:16 - ~bits:32 - ~byte:8 - ~endianness - ~code:data - ~data:data - ~vars:vars32 - ~regs:regs32 + let v6 = v5tej <: "armv6" + let v6t2 = v6 <: "armv6t2" + let v6z = v6 <: "armv6z" + let v6k = v6z <: "armv6k" + let v6m = v6 <: "armv6-m" + + let v7 = + if not is_bi_endian then v6t2 <: "armv7" + else + CT.Target.declare ~package (ordered "armv7") ~parent + ~nicknames:[ "armv7" ] ~code_alignment:16 ~bits:32 ~byte:8 ~endianness + ~code:data ~data ~vars:vars32 ~regs:regs32 let v7m = v7 <: "armv7-m" - let v7fp = CT.Target.declare ~package (ordered "armv7+fp") ~parent:v7 - ~nicknames:["armv7+fp"] - ~vars:vars32_fp - ~regs:(regs32@vfp3regs) + let v7fp = + CT.Target.declare ~package (ordered "armv7+fp") ~parent:v7 + ~nicknames:[ "armv7+fp" ] ~vars:vars32_fp ~regs:(regs32 @ vfp3regs) + + let v7a = v7 <: "armv7-a" - let v7a = v7 <: "armv7-a" - let v7afp = CT.Target.declare ~package (ordered "armv7-a+fp") - ~nicknames:["armv7-a+fp"] - ~parent:v7a - ~vars:vars32_fp - ~regs:(regs32@vfp3regs) + let v7afp = + CT.Target.declare ~package (ordered "armv7-a+fp") + ~nicknames:[ "armv7-a+fp" ] ~parent:v7a ~vars:vars32_fp + ~regs:(regs32 @ vfp3regs) (* the generic final v7 incorporating all 32-bit targets *) let aarch32 = @@ -303,46 +302,34 @@ module Family (Order : Endianness) = struct Theory.Target.declare ~package (if is_little then "arm" else "armeb") ~parent:v7afp - ~nicknames:[ - if is_little - then "aarch32" - else "aarch32eb"; - ] - + ~nicknames:[ (if is_little then "aarch32" else "aarch32eb") ] let v8 = CT.Target.declare ~package (ordered "armv8") ~parent:(if is_bi_endian then parent else aarch32) - ~nicknames:["aarch64"] - ~aliasing - ~code_alignment:32 - ~bits:64 - ~code:datav8 - ~data:datav8 - ~vars:varsv8 - ~regs:regsv8 - + ~nicknames:[ "aarch64" ] ~aliasing ~code_alignment:32 ~bits:64 + ~code:datav8 ~data:datav8 ~vars:varsv8 ~regs:regsv8 let v8a = CT.Target.declare ~package (ordered "armv8-a") ~parent:v8 - ~nicknames:["armv8-a"] + ~nicknames:[ "armv8-a" ] let v8a32 = - Theory.Target.declare ~package (ordered "armv8-a+aarch32") - ~nicknames:["armv8-a+aarch32"] - ~parent:v7 + Theory.Target.declare ~package + (ordered "armv8-a+aarch32") + ~nicknames:[ "armv8-a+aarch32" ] ~parent:v7 let v8m32 = - Theory.Target.declare ~package (ordered "armv8-m+aarch32") - ~nicknames:["armv8-m+aarch32"] - ~parent:v7m + Theory.Target.declare ~package + (ordered "armv8-m+aarch32") + ~nicknames:[ "armv8-m+aarch32" ] ~parent:v7m let v8r32 = - Theory.Target.declare ~package (ordered "armv8-r+aarch32") - ~nicknames:["armv8-r+aarch32"] - ~parent:v7 + Theory.Target.declare ~package + (ordered "armv8-r+aarch32") + ~nicknames:[ "armv8-r+aarch32" ] ~parent:v7 - let v81a = v8a <: "armv8.1-a" + let v81a = v8a <: "armv8.1-a" let v82a = v81a <: "armv8.2-a" let v83a = v82a <: "armv8.3-a" let v84a = v83a <: "armv8.4-a" @@ -356,58 +343,59 @@ module Family (Order : Endianness) = struct Theory.Target.declare ~package (if is_little then "aarch64" else "aarch64_be") ~parent:v86a - ~nicknames:[ - if is_little - then "arm64" - else "arm64eb"; - ] + ~nicknames:[ (if is_little then "arm64" else "arm64eb") ] let v9a = v86a <: "armv9-a" - let parent = if is_bi_endian then v7 else v4 - end -module LE = Family(struct let endianness = CT.Endianness.le end) -module Bi = Family(struct let endianness = CT.Endianness.bi end) -module EB = Family(struct let endianness = CT.Endianness.eb end) +module LE = Family (struct + let endianness = CT.Endianness.le +end) + +module Bi = Family (struct + let endianness = CT.Endianness.bi +end) -let family_of_endian is_little : (module ARM) = match is_little with +module EB = Family (struct + let endianness = CT.Endianness.eb +end) + +let family_of_endian is_little : (module ARM) = + match is_little with | None -> (module Bi) | Some true -> (module LE) | Some false -> (module EB) - -let prefixes = ["arm"; "thumb"; "aarch64";] -let suffixes = ["eb"; "_be"] +let prefixes = [ "arm"; "thumb"; "aarch64" ] +let suffixes = [ "eb"; "_be" ] let in_family = function | None -> false - | Some x -> List.exists prefixes ~f:(fun prefix -> - String.is_prefix ~prefix x) + | Some x -> List.exists prefixes ~f:(fun prefix -> String.is_prefix ~prefix x) let drop_end s = - Option.value ~default:s @@ - List.find_map suffixes ~f:(fun suffix -> - String.chop_suffix ~suffix s) + Option.value ~default:s + @@ List.find_map suffixes ~f:(fun suffix -> String.chop_suffix ~suffix s) -let split s = List.find_map_exn prefixes ~f:(fun prefix -> - match String.chop_prefix ~prefix s with - | None -> None - | Some r -> Some (prefix,drop_end r)) +let split s = + List.find_map_exn prefixes ~f:(fun prefix -> + match String.chop_prefix ~prefix s with + | None -> None + | Some r -> Some (prefix, drop_end r)) let normalize arch sub = - match arch,sub with - | None,_ -> assert false - | Some arch,None -> split arch - | Some arch, Some sub -> arch,sub + match (arch, sub) with + | None, _ -> assert false + | Some arch, None -> split arch + | Some arch, Some sub -> (arch, sub) let enable_loader () = let open Bap.Std in - KB.Rule.(declare ~package "arm-target" |> - require Image.Spec.slot |> - provide CT.Unit.target |> - comment "computes target from the OGRE specification"); + KB.Rule.( + declare ~package "arm-target" + |> require Image.Spec.slot |> provide CT.Unit.target + |> comment "computes target from the OGRE specification"); let open KB.Syntax in let request_info doc = let open Ogre.Syntax in @@ -416,195 +404,194 @@ let enable_loader () = Ogre.request Image.Scheme.subarch >>= fun sub -> Ogre.request Image.Scheme.is_little_endian >>= fun little -> Ogre.request Image.Scheme.format >>= fun filetype -> - Ogre.return (arch,sub,little,filetype) in + Ogre.return (arch, sub, little, filetype) + in match Ogre.eval request doc with - | Error _ -> None,None,None,None - | Ok info -> info in + | Error _ -> (None, None, None, None) + | Ok info -> info + in KB.promise CT.Unit.target @@ fun unit -> - KB.collect Image.Spec.slot unit >>| - request_info >>| fun (arch,sub,is_little,filetype) -> + KB.collect Image.Spec.slot unit >>| request_info + >>| fun (arch, sub, is_little, filetype) -> if not (in_family arch) then CT.Target.unknown else let module Family = (val family_of_endian is_little) in - let parent = match normalize arch sub with - | "arm","v6m" -> Family.v6m - | "thumb",_ -> Family.v7m - | "arm", - ("v4"|"v4t"| - "v5"|"v5t"|"v5te"|"v5tej"| - "v6"|"v6z"|"v6k"|"v6t2"| - "v7"|"v7fp"|"v7a"|"v7afp") -> Family.aarch32 - | "arm", - ("v8"|"v8a"|"v81a"|"v82a"|"v83a"|"v84a"|"v85a"|"v86a") -> - Family.aarch64 - | "aarch64",_ -> Family.aarch64 - | _ -> Family.v7 in + let parent = + match normalize arch sub with + | "arm", "v6m" -> Family.v6m + | "thumb", _ -> Family.v7m + | ( "arm", + ( "v4" | "v4t" | "v5" | "v5t" | "v5te" | "v5tej" | "v6" | "v6z" + | "v6k" | "v6t2" | "v7" | "v7fp" | "v7a" | "v7afp" ) ) -> + Family.aarch32 + | ( "arm", + ("v8" | "v8a" | "v81a" | "v82a" | "v83a" | "v84a" | "v85a" | "v86a") ) + -> + Family.aarch64 + | "aarch64", _ -> Family.aarch64 + | _ -> Family.v7 + in let is_64bit = Theory.Target.bits parent = 64 in - let filetype,system,abi = match filetype with - | Some "elf" -> Theory.Filetype.elf, - Theory.System.linux, - if is_64bit then Theory.Abi.gnu else Theory.Abi.gnueabi - | Some "coff" -> Theory.Filetype.coff,Theory.System.windows,Theory.Abi.eabi - | Some "macho" -> Theory.Filetype.macho, Theory.System.darwin,Theory.Abi.eabi - | _ -> Theory.Filetype.unknown,Theory.System.unknown,Theory.Abi.unknown in + let filetype, system, abi = + match filetype with + | Some "elf" -> + ( Theory.Filetype.elf, + Theory.System.linux, + if is_64bit then Theory.Abi.gnu else Theory.Abi.gnueabi ) + | Some "coff" -> + (Theory.Filetype.coff, Theory.System.windows, Theory.Abi.eabi) + | Some "macho" -> + (Theory.Filetype.macho, Theory.System.darwin, Theory.Abi.eabi) + | _ -> (Theory.Filetype.unknown, Theory.System.unknown, Theory.Abi.unknown) + in Theory.Target.select ~strict:true ~system ~parent ~filetype ~abi () - -type arms = [ - | Arch.arm - | Arch.armeb - | Arch.thumb - | Arch.thumbeb - | Arch.aarch64 -] +type arms = [ Arch.arm | Arch.armeb | Arch.thumb | Arch.thumbeb | Arch.aarch64 ] let arms : arms Map.M(CT.Target).t = - Map.of_alist_exn (module CT.Target) [ - LE.aarch32, `armv7; - EB.aarch32, `armv7eb; - LE.aarch64, `aarch64; - EB.aarch64, `aarch64_be; - LE.v4, `armv4; - LE.v4t, `armv4; - LE.v5, `armv5; - LE.v5t, `armv5; - LE.v5te, `armv5; - LE.v5tej, `armv5; - LE.v6, `armv6; - LE.v6z, `armv6; - LE.v6k, `armv6; - LE.v6m, `armv6; - LE.v6t2, `armv6; - LE.v7, `armv7; - LE.v7a, `armv7; - LE.v7m, `thumbv7; - LE.v7afp, `armv7; - Bi.v7, `armv7; - Bi.v7a, `armv7; - Bi.v7m, `thumbv7; - Bi.v7afp, `armv7; - LE.v8a, `aarch64; - LE.v81a, `aarch64; - LE.v82a, `aarch64; - LE.v83a, `aarch64; - LE.v84a, `aarch64; - LE.v85a, `aarch64; - LE.v86a, `aarch64; - EB.v4, `armv4eb; - EB.v4t, `armv4eb; - EB.v5, `armv5eb; - EB.v5t, `armv5eb; - EB.v5te, `armv5eb; - EB.v5tej, `armv5eb; - EB.v6, `armv6eb; - EB.v6z, `armv6eb; - EB.v6k, `armv6eb; - EB.v6m, `armv6eb; - EB.v6t2,`armv6eb; - EB.v7, `armv7eb; - EB.v7a, `armv7eb; - EB.v7m, `thumbv7eb; - EB.v7afp, `armv7eb; - EB.v8a, `aarch64_be; - EB.v81a, `aarch64_be; - EB.v82a, `aarch64_be; - EB.v83a, `aarch64_be; - EB.v84a, `aarch64_be; - EB.v85a, `aarch64_be; - EB.v86a, `aarch64_be; - ] - + Map.of_alist_exn + (module CT.Target) + [ + (LE.aarch32, `armv7); + (EB.aarch32, `armv7eb); + (LE.aarch64, `aarch64); + (EB.aarch64, `aarch64_be); + (LE.v4, `armv4); + (LE.v4t, `armv4); + (LE.v5, `armv5); + (LE.v5t, `armv5); + (LE.v5te, `armv5); + (LE.v5tej, `armv5); + (LE.v6, `armv6); + (LE.v6z, `armv6); + (LE.v6k, `armv6); + (LE.v6m, `armv6); + (LE.v6t2, `armv6); + (LE.v7, `armv7); + (LE.v7a, `armv7); + (LE.v7m, `thumbv7); + (LE.v7afp, `armv7); + (Bi.v7, `armv7); + (Bi.v7a, `armv7); + (Bi.v7m, `thumbv7); + (Bi.v7afp, `armv7); + (LE.v8a, `aarch64); + (LE.v81a, `aarch64); + (LE.v82a, `aarch64); + (LE.v83a, `aarch64); + (LE.v84a, `aarch64); + (LE.v85a, `aarch64); + (LE.v86a, `aarch64); + (EB.v4, `armv4eb); + (EB.v4t, `armv4eb); + (EB.v5, `armv5eb); + (EB.v5t, `armv5eb); + (EB.v5te, `armv5eb); + (EB.v5tej, `armv5eb); + (EB.v6, `armv6eb); + (EB.v6z, `armv6eb); + (EB.v6k, `armv6eb); + (EB.v6m, `armv6eb); + (EB.v6t2, `armv6eb); + (EB.v7, `armv7eb); + (EB.v7a, `armv7eb); + (EB.v7m, `thumbv7eb); + (EB.v7afp, `armv7eb); + (EB.v8a, `aarch64_be); + (EB.v81a, `aarch64_be); + (EB.v82a, `aarch64_be); + (EB.v83a, `aarch64_be); + (EB.v84a, `aarch64_be); + (EB.v85a, `aarch64_be); + (EB.v86a, `aarch64_be); + ] let smc = Theory.Abi.declare ~package:"arm" "smc" let hvc = Theory.Abi.declare ~package:"arm" "hvc" -let subtargets = [ - (* 32-bit targets *) - - (* generic and standalone targets *) - [EB.aarch32; LE.aarch32], - Theory.System.[unknown; none], - Theory.Abi.[eabi], - Theory.Fabi.[unknown; hard], - Theory.Filetype.[unknown]; - - (* uefi *) - [LE.aarch32], - Theory.System.[uefi], - Theory.Abi.[smc], - Theory.Fabi.[unknown], - Theory.Filetype.[unknown; coff]; - - (* linux targets *) - [EB.aarch32; LE.aarch32], - Theory.System.[linux], - Theory.Abi.[gnueabi; gnu], - Theory.Fabi.[unknown; hard], - Theory.Filetype.[unknown; elf]; - - (* darwin targets *) - [LE.aarch32], - Theory.System.[darwin], - Theory.Abi.[eabi], - Theory.Fabi.[unknown], - Theory.Filetype.[unknown; macho]; - - (* 64-bit targets *) - [EB.aarch64; LE.aarch64], - Theory.System.[unknown; none], - Theory.Abi.[eabi], - Theory.Fabi.[unknown], - Theory.Filetype.[unknown]; - - (* uefi *) - [LE.aarch64], - Theory.System.[uefi], - Theory.Abi.[smc], - Theory.Fabi.[unknown], - Theory.Filetype.[unknown; coff]; - - (* linux targets *) - [EB.aarch64; LE.aarch64], - Theory.System.[linux], - Theory.Abi.[gnu], - Theory.Fabi.[unknown], - Theory.Filetype.[unknown; elf]; - - (* darwin targets *) - [LE.aarch64], - Theory.System.[darwin], - Theory.Abi.[eabi], - Theory.Fabi.[unknown], - Theory.Filetype.[unknown; macho]; - - (* windows targets *) - [LE.aarch64], - Theory.System.[windows], - Theory.Abi.[eabi], - Theory.Fabi.[unknown], - Theory.Filetype.[unknown; coff]; -] +let subtargets = + [ + (* 32-bit targets *) + + (* generic and standalone targets *) + ( [ EB.aarch32; LE.aarch32 ], + Theory.System.[ unknown; none ], + Theory.Abi.[ eabi ], + Theory.Fabi.[ unknown; hard ], + Theory.Filetype.[ unknown ] ); + (* uefi *) + ( [ LE.aarch32 ], + Theory.System.[ uefi ], + Theory.Abi.[ smc ], + Theory.Fabi.[ unknown ], + Theory.Filetype.[ unknown; coff ] ); + (* linux targets *) + ( [ EB.aarch32; LE.aarch32 ], + Theory.System.[ linux ], + Theory.Abi.[ gnueabi; gnu ], + Theory.Fabi.[ unknown; hard ], + Theory.Filetype.[ unknown; elf ] ); + (* darwin targets *) + ( [ LE.aarch32 ], + Theory.System.[ darwin ], + Theory.Abi.[ eabi ], + Theory.Fabi.[ unknown ], + Theory.Filetype.[ unknown; macho ] ); + (* 64-bit targets *) + ( [ EB.aarch64; LE.aarch64 ], + Theory.System.[ unknown; none ], + Theory.Abi.[ eabi ], + Theory.Fabi.[ unknown ], + Theory.Filetype.[ unknown ] ); + (* uefi *) + ( [ LE.aarch64 ], + Theory.System.[ uefi ], + Theory.Abi.[ smc ], + Theory.Fabi.[ unknown ], + Theory.Filetype.[ unknown; coff ] ); + (* linux targets *) + ( [ EB.aarch64; LE.aarch64 ], + Theory.System.[ linux ], + Theory.Abi.[ gnu ], + Theory.Fabi.[ unknown ], + Theory.Filetype.[ unknown; elf ] ); + (* darwin targets *) + ( [ LE.aarch64 ], + Theory.System.[ darwin ], + Theory.Abi.[ eabi ], + Theory.Fabi.[ unknown ], + Theory.Filetype.[ unknown; macho ] ); + (* windows targets *) + ( [ LE.aarch64 ], + Theory.System.[ windows ], + Theory.Abi.[ eabi ], + Theory.Fabi.[ unknown ], + Theory.Filetype.[ unknown; coff ] ); + ] let install_subtargets () = - List.iter subtargets ~f:(fun (family,systems,abis,fabis,filetypes) -> - List.iter family ~f:(Theory.Target.register ~systems ~abis ~fabis ~filetypes)) + List.iter subtargets ~f:(fun (family, systems, abis, fabis, filetypes) -> + List.iter family + ~f:(Theory.Target.register ~systems ~abis ~fabis ~filetypes)) let enable_arch () = let open KB.Syntax in - KB.Rule.(declare ~package "arm-arch" |> - require CT.Unit.target |> - provide Arch.unit_slot |> - comment "computes Arch.t from the unit's target"); + KB.Rule.( + declare ~package "arm-arch" + |> require CT.Unit.target |> provide Arch.unit_slot + |> comment "computes Arch.t from the unit's target"); KB.promise Arch.unit_slot @@ fun unit -> KB.collect CT.Unit.target unit >>| fun t -> - if is_arm t then match Map.find arms t with + if is_arm t then + match Map.find arms t with | Some arch -> (arch :> Arch.t) - | None -> match is_64bit t,is_big t,is_little t with - | _,false,false -> `unknown - | true,true,_ -> `aarch64_be - | true,false,_ -> `aarch64 - | false,true,_ -> `armv7eb - | false,false,_ -> `armv7 + | None -> ( + match (is_64bit t, is_big t, is_little t) with + | _, false, false -> `unknown + | true, true, _ -> `aarch64_be + | true, false, _ -> `aarch64 + | false, true, _ -> `armv7eb + | false, false, _ -> `armv7) else `unknown let llvm_a32 = CT.Language.declare ~package "llvm-armv7" @@ -616,27 +603,29 @@ module Dis = Disasm_expert.Basic let register ?attrs encoding triple = Dis.register encoding @@ fun t -> - let triple = if Theory.Endianness.(eb = Theory.Target.endianness t) - then triple ^ "eb" else triple in + let triple = + if Theory.Endianness.(eb = Theory.Target.endianness t) then triple ^ "eb" + else triple + in Dis.create ?attrs ~backend:"llvm" triple let symbol_values doc = let open Ogre.Let in let open Image.Scheme in let symbols = - let* symtab = - Ogre.(collect Query.(select (from symbol_value))) in + let* symtab = Ogre.(collect Query.(select (from symbol_value))) in let+ entry = Ogre.request entry_point in match entry with | None -> symtab | Some entry -> - let mask = Int64.(-1L lsl 1) in - Seq.cons Int64.(entry land mask, entry) symtab in + let mask = Int64.(-1L lsl 1) in + Seq.cons Int64.(entry land mask, entry) symtab + in match Ogre.eval symbols doc with | Ok syms -> syms | Error err -> - failwithf "Arm_target: broken file specification: %s" - (Error.to_string_hum err) () + failwithf "Arm_target: broken file specification: %s" + (Error.to_string_hum err) () module Encodings = struct open Bap_primus.Std @@ -644,57 +633,56 @@ module Encodings = struct module Sigma = Lambda.Semantics let empty = Map.empty (module Bitvec_order) - let lsb x = Int64.(x land 1L) let is_thumb x = Int64.equal (lsb x) 1L - let symbols_encoding spec = - symbol_values spec |> - Seq.fold ~init:empty ~f:(fun symbols (addr,value) -> - let addr = Bitvec.M32.int64 addr in - if is_thumb value - then Map.set symbols addr llvm_t32 - else Map.update symbols addr ~f:(function - | None -> llvm_a32 - | Some t -> t)) - - let slot = KB.Class.property CT.Unit.cls - ~package "symbols-encodings" @@ - KB.Domain.mapping (module Bitvec_order) "encodings" - ~equal:CT.Language.equal - + symbol_values spec + |> Seq.fold ~init:empty ~f:(fun symbols (addr, value) -> + let addr = Bitvec.M32.int64 addr in + if is_thumb value then Map.set symbols ~key:addr ~data:llvm_t32 + else + Map.update symbols addr ~f:(function + | None -> llvm_a32 + | Some t -> t)) + + let slot = + KB.Class.property CT.Unit.cls ~package "symbols-encodings" + @@ KB.Domain.mapping + (module Bitvec_order) + "encodings" ~equal:CT.Language.equal let set_encoding label x y = let* addr = x.?[Sigma.static] in let* code = y.?[Sigma.symbol] in - let* unit = label-->?Theory.Label.unit in - let* lang = match code with + let* unit = label -->? Theory.Label.unit in + let* lang = + match code with | ":t32" | ":T32" -> !!llvm_t32 | ":a32" | ":A32" -> !!llvm_a32 - | other -> - Sigma.failp "unknown encoding %s, expects :T32 or :A32" other in - let* encodings = unit-->slot in - let res = Map.set encodings addr lang in - KB.catch (KB.provide slot unit res) (fun _ -> KB.return ()) >>| - fun () -> Sigma.Effect.pure Sigma.Value.nil - + | other -> Sigma.failp "unknown encoding %s, expects :T32 or :A32" other + in + let* encodings = unit --> slot in + let res = Map.set encodings ~key:addr ~data:lang in + KB.catch (KB.provide slot unit res) (fun _ -> KB.return ()) >>| fun () -> + Sigma.Effect.pure Sigma.Value.nil let provide_primitive () = - let types = Lambda.Type.Spec.(tuple [int; sym] @-> sym) in - let docs = "(arm-set-encoding ADDR ENC) specifies the encoding \ - of instruction at ADDR as ENC, where ENC is either, \ - :T32 or :A32" in - Sigma.declare ~types ~docs ~package:"bap" "arm-set-encoding" - ~body:(fun _ -> KB.return @@ fun lbl args -> match args with - | [addr; encoding] -> set_encoding lbl addr encoding + let types = Lambda.Type.Spec.(tuple [ int; sym ] @-> sym) in + let docs = + "(arm-set-encoding ADDR ENC) specifies the encoding of instruction at \ + ADDR as ENC, where ENC is either, :T32 or :A32" + in + Sigma.declare ~types ~docs ~package:"bap" "arm-set-encoding" ~body:(fun _ -> + KB.return @@ fun lbl args -> + match args with + | [ addr; encoding ] -> set_encoding lbl addr encoding | _ -> Sigma.failp "expected two arguments") let provide_encodings () = let open KB.Syntax in KB.promise slot @@ fun label -> - KB.collect Image.Spec.slot label >>| - symbols_encoding + KB.collect Image.Spec.slot label >>| symbols_encoding let provide () = provide_encodings (); @@ -710,109 +698,98 @@ let has_t32 label = KB.collect CT.Label.unit label >>= function | None -> !!false | Some unit -> - KB.collect Encodings.slot unit >>| - Map.exists ~f:(Theory.Language.equal llvm_t32) - + KB.collect Encodings.slot unit + >>| Map.exists ~f:(Theory.Language.equal llvm_t32) let is_word_aligned x = Bitvec.(M32.(int 3 land x) = zero) let compute_encoding_from_symbol_table label = - let (>>=?) x f = x >>= function - | None -> !!Theory.Language.unknown - | Some x -> f x in + let ( >>=? ) x f = + x >>= function None -> !!Theory.Language.unknown | Some x -> f x + in KB.collect CT.Label.unit label >>=? fun unit -> KB.collect CT.Label.addr label >>=? fun addr -> KB.collect Encodings.slot unit >>| fun encodings -> if not (is_word_aligned addr) then llvm_t32 - else match Map.find encodings addr with + else + match Map.find encodings addr with | Some x -> x | None -> CT.Language.unknown (* here t < p means that t was introduced before p *) -let (>=) t p = CT.Target.belongs t p -let (<) t p = t >= p && not (p >= t) -let (<=) t p = t = p || t < p - -let m_profiles = [ - LE.v7m; EB.v7m; Bi.v7m; - LE.v8m32; EB.v8m32; Bi.v8m32; -] -let is_thumb_only t = - List.exists m_profiles ~f:(fun p -> p <= t) - +let ( >= ) t p = CT.Target.belongs t p +let ( < ) t p = t >= p && not (p >= t) +let ( <= ) t p = t = p || t < p +let m_profiles = [ LE.v7m; EB.v7m; Bi.v7m; LE.v8m32; EB.v8m32; Bi.v8m32 ] +let is_thumb_only t = List.exists m_profiles ~f:(fun p -> p <= t) let register_pcode () = Dis.register pcode @@ fun t -> - let triple = match is_64bit t,is_little t,is_big t with - | true,true,_ -> "AARCH64:LE:64:v8A" - | true,_,_ -> "AARCH64:BE:64:v8A" - | false,true,_ -> "ARM:LE:32:v7" - | false,_,true -> "ARM:BE:32:v7" - | false,_,_ -> "ARM:LEBE:32:v7LEInstruction" in + let triple = + match (is_64bit t, is_little t, is_big t) with + | true, true, _ -> "AARCH64:LE:64:v8A" + | true, _, _ -> "AARCH64:BE:64:v8A" + | false, true, _ -> "ARM:LE:32:v7" + | false, _, true -> "ARM:BE:32:v7" + | false, _, _ -> "ARM:LEBE:32:v7LEInstruction" + in Dis.create ~backend:"ghidra" triple let enable_pcode () = register_pcode (); KB.promise Theory.Label.encoding @@ fun label -> Theory.Label.target label >>| fun t -> - if is_arm t then pcode - else Theory.Language.unknown + if is_arm t then pcode else Theory.Language.unknown let guess_encoding interworking label target mode = if is_arm target then - if is_64bit target then !!llvm_a64 else - if is_thumb_only target - then !!llvm_t32 + if is_64bit target then !!llvm_a64 + else if is_thumb_only target then !!llvm_t32 else let from_mode_or fallback = if Mode.equal mode Modes.t32 then !!llvm_t32 else if Mode.equal mode Modes.a32 then !!llvm_a32 - else fallback () in + else fallback () + in match interworking with - | Some true -> from_mode_or @@ fun () -> - compute_encoding_from_symbol_table label + | Some true -> + from_mode_or @@ fun () -> compute_encoding_from_symbol_table label | Some false -> !!llvm_a32 - | None -> from_mode_or @@ fun () -> - has_t32 label >>= function - | true -> compute_encoding_from_symbol_table label - | false -> !!llvm_a32 + | None -> ( + from_mode_or @@ fun () -> + has_t32 label >>= function + | true -> compute_encoding_from_symbol_table label + | false -> !!llvm_a32) else !!CT.Language.unknown (* our lowest supported llvm is 6.0 which supports up to v8.3a *) let max_v8a_version = 3 let v8aversions = - List.init max_v8a_version ~f:(fun i -> - sprintf "+v8.%da" (i+1)) + List.init max_v8a_version ~f:(fun i -> sprintf "+v8.%da" (i + 1)) let is_normal_feature s = - String.length s > 0 && match s.[0] with - | '+' | '-' -> true - | _ -> false + String.length s > 0 && match s.[0] with '+' | '-' -> true | _ -> false let normalize_features xs = - List.map xs ~f:(fun x -> - if is_normal_feature x - then x - else "+" ^ x) |> - String.concat ~sep:"," + List.map xs ~f:(fun x -> if is_normal_feature x then x else "+" ^ x) + |> String.concat ~sep:"," -let enable_llvm ?(features=[]) ?interworking () = +let enable_llvm ?(features = []) ?interworking () = let open KB.Syntax in - let features xs = normalize_features (xs@features) in + let features xs = normalize_features (xs @ features) in register llvm_a32 "armv7" ~attrs:(features []); - register llvm_t32 "thumbv7" ~attrs:(features ["thumb2"]); + register llvm_t32 "thumbv7" ~attrs:(features [ "thumb2" ]); register llvm_a64 "aarch64" ~attrs:(features v8aversions); KB.promise CT.Label.encoding @@ fun label -> let* target = CT.Label.target label in let* mode = KB.collect Mode.slot label in guess_encoding interworking label target mode -let load ?features ?interworking ?(backend="llvm") () = +let load ?features ?interworking ?(backend = "llvm") () = install_subtargets (); enable_loader (); enable_arch (); Encodings.provide (); - if String.equal backend "llvm" - then enable_llvm ?features ?interworking () + if String.equal backend "llvm" then enable_llvm ?features ?interworking () else enable_pcode () diff --git a/lib/arm/arm_target.mli b/lib/arm/arm_target.mli index ab20b3bf0..198a57de3 100644 --- a/lib/arm/arm_target.mli +++ b/lib/arm/arm_target.mli @@ -2,36 +2,32 @@ open Bap_core_theory - +val parent : Theory.Target.t (** The parent of all ARM targets. - When a new target is declared it is advised to use this target as - parent so that the newly declared target will be included into the - ARM Targets family. - The [parent] target is pure abstract and doesn't have any - propreties set. -*) -val parent : Theory.Target.t + When a new target is declared it is advised to use this target as parent so + that the newly declared target will be included into the ARM Targets family. + The [parent] target is pure abstract and doesn't have any propreties set. *) -(** A role for registers available in the thumb mode. *) val thumb : Theory.role +(** A role for registers available in the thumb mode. *) (** The family of little endian targets. - Each version is the parent to the following version, with [parent] - being the the earliest version.*) + Each version is the parent to the following version, with [parent] being the + the earliest version.*) module LE : sig val parent : Theory.Target.t + val aarch32 : Theory.Target.t (** a generic 32-bit target that accomodates all 32-bit targets. - @since 2.5.0 *) - val aarch32 : Theory.Target.t + @since 2.5.0 *) + val aarch64 : Theory.Target.t (** a generic 64-bit target that accomodates all 64-bit targets. @since 2.5.0 *) - val aarch64 : Theory.Target.t val v4 : Theory.Target.t val v4t : Theory.Target.t @@ -60,13 +56,12 @@ module LE : sig val v9a : Theory.Target.t end - (** The family of big endian targets. - Each version is the parent to the following version, with [parent] - being the the earliest version.*) + Each version is the parent to the following version, with [parent] being the + the earliest version.*) module EB : sig - val parent : Theory.Target.t (** currently the same as [v4] *) + val parent : Theory.Target.t (* currently the same as [v4] *) val v4 : Theory.Target.t val v4t : Theory.Target.t val v5 : Theory.Target.t @@ -95,14 +90,12 @@ module EB : sig val v9a : Theory.Target.t end - (** The family of targets with switchable endiannes. - The switchable (context-dependent) endianness was introduced - in [v7] therefore there are no targets of earlier version. -*) + The switchable (context-dependent) endianness was introduced in [v7] + therefore there are no targets of earlier version. *) module Bi : sig - val parent : Theory.Target.t (** the same as [v7] *) + val parent : Theory.Target.t (* the same as [v7] *) val v7 : Theory.Target.t val v7fp : Theory.Target.t val v7a : Theory.Target.t @@ -124,23 +117,22 @@ val llvm_a32 : Theory.language val llvm_t32 : Theory.language val llvm_a64 : Theory.language - +val load : + ?features:string list -> ?interworking:bool -> ?backend:string -> unit -> unit (** [load ()] loads the knowledge base rules for the ARM targets. - This includes parsing the loader output and enabling backward - compatibility with the old [Arch.t] representation. - - @param interworking if set disables/enables the interworking - mode (switching between arm and thumb modes). If not set, then - the presence of interworking is detected using heurisitics. Right - now if the heuristic looks into the symbol table and if there is - a symbol there with an odd address (which is used to indicate - thumb encoding) then interworking is enabled. - - @param features is the backend-specific list of features. The - syntax is vastly dependent on the backend. For llvm, in - particular, the features are translated to the disassembler - attributes. If the feature doesn't start with [+] or [-] then it - is assumed that the feature is enabled and [+] is prepended. -*) -val load : ?features:string list -> ?interworking:bool -> ?backend:string -> unit -> unit + This includes parsing the loader output and enabling backward compatibility + with the old [Arch.t] representation. + + @param interworking + if set disables/enables the interworking mode (switching between arm and + thumb modes). If not set, then the presence of interworking is detected + using heurisitics. Right now if the heuristic looks into the symbol table + and if there is a symbol there with an odd address (which is used to + indicate thumb encoding) then interworking is enabled. + + @param features + is the backend-specific list of features. The syntax is vastly dependent + on the backend. For llvm, in particular, the features are translated to + the disassembler attributes. If the feature doesn't start with [+] or [-] + then it is assumed that the feature is enabled and [+] is prepended. *) diff --git a/lib/arm/arm_types.ml b/lib/arm/arm_types.ml index 00e9259dd..b5bd95d1f 100644 --- a/lib/arm/arm_types.ml +++ b/lib/arm/arm_types.ml @@ -1,13 +1,12 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap.Std - module Basic = Disasm_expert.Basic exception Lifting_failed of string -type cond = [ - | `EQ +type cond = + [ `EQ | `NE | `CS | `CC @@ -21,15 +20,13 @@ type cond = [ | `LT | `GT | `LE - | `AL -] [@@deriving bin_io, compare, sexp, enumerate] - -type nil_reg = [ `Nil ] + | `AL ] [@@deriving bin_io, compare, sexp, enumerate] -(** General purpose registers *) -type gpr_reg = [ - | `R0 +type nil_reg = [ `Nil ] [@@deriving bin_io, compare, sexp, enumerate] + +type gpr_reg = + [ `R0 | `R1 | `R2 | `R3 @@ -44,35 +41,30 @@ type gpr_reg = [ | `R12 | `LR | `PC - | `SP -] [@@deriving bin_io, compare, sexp, enumerate] + | `SP ] +[@@deriving bin_io, compare, sexp, enumerate] +(** General purpose registers *) -type gpr_or_nil = [nil_reg | gpr_reg] +type gpr_or_nil = [ nil_reg | gpr_reg ] [@@deriving bin_io, compare, sexp, enumerate] -(** conditition code registers *) -type ccr_reg = [ - | `CPSR - | `SPSR - | `ITSTATE -] [@@deriving bin_io, compare, sexp, enumerate] +type ccr_reg = [ `CPSR | `SPSR | `ITSTATE ] +[@@deriving bin_io, compare, sexp, enumerate] +(** conditition code registers *) -type ccr_or_nil = [nil_reg | ccr_reg ] +type ccr_or_nil = [ nil_reg | ccr_reg ] [@@deriving bin_io, compare, sexp, enumerate] -type non_nil_reg = [gpr_reg | ccr_reg] +type non_nil_reg = [ gpr_reg | ccr_reg ] [@@deriving bin_io, compare, sexp, enumerate] -type reg = [nil_reg | non_nil_reg] +type reg = [ nil_reg | non_nil_reg ] [@@deriving bin_io, compare, sexp, enumerate] -type op = [ - | `Reg of reg - | `Imm of word -] [@@deriving bin_io, compare, sexp] +type op = [ `Reg of reg | `Imm of word ] [@@deriving bin_io, compare, sexp] -type move_insn = [ - | `ADCri +type move_insn = + [ `ADCri | `ADCrr | `ADCrsi | `ADCrsr @@ -138,11 +130,11 @@ type move_insn = [ | `TSTri | `TSTrr | `TSTrsi - | `TSTrsr -] [@@deriving bin_io, compare, sexp, enumerate] + | `TSTrsr ] +[@@deriving bin_io, compare, sexp, enumerate] -type bits_insn = [ - | `BFC +type bits_insn = + [ `BFC | `BFI | `PKHTB | `RBIT @@ -158,11 +150,11 @@ type bits_insn = [ | `UXTB | `UXTH | `REV - | `REV16 -] [@@deriving bin_io, compare, sexp, enumerate] + | `REV16 ] +[@@deriving bin_io, compare, sexp, enumerate] -type mult_insn = [ - | `MLA +type mult_insn = + [ `MLA | `MLS | `MUL | `SMLABB @@ -175,12 +167,11 @@ type mult_insn = [ | `SMULL | `SMULTB | `UMLAL - | `UMULL -] [@@deriving bin_io, compare, sexp, enumerate] - + | `UMULL ] +[@@deriving bin_io, compare, sexp, enumerate] -type mem_multi_insn = [ - | `LDMDA +type mem_multi_insn = + [ `LDMDA | `LDMDA_UPD | `LDMDB | `LDMDB_UPD @@ -195,12 +186,11 @@ type mem_multi_insn = [ | `STMIA | `STMIA_UPD | `STMIB - | `STMIB_UPD -] [@@deriving bin_io, compare, sexp, enumerate] - + | `STMIB_UPD ] +[@@deriving bin_io, compare, sexp, enumerate] -type mem_insn = [ - | mem_multi_insn +type mem_insn = + [ mem_multi_insn | `LDRBT_POST_IMM | `LDRBT_POST_REG | `LDRB_POST_IMM @@ -261,63 +251,43 @@ type mem_insn = [ | `STR_PRE_IMM | `STR_PRE_REG | `STRi12 - | `STRrs -] [@@deriving bin_io, compare, sexp, enumerate] + | `STRrs ] +[@@deriving bin_io, compare, sexp, enumerate] -type branch_insn = [ - | `BL - | `BLX - | `BLX_pred - | `BLXi - | `BL_pred - | `BX - | `BX_RET - | `BX_pred - | `Bcc -] [@@deriving bin_io, compare, sexp, enumerate] +type branch_insn = + [ `BL | `BLX | `BLX_pred | `BLXi | `BL_pred | `BX | `BX_RET | `BX_pred | `Bcc ] +[@@deriving bin_io, compare, sexp, enumerate] -type special_insn = [ - | `CPS2p - | `DMB - | `DSB - | `HINT - | `MRS - | `MSR - | `PLDi12 - | `SVC -] [@@deriving bin_io, compare, sexp, enumerate] +type special_insn = + [ `CPS2p | `DMB | `DSB | `HINT | `MRS | `MSR | `PLDi12 | `SVC ] +[@@deriving bin_io, compare, sexp, enumerate] -type insn = [ - | move_insn - | bits_insn - | mult_insn - | mem_insn - | branch_insn - | special_insn -] [@@deriving bin_io, compare, sexp, enumerate] +type insn = + [ move_insn | bits_insn | mult_insn | mem_insn | branch_insn | special_insn ] +[@@deriving bin_io, compare, sexp, enumerate] (** Memory access operations *) (** Types for single-register memory access *) type mode_r = Offset | PreIndex | PostIndex + type sign = Signed | Unsigned type operation = Ld | St -type size = B | H | W | D -[@@deriving compare] +type size = B | H | W | D [@@deriving compare] (** Types for multiple-register memory access *) type mode_m = IA | IB | DA | DB + type update_m = Update | NoUpdate -(** Types for data movement operations *) -type arth = [`ADD | `ADC | `SBC | `RSC | `SUB | `RSB ] -type move = [`AND | `BIC | `EOR | `MOV | `MVN | `ORR ] -type data_oper = [ arth | move] +type arth = [ `ADD | `ADC | `SBC | `RSC | `SUB | `RSB ] +(** Types for data movement operations *) -type repair = [`POS | `NEG] [@@deriving compare] +type move = [ `AND | `BIC | `EOR | `MOV | `MVN | `ORR ] +type data_oper = [ arth | move ] +type repair = [ `POS | `NEG ] [@@deriving compare] +type shift = [ `ASR | `LSL | `LSR | `ROR | `RRX ] (** shift types *) -type shift = [`ASR | `LSL | `LSR | `ROR | `RRX] - type smul_size = BB | BT | TB | TT | D | DX | WB | WT diff --git a/lib/arm/arm_utils.ml b/lib/arm/arm_utils.ml index 0fc4a58f3..734db2608 100644 --- a/lib/arm/arm_utils.ml +++ b/lib/arm/arm_utils.ml @@ -1,14 +1,14 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std - open Arm_types module Env = Arm_env let fail here fmt = - ksprintf (fun msg -> - let msg = sprintf "%s: %s" - (Source_code_position.to_string here) msg in - raise (Lifting_failed msg)) fmt + ksprintf + (fun msg -> + let msg = sprintf "%s: %s" (Source_code_position.to_string here) msg in + raise (Lifting_failed msg)) + fmt let assert_reg loc = function | `Imm _ -> fail loc "expected reg" @@ -18,41 +18,30 @@ let assert_imm loc = function | `Reg _ -> fail loc "expected imm" | `Imm imm -> imm - let assert_cond loc op = match Arm_cond.create (assert_imm loc op) with | Ok cond -> cond - | Error err -> fail loc "bad argument (cond): %s" @@ - Error.to_string_hum err - -let tmp ?(name="v") typ = - Var.create ~fresh:true ~is_virtual:true name typ + | Error err -> fail loc "bad argument (cond): %s" @@ Error.to_string_hum err - -let assn d s = - if Var.equal d Env.pc then Bil.jmp s else Bil.move d s +let tmp ?(name = "v") typ = Var.create ~fresh:true ~is_virtual:true name typ +let assn d s = if Var.equal d Env.pc then Bil.jmp s else Bil.move d s let bitlen = function | Type.Imm len -> len - | Type.Mem (_,size) -> Size.in_bits size - | Type.Unk -> - fail [%here] "can't infer length from unknown type" - + | Type.Mem (_, size) -> Size.in_bits size + | Type.Unk -> fail [%here] "can't infer length from unknown type" -let is_move = function - | Bil.Move _ -> true - | _ -> false +let is_move = function Bil.Move _ -> true | _ -> false -let exec - (stmts : stmt list) - ?(flags : stmt list option) - ?(wflag : op option) +let exec (stmts : stmt list) ?(flags : stmt list option) ?(wflag : op option) (cond : op) : stmt list = (* write to the flags if wflag is CPSR *) let cond = assert_cond [%here] cond in - let stmts = match flags, wflag with + let stmts = + match (flags, wflag) with | Some f, Some (`Reg `CPSR) -> stmts @ f - | _ -> stmts in + | _ -> stmts + in (* generates an expression for the given McCond *) let set_cond cond = let z = Bil.var Env.zf in @@ -71,26 +60,25 @@ let exec | `VS -> Bil.(v = t) | `VC -> Bil.(v = f) | `HI -> Bil.((c = t) land (z = f)) - | `LS -> Bil.((c = f) lor (z = t)) + | `LS -> Bil.((c = f) lor (z = t)) | `GE -> Bil.(n = v) | `LT -> Bil.(n <> v) - | `GT -> Bil.((z = f) land (n = v)) - | `LE -> Bil.((z = t) lor (n <> v)) - | `AL -> t in + | `GT -> Bil.((z = f) land (n = v)) + | `LE -> Bil.((z = t) lor (n <> v)) + | `AL -> t + in (* We shortcut if the condition = all *) match cond with | `AL -> stmts | _ when List.for_all stmts ~f:is_move -> - let cval = set_cond cond and cvar = tmp bool_t in - let cond = Bil.var cvar in - Bil.(cvar := cval) :: List.map stmts ~f:(function - | Bil.Move (v,_) as s when Var.is_virtual v -> s - | Bil.Move (v,x) -> - Bil.(v := ite ~if_:cond ~then_:x ~else_:(var v)) - | _ -> assert false) - | _ -> - [Bil.If (set_cond cond, stmts, [])] - + let cval = set_cond cond and cvar = tmp bool_t in + let cond = Bil.var cvar in + Bil.(cvar := cval) + :: List.map stmts ~f:(function + | Bil.Move (v, _) as s when Var.is_virtual v -> s + | Bil.Move (v, x) -> Bil.(v := ite ~if_:cond ~then_:x ~else_:(var v)) + | _ -> assert false) + | _ -> [ Bil.If (set_cond cond, stmts, []) ] let exp_of_reg reg = Bil.var (Env.of_reg reg) @@ -98,13 +86,7 @@ let exp_of_op = function | `Reg reg -> exp_of_reg reg | `Imm word -> Bil.int word -let cast_type = function - | Signed -> Bil.signed - | Unsigned -> Bil.unsigned - +let cast_type = function Signed -> Bil.signed | Unsigned -> Bil.unsigned let cast_of_sign sign size exp = Bil.cast (cast_type sign) size exp - - - let msb r = Bil.(cast high 1 r) let zero ty = Bil.int (Word.zero (bitlen ty)) diff --git a/lib/arm/arm_utils.mli b/lib/arm/arm_utils.mli index 41eee2431..75e990029 100644 --- a/lib/arm/arm_utils.mli +++ b/lib/arm/arm_utils.mli @@ -1,31 +1,17 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Arm_types - val tmp : ?name:string -> typ -> var - val assn : var -> exp -> stmt - -val fail : Source_code_position.t -> ('a,unit,string,'b) format4 -> 'a - +val fail : Source_code_position.t -> ('a, unit, string, 'b) format4 -> 'a val bitlen : typ -> int - val exec : stmt list -> ?flags:stmt list -> ?wflag:op -> op -> stmt list - val exp_of_op : op -> exp - val exp_of_reg : reg -> exp - val cast_of_sign : sign -> int -> exp -> exp - - val assert_reg : Source_code_position.t -> op -> reg - val assert_imm : Source_code_position.t -> op -> word - val assert_cond : Source_code_position.t -> op -> cond - val msb : exp -> exp - val zero : typ -> exp diff --git a/lib/arm/dune b/lib/arm/dune index 93dadcb7e..a1ccad717 100644 --- a/lib/arm/dune +++ b/lib/arm/dune @@ -1,16 +1,17 @@ (library (name bap_arm) (public_name bap-arm) - (preprocess (pps ppx_bap)) + (preprocess + (pps ppx_bap)) (wrapped false) (libraries - bap - bap-core-theory - bap-knowledge - bap-primus - bap-traces - bitvec - bitvec-order - core_kernel - ogre - regular)) + bap + bap-core-theory + bap-knowledge + bap-primus + bap-traces + bitvec + bitvec-order + core + ogre + regular)) diff --git a/lib/bap/bap.ml b/lib/bap/bap.ml index 86c7bd46a..9a87d06ef 100644 --- a/lib/bap/bap.ml +++ b/lib/bap/bap.ml @@ -1,28 +1,36 @@ -open Core_kernel[@@warning "-D"] +open Core module Std = struct type 'a printer = Format.formatter -> 'a -> unit + include Bap_types.Std include Bap_image_std include Bap_disasm_std include Bap_sema.Std + module Event = struct include Bap_main_event - include Regular.Std.Printable.Make(struct - type t = Bap_main_event.t - let module_name = Some "Bap.Std.Event" - let pp = Bap_main_event.pp - end) + + include Regular.Std.Printable.Make (struct + type t = Bap_main_event.t + + let module_name = Some "Bap.Std.Event" + let pp = Bap_main_event.pp + end) end + module Project = Bap_project module Self = Bap_self.Create + module Log = struct let start ?logdir:_ () = Event.Log.message Event.Log.Warning ~section:"main" - "The deprecated Bap.Std.Log.start function is used. \ - This function does nothing. Use `Bap_main.init' instead" + "The deprecated Bap.Std.Log.start function is used. This function does \ + nothing. Use `Bap_main.init' instead" end + type project = Project.t type event = Event.t = .. + module Monad = Legacy.Monad end diff --git a/lib/bap/bap.mli b/lib/bap/bap.mli index f9d94dbc2..a0ac805db 100644 --- a/lib/bap/bap.mli +++ b/lib/bap/bap.mli @@ -1,6 +1,6 @@ -(** BAP Standard Library *) +(** BAP Standard Library *) -open Core_kernel[@@warning "-D"] +open Core open Monads.Std open Regular.Std open Graphlib.Std @@ -15,13 +15,13 @@ module Std : sig {3 Layered Architecture} - The BAP library has the layered architecture consisting of four - layers. Although the layers are not really observable from outside - of the library, they make it easier to learn the library as they - introduce new concepts sequentially. On top of these layers, the - {{!section:project}Project} module is defined that consolidates - all information about a target of analysis. The [Project] module - may be viewed as an entry point to the library. + The BAP library has the layered architecture consisting of four layers. + Although the layers are not really observable from outside of the library, + they make it easier to learn the library as they introduce new concepts + sequentially. On top of these layers, the {{!section:project}Project} + module is defined that consolidates all information about a target of + analysis. The [Project] module may be viewed as an entry point to the + library. {v +-----------------------------------------------------+ @@ -51,54 +51,49 @@ module Std : sig +-----------------------------------------------------+ v} - The {{!bfl}Foundation library} defines {{!Bil}BAP Instruction - language} data types, as well as other useful data structures, - like {!Value}, {!Trie}, {!Vector}, etc. The - {{!section:image}Memory model} layer is responsible for loading - and parsing binary objects and representing them in a computer - memory. It also defines a few useful data structures that are - used extensively by later layers, e.g., {!Table} and - {!Memmap}. The next layer performs - {{!section:disasm}disassembly} and lifting to BIL. Finally, the - {{!section:sema}semantic analysis} layer transforms a binary - into an IR representation, that is suitable for writing - analysis. + The {{!bfl}Foundation library} defines {{!Bil}BAP Instruction language} + data types, as well as other useful data structures, like {!Value}, + {!Trie}, {!Vector}, etc. The {{!section:image}Memory model} layer is + responsible for loading and parsing binary objects and representing them + in a computer memory. It also defines a few useful data structures that + are used extensively by later layers, e.g., {!Table} and {!Memmap}. The + next layer performs {{!section:disasm}disassembly} and lifting to BIL. + Finally, the {{!section:sema}semantic analysis} layer transforms a binary + into an IR representation, that is suitable for writing analysis. {3 Plugin Architecture} - The standard library tries to be as extensible as possible. We - are aware, that there are not good solutions for some problems, so - we don't want to force our way of doing things. In short, we're - trying to provide mechanisms, not policies. We achieve this by - employing the dependency injection principle. By inversing the - dependency we allow the library to depend on a user code. For - example, a user code can teach the library how to disassemble - the binary or even how to reconstruct the CFG. In fact, the - library by itself doesn't contain the disassembler or lifter, or - any architecture specific code. Everything is injected later by - corresponding plugins. - - The library defines a fixed set of extension points. (Other - libraries, that constitute the Platform and follow the same - principle, can define their own extension points, so the - following set is not complete): - - - loader - add new file formats (see {!Image.register_backend} or {!Project.Input}); + The standard library tries to be as extensible as possible. We are aware, + that there are not good solutions for some problems, so we don't want to + force our way of doing things. In short, we're trying to provide + mechanisms, not policies. We achieve this by employing the dependency + injection principle. By inversing the dependency we allow the library to + depend on a user code. For example, a user code can teach the library how + to disassemble the binary or even how to reconstruct the CFG. In fact, the + library by itself doesn't contain the disassembler or lifter, or any + architecture specific code. Everything is injected later by corresponding + plugins. + + The library defines a fixed set of extension points. (Other libraries, + that constitute the Platform and follow the same principle, can define + their own extension points, so the following set is not complete): + + - loader - add new file formats (see {!Image.register_backend} or + {!Project.Input}); - target - add new architecture (see {!register_target}); - - disassembler - plug in a disassembler (see 'disasm.hpp' for c++ disassembler interface); + - disassembler - plug in a disassembler (see 'disasm.hpp' for c++ + disassembler interface); - attributes - extend the attribute type ({!Value.Tag.register}); - symbolizer - add names to functions (see {!Symbolizer}); - rooter - find function starts (see {!Rooter}); - brancher - resolve jump destinations (see {!Brancher}) - - reconstructor - CFG reconstruction algorithm (see - {!Reconstructor}); + - reconstructor - CFG reconstruction algorithm (see {!Reconstructor}); - analysis - write your own arbitrary analysis pass (see {!Project.register_pass}) - The {!Regular.Std} library, that forms a foundation for the BAP - Standard Library, also follows the dependency injection - principle, so every data type that implements regular interface, - can be dynamically extended with: + The {!Regular.Std} library, that forms a foundation for the BAP Standard + Library, also follows the dependency injection principle, so every data + type that implements regular interface, can be dynamically extended with: - pretty printing function; - serialization subroutines; @@ -106,48 +101,44 @@ module Std : sig {3 Writing the analysis} - A common use case, is to write some analysis that will take the - program in some representation and then either output result of - analysis in a human or machine readable way, or transform the - program, in a way that can be employed by other - analysis. Following a naming convention of a more established - community of compiler writers, we name such analysis a _pass_. + A common use case, is to write some analysis that will take the program in + some representation and then either output result of analysis in a human + or machine readable way, or transform the program, in a way that can be + employed by other analysis. Following a naming convention of a more + established community of compiler writers, we name such analysis a _pass_. The library itself doesn't run any analysis, it part of the job of a - frontend to run it. In particular, the [bap] frontend, will run - the analyses based on a command line specification. See [bap - --help] for more information. - - We use {!Project} data structure to represent a program and all - associated knowledge that we were capable to infer. To learn - how to use the project data structure continue to - {!section:project}. - *) + frontend to run it. In particular, the [bap] frontend, will run the + analyses based on a command line specification. See [bap --help] for more + information. + + We use {!Project} data structure to represent a program and all associated + knowledge that we were capable to infer. To learn how to use the project + data structure continue to {!section:project}. *) (** {2:bfl Foundation Library} - At this layer we define ({{!Bil}Binary Instruction language}) - and few other useful data structures: + At this layer we define ({{!Bil}Binary Instruction language}) and few + other useful data structures: - {{!Arch}arch} - describes computer architecture; - {{!Size}size} - word and register sizes; - - {{!Var}var} - {{!Bil}BIL} variable; + - {{!Var}var} - {{!Bil}BIL} variable; - {{!Type}typ} - {{!Bil}BIL} type system; - - {{!Exp}exp} - {{!Bil}BIL} expression sub-language; + - {{!Exp}exp} - {{!Bil}BIL} expression sub-language; - {{!Stmt}stmt} - {{!Bil}BIL} statements; - - {{!Bitvector}bitvector} - a bitvector data structure - to represent immediate data, used usually by their aliases {!word} and {!addr}; + - {{!Bitvector}bitvector} - a bitvector data structure to represent + immediate data, used usually by their aliases {!word} and {!addr}; - {{!Value}value} - an extensible variant type; - {{!Dict}dict} - an extensible record; - {{!Vector}vector} - an array that can grow; - {{!Trie}Trie} - prefix trees; Most of the types implement the {{!Regular.Std.Regular.S}Regular} - interface. This interface is very similar to Core's - [Identifiable], and is supposed to represent a type that is as - common as a built-in type. One should expect to find any - function that is implemented for such types as [int], [string], - [char], etc. Namely, this interface includes: + interface. This interface is very similar to Core's [Identifiable], and is + supposed to represent a type that is as common as a built-in type. One + should expect to find any function that is implemented for such types as + [int], [string], [char], etc. Namely, this interface includes: - comparison functions: ([<, >, <= , >= , compare, between, ...]); - each type defines a polymorphic [Map] with keys of type [t]; @@ -155,115 +146,111 @@ module Std : sig - hashtable is exposed via [Table] module; - hashset is available under [Hash_set] name - sexpable and binable interface; - - [to_string], [str], [pp], [ppo], [pps] functions - for pretty-printing. + - [to_string], [str], [pp], [ppo], [pps] functions for pretty-printing. - It is a convention, that for each type, there is a module with - the same name that implements its interface. For example, type - [exp] is a type abbreviation for [Exp.t], and module [Exp] - contains all functions and types related to type [exp]. For - example, to create a hashtable of statements, just type: + It is a convention, that for each type, there is a module with the same + name that implements its interface. For example, type [exp] is a type + abbreviation for [Exp.t], and module [Exp] contains all functions and + types related to type [exp]. For example, to create a hashtable of + statements, just type: [let table = Exp.Table.create ()] - If a type is a variant type (i.e., it defines constructors) then - for each constructor named [Name], there exists a corresponding - function named [name] that will accept the same number of - arguments as the arity of the constructor (also named a - _functional constructor_). For example, a [Bil.Int] can be - constructed with the [Bil.int] function that has type [word -> - exp]. If a constructor has several arguments of the same type we - usually disambiguate using labels, e.g., [Bil.Load of - (exp,exp,endian,size)] has function {{!Bil.load}Bil.load} with - type: [mem:exp -> addr:exp -> endian -> size -> exp] + If a type is a variant type (i.e., it defines constructors) then for each + constructor named [Name], there exists a corresponding function named + [name] that will accept the same number of arguments as the arity of the + constructor (also named a _functional constructor_). For example, a + [Bil.Int] can be constructed with the [Bil.int] function that has type + [word -> exp]. If a constructor has several arguments of the same type we + usually disambiguate using labels, e.g., + [Bil.Load of (exp,exp,endian,size)] has function {{!Bil.load}Bil.load} + with type: [mem:exp -> addr:exp -> endian -> size -> exp] {3:value Value} {{!Value}Universal values} can be viewed as extensible variants on - steroids. Not only they maybe extended, but they also can be - serialized, compared with user-defined comparison function and - even pretty printed. + steroids. Not only they maybe extended, but they also can be serialized, + compared with user-defined comparison function and even pretty printed. {3:dict Dict} - Like {{!Value}value} is an extensible sum type, {{!Dict}dict} - can be viewed as an extensible product type. Dict is a sequence - of values of type {!value}, with {{!Value.Tag}tags} used as - field names. Of course, fields are unique. + Like {{!Value}value} is an extensible sum type, {{!Dict}dict} can be + viewed as an extensible product type. Dict is a sequence of values of type + {!value}, with {{!Value.Tag}tags} used as field names. Of course, fields + are unique. {3:vector Vector} - {!Vector} is an implementation of C++ STL like vectors with - logarithmic push back. + {!Vector} is an implementation of C++ STL like vectors with logarithmic + push back. {3:tries Tries} - The Foundation library also defines a prefix tree data structure - that proves to be useful for binary analysis applications. - {{!module:Trie}Trie}s in BAP is a functor that derives a - polymorphic trie data structure for a given - {{!modtype:Trie.Key}Key}. + The Foundation library also defines a prefix tree data structure that + proves to be useful for binary analysis applications. + {{!module:Trie}Trie}s in BAP is a functor that derives a polymorphic trie + data structure for a given {{!module-type:Trie.Key}Key}. - For convenience we support instantiating tries for most of - our data structures. For example, {{!Bitvector}Word} has several + For convenience we support instantiating tries for most of our data + structures. For example, {{!Bitvector}Word} has several {{!Bitvector.Trie}tries} inside. - For the common string trie, there's {!Trie.String}. *) + For the common string trie, there's {!Trie.String}. *) (** {2:image Memory model} - This layer is responsible for the representation of binaries. It - provides interfaces for the memory objects: + This layer is responsible for the representation of binaries. It provides + interfaces for the memory objects: - - {{!Memory}mem} - a contiguous array of bytes, indexed with - absolute addresses; + - {{!Memory}mem} - a contiguous array of bytes, indexed with absolute + addresses; - - {{!Table} 'a table} - a mapping from a memory regions to - arbitrary data (no duplicates or intersections); + - {{!Table} 'a table} - a mapping from a memory regions to arbitrary data + (no duplicates or intersections); - - {{!Memmap}a memmap} - a mapping from memory region to - arbitrary data with duplicates and intersections allowed, aka - segment tree or interval map; + - {{!Memmap}a memmap} - a mapping from memory region to arbitrary data + with duplicates and intersections allowed, aka segment tree or interval + map; - - {{!Image}image} - represents a binary object with all its - symbols, segments, sections and other meta information. + - {{!Image}image} - represents a binary object with all its symbols, + segments, sections and other meta information. - The [Image] module uses the plugin system to load binary - objects. In order to add new loader, one should implement the - {{!Backend}Backend.t} loader function and register it with the + The [Image] module uses the plugin system to load binary objects. In order + to add new loader, one should implement the {{!Backend}Backend.t} loader + function and register it with the {{!Image.register_backend}Image.register_backend} function. *) (** {2:disasm Disassembler} - This layer defines the interfaces for disassemblers. Two interfaces - are provided: + This layer defines the interfaces for disassemblers. Two interfaces are + provided: - - {{!Disasm}Disasm} - a regular interface that hides all - complexities, but may not always be very flexible. - - {{!Disasm_expert}Disasm_expert} - an expert interface that - provides access to a low-level representation. It is very - flexible and fast, but harder to use. + - {{!Disasm}Disasm} - a regular interface that hides all complexities, but + may not always be very flexible. + - {{!Disasm_expert}Disasm_expert} - an expert interface that provides + access to a low-level representation. It is very flexible and fast, but + harder to use. - To disassemble files or data with the regular interface, use - one of the following functions: + To disassemble files or data with the regular interface, use one of the + following functions: - {!Disasm.of_mem} - to disassemble a region of memory; - {!Disasm.of_image} - to disassemble a loaded binary object; - {!Disasm.of_file} - to disassemble file. - All these functions perform disassembly by recursive descent, - reconstruct the control flow graph, and perform lifting. + All these functions perform disassembly by recursive descent, reconstruct + the control flow graph, and perform lifting. - The result of disassembly is represented by the abstract value - of type {{!Disasm}disasm}. Two main data structures that are - used to represent disassembled program are: + The result of disassembly is represented by the abstract value of type + {{!Disasm}disasm}. Two main data structures that are used to represent + disassembled program are: - {{!Insn}insn} - a machine instruction; - {{!Block}block} - a basic block, i.e., a linear sequence of instructions. - The following figure shows the relationship between basic data - structures of the disassembled program. + The following figure shows the relationship between basic data structures + of the disassembled program. {v +-----------------+ @@ -289,52 +276,46 @@ module Std : sig v} A disassembled program is represented as a set of interconnected - {{!Block}basic blocks}, called a whole program control flow - graph (CFG) and it is indeed represented as a graph - {!Graphs.Cfg}. See {{!Graphlib.Std}graphlib} for more - information on graphs. - - Each block is a container to a sequence of machine - instructions. It is guaranteed that there's at least one - instruction in the block, thus the {{!Block.leader}Block.leader} - and {{!Block.terminator}Block.terminator} functions are total. - - Each {{!Insn}machine instruction} is represented by its - [opcode], [name] and [array] of operands (these are machine and - disassembler specific), a set of predicates (that approximates - instruction semantics on a very high level), and a sequence of - {{!Bil}BIL} statements that precisely define the semantics of - the instruction. - - The expert interface exposes low level interface that provides - facilities for building custom implementations of - disassemblers. The interface to the disassembler backend is - exposed via the {!Disasm_expert.Basic} module. New backends can - be added by implementing the 'disasm.hpp' interface. - - Modules of type {{!CPU}CPU} provide a high level abstraction of - the machine CPU and allow one to reason about the instruction - semantics independently from the target platform. The module - type {{!Target}Target} brings [CPU] and [ABI] together. To get - an instance of this module, you can use the - {{!target_of_arch}target_of_arch} function. Architecture - specific implementations of the [Target] interface may (and - usually do) provide more information, see corresponding support - libraries for {!ARM} and {{!X86_cpu}x86} architectures. - *) + {{!Block}basic blocks}, called a whole program control flow graph (CFG) + and it is indeed represented as a graph {!Graphs.Cfg}. See + {{!Graphlib.Std}graphlib} for more information on graphs. + + Each block is a container to a sequence of machine instructions. It is + guaranteed that there's at least one instruction in the block, thus the + {{!Block.leader}Block.leader} and {{!Block.terminator}Block.terminator} + functions are total. + + Each {{!Insn}machine instruction} is represented by its [opcode], [name] + and [array] of operands (these are machine and disassembler specific), a + set of predicates (that approximates instruction semantics on a very high + level), and a sequence of {{!Bil}BIL} statements that precisely define the + semantics of the instruction. + + The expert interface exposes low level interface that provides facilities + for building custom implementations of disassemblers. The interface to the + disassembler backend is exposed via the {!Disasm_expert.Basic} module. New + backends can be added by implementing the 'disasm.hpp' interface. + + Modules of type {{!CPU}CPU} provide a high level abstraction of the + machine CPU and allow one to reason about the instruction semantics + independently from the target platform. The module type {{!Target}Target} + brings [CPU] and [ABI] together. To get an instance of this module, you + can use the {{!target_of_arch}target_of_arch} function. Architecture + specific implementations of the [Target] interface may (and usually do) + provide more information, see corresponding support libraries for {!ARM} + and {{!X86_cpu}x86} architectures. *) (** {2:sema Semantic Analysis} - On the semantic level the disassembled program is lifted into - the BAP Intermediate Representation (BIR). BIR is a - semi-graphical representation of BIL (where BIL represents a - program as Abstract Syntax Tree). The BIR provides mechanisms to - express richer relationships between program terms and it also - easier to use for most use cases, especially for data dependency - analysis. + On the semantic level the disassembled program is lifted into the BAP + Intermediate Representation (BIR). BIR is a semi-graphical representation + of BIL (where BIL represents a program as Abstract Syntax Tree). The BIR + provides mechanisms to express richer relationships between program terms + and it also easier to use for most use cases, especially for data + dependency analysis. - The program in IR is build of terms. In fact the program itself - is also a term. There're only 7 kinds of terms: + The program in IR is build of terms. In fact the program itself is also a + term. There're only 7 kinds of terms: - {{!Program}program} - the program in whole; - {{!Sub}sub} - subroutine; @@ -344,33 +325,29 @@ module Std : sig - {{!Phi}phi} - phi-node in the SSA form; - {{!Jmp}jmp} - a transfer of control. - Unlike expressions and statements in BIL, IR's terms are {e - concrete entities}. Concrete entity is such entity that can - change in time and space, as well as come in and out of - existence. Contrary, {e abstract entity} is eternal and - unchangeable. {e Identity} denotes the sameness of a concrete - entity as it changes in time. Abstract entities don't have an - identity since they are immutable. Program is built of concrete - entities called terms. Terms have {e attributes} that can - change in time, without affecting the identity of a term. - Attributes are abstract entities. In each particular point of - space and time a term is represented by a snapshot of all its - attributes, colloquially called {e value}. Functions that - change the value of a term in fact return a new value with - different set of attributes. For example, [def] term has two - attributes: left hand side (lhs), that associates definition - with abstract variable, and right hand side (rhs) that - associates [def] with an abstract expression. Suppose, that the - definition was: + Unlike expressions and statements in BIL, IR's terms are + {e concrete entities}. Concrete entity is such entity that can change in + time and space, as well as come in and out of existence. Contrary, + {e abstract entity} is eternal and unchangeable. {e Identity} denotes the + sameness of a concrete entity as it changes in time. Abstract entities + don't have an identity since they are immutable. Program is built of + concrete entities called terms. Terms have {e attributes} that can change + in time, without affecting the identity of a term. Attributes are abstract + entities. In each particular point of space and time a term is represented + by a snapshot of all its attributes, colloquially called {e value}. + Functions that change the value of a term in fact return a new value with + different set of attributes. For example, [def] term has two attributes: + left hand side (lhs), that associates definition with abstract variable, + and right hand side (rhs) that associates [def] with an abstract + expression. Suppose, that the definition was: {[ # let d_1 = Def.create x Bil.(var y + var z);; val d_1 : Def.t = 00000001: x := y + z ]} - To change the right hand side of a definition we use - [Def.with_rhs] that returns the {e same} definition but with - {e different} value: + To change the right hand side of a definition we use [Def.with_rhs] that + returns the {e same} definition but with {e different} value: {[ # let d_2 = Def.with_rhs d_1 Bil.(int Word.b1);; @@ -382,25 +359,26 @@ module Std : sig {[ # Def.equal d_1 d_2;; - : bool = false - ]} of the same term {[ + ]} + of the same term + {[ # Term.same d_1 d_2;; - : bool = true ]} - The identity of this terms is denoted by the term identifier - ([tid]). In the textual representation term identifiers are - printed as ordinal numbers. - - Terms, can contain other terms. But unlike BIL expressions or - statements, this relation is not truly recursive, since the - structure of program term is fixed: [arg], [phi], [def], [jmp] - are leaf terms; [sub] can only contain [arg]'s or [blk]'s; [blk] - consists of [phi], [def] and [jmp] sequences of terms, as - pictured in the figure below. Although, the term structure is - closed to changes, you still can extend particular term with - attributes, using [set_attr] and [get_attr] functions of the - {{!Term}Term} module. This functions are using {{!Value}extensible - variant} type to encode attributes. + The identity of this terms is denoted by the term identifier ([tid]). In + the textual representation term identifiers are printed as ordinal + numbers. + + Terms, can contain other terms. But unlike BIL expressions or statements, + this relation is not truly recursive, since the structure of program term + is fixed: [arg], [phi], [def], [jmp] are leaf terms; [sub] can only + contain [arg]'s or [blk]'s; [blk] consists of [phi], [def] and [jmp] + sequences of terms, as pictured in the figure below. Although, the term + structure is closed to changes, you still can extend particular term with + attributes, using [set_attr] and [get_attr] functions of the {{!Term}Term} + module. This functions are using {{!Value}extensible variant} type to + encode attributes. {v +--------------------------------------------------------+ @@ -424,9 +402,7 @@ module Std : sig | | phi | | def | | jmp | | | +-----------+ +-----------+ +----------+ | +--------------------------------------------------------+ - v} - - *) + v} *) (** {2:project Working with project} @@ -435,8 +411,8 @@ module Std : sig - create it manually using {!Project.create} function; - write a plugin to the [bap] frontend. - Although the first approach is simplistic and gives you a full - control, we still recommend to use the latter. + Although the first approach is simplistic and gives you a full control, we + still recommend to use the latter. To write a program analysis plugin (or pass in short) you need to implement a function with one of the following interfaces: @@ -444,29 +420,29 @@ module Std : sig - [project -> project] and register it with {{!Project.register_pass}register_pass}; - [project -> unit] and register it with - {{!Project.register_pass'}register_pass'}; + {{!Project.register_pass'}register_pass'}; - Once loaded from the [bap] frontend (see [bap --help]) this - function will be invoked with a value of type - {{!Project.t}project} that provides access to all information - gathered from the input source. If the registered function - returns a non [unit] type, then it can functionally update the - project state, e.g., add annotations, discover new symbols, - transform program representation, etc. + Once loaded from the [bap] frontend (see [bap --help]) this function will + be invoked with a value of type {{!Project.t}project} that provides access + to all information gathered from the input source. If the registered + function returns a non [unit] type, then it can functionally update the + project state, e.g., add annotations, discover new symbols, transform + program representation, etc. {4 Example} The following plugin prints all sections in a file: {[ - open Core_kernel[@@warning "-D"] + open Core open Bap.Std open Format let print_sections p = - Project.memory p |> Memmap.to_sequence |> Seq.iter ~f:(fun (mem,x) -> - Option.iter (Value.get Image.section x) ~f:(fun name -> - printf "Section: %s@.%a@." name Memory.pp mem)) + Project.memory p |> Memmap.to_sequence + |> Seq.iter ~f:(fun (mem, x) -> + Option.iter (Value.get Image.section x) ~f:(fun name -> + printf "Section: %s@.%a@." name Memory.pp mem)) let () = Project.register_pass' print_sections ]} @@ -475,104 +451,98 @@ module Std : sig {3 Passing information between passes} - To pass data from one pass to another in a type safe manner, we - use {{!Value}universal values}. Values can be attached to a - particular memory region, IR terms, or put into the [storage] - dictionary. For the first case we use the {{!Memmap}memmap} data - structure. It is an interval tree containing all the memory - regions that are used during analysis. For the [storage] we use - [Dict] data structure. Also, each program term, has its own - dictionary. + To pass data from one pass to another in a type safe manner, we use + {{!Value}universal values}. Values can be attached to a particular memory + region, IR terms, or put into the [storage] dictionary. For the first case + we use the {{!Memmap}memmap} data structure. It is an interval tree + containing all the memory regions that are used during analysis. For the + [storage] we use [Dict] data structure. Also, each program term, has its + own dictionary. {3 Memory annotations} By default the memory is annotated with the following attributes: - - {{!Image.section}section} -- for regions of memory that had a - particular name in the original binary. For example, in ELF, - sections have names that annotate a corresponding memory - region. If project was created from memory object, then the - overall memory will be marked as a ["bap.user"] section. + - {{!Image.section}section} -- for regions of memory that had a particular + name in the original binary. For example, in ELF, sections have names + that annotate a corresponding memory region. If project was created from + memory object, then the overall memory will be marked as a ["bap.user"] + section. - - {{!Image.segment}segment} -- if the binary data was loaded - from a binary format that contains segments, then the - corresponding memory regions are be marked. Segments provide - access to permission information. *) - - (** {1:api BAP API} *) + - {{!Image.segment}segment} -- if the binary data was loaded from a binary + format that contains segments, then the corresponding memory regions are + be marked. Segments provide access to permission information. *) + (** {1:api BAP API} *) (** Abstract integral type. - This module describes an interface of an integral arithmetic - type, as well as the [Make] functor, that derives a module that - implements this interface from a module that provides the - minimal ([Base]) interface *) + This module describes an interface of an integral arithmetic type, as well + as the [Make] functor, that derives a module that implements this + interface from a module that provides the minimal ([Base]) interface *) module Integer : sig - - (** The minimal interface of an integer. *) + (** The minimal interface of an integer. *) module type Base = sig - - (** type of integer *) type t + (** type of integer *) - (** element neutral to the addition *) val zero : t + (** element neutral to the addition *) - (** element neutral to the multiplication *) - val one : t + val one : t + (** element neutral to the multiplication *) - (** [succ n] successor of [n] *) val succ : t -> t + (** [succ n] successor of [n] *) - (** [pred n] is a predecessor of [n] *) val pred : t -> t + (** [pred n] is a predecessor of [n] *) + val abs : t -> t (** [abs x] absolute value of [x] *) - val abs : t -> t + val neg : t -> t (** [neg x] = [-x] *) - val neg : t -> t + val add : t -> t -> t (** [add x y] is [x + y] *) - val add : t -> t -> t + val sub : t -> t -> t (** [sub x y] is [x - y] *) - val sub : t -> t -> t - (** [mul x y] is [x * y] *) - val mul : t -> t -> t + val mul : t -> t -> t + (** [mul x y] is [x * y] *) - (** [div x y] is [x / y] *) - val div : t -> t -> t + val div : t -> t -> t + (** [div x y] is [x / y] *) - (** [modulo x y] is [x mod y] *) - val modulo : t -> t -> t + val modulo : t -> t -> t + (** [modulo x y] is [x mod y] *) + val lnot : t -> t (** [lnot x] is a logical negation of [x] (1-complement) *) - val lnot : t -> t - (** [logand x y] is a conjunction of [x] and [y] *) - val logand : t -> t -> t - (** [logor x y] is a disjunction of [x] and [y] *) - val logor : t -> t -> t + val logand : t -> t -> t + (** [logand x y] is a conjunction of [x] and [y] *) + + val logor : t -> t -> t + (** [logor x y] is a disjunction of [x] and [y] *) - (** [logxor x y] is exclusive or between [x] and [y] *) - val logxor : t -> t -> t + val logxor : t -> t -> t + (** [logxor x y] is exclusive or between [x] and [y] *) + val lshift : t -> t -> t (** [lshift x y] shift [x] by [y] bits left *) - val lshift : t -> t -> t - (** [rshift x y] shift [x] by [y] bits to the right *) - val rshift : t -> t -> t + val rshift : t -> t -> t + (** [rshift x y] shift [x] by [y] bits to the right *) - (** [arshift x y] shift [x] by [y] bits to the right and fill with - the sign bit. *) val arshift : t -> t -> t - + (** [arshift x y] shift [x] by [y] bits to the right and fill with the + sign bit. *) end - (** The integer signature. *) + (** The integer signature. *) module type S = sig type t @@ -580,94 +550,107 @@ module Std : sig (** {3 A common set of infix operators} *) - (** [~-x = neg x] *) - val ( ~-) : t -> t + val ( ~- ) : t -> t + (** [~-x = neg x] *) - (** [x + y = add x y] *) - val ( + ) : t -> t -> t + val ( + ) : t -> t -> t + (** [x + y = add x y] *) - (** [x - y = sub x y] *) - val ( - ) : t -> t -> t + val ( - ) : t -> t -> t + (** [x - y = sub x y] *) - (** [x * y = mul x y] *) - val ( * ) : t -> t -> t + val ( * ) : t -> t -> t + (** [x * y = mul x y] *) - (** [x / y = div x y] *) - val ( / ) : t -> t -> t + val ( / ) : t -> t -> t + (** [x / y = div x y] *) - (** [x mod y = modulo x y] *) - val (mod) : t -> t -> t + val ( mod ) : t -> t -> t + (** [x mod y = modulo x y] *) - (** [x land y = logand x y] *) - val (land) : t -> t -> t + val ( land ) : t -> t -> t + (** [x land y = logand x y] *) - (** [x lor y = logor x y] *) - val (lor) : t -> t -> t + val ( lor ) : t -> t -> t + (** [x lor y = logor x y] *) - (** [lxor x y = logxor x y] *) - val (lxor) : t -> t -> t + val ( lxor ) : t -> t -> t + (** [lxor x y = logxor x y] *) - (** [x lsl y = lshift x y] *) - val (lsl) : t -> t -> t + val ( lsl ) : t -> t -> t + (** [x lsl y = lshift x y] *) - (** [x lsr y] = rshift x y *) - val (lsr) : t -> t -> t + val ( lsr ) : t -> t -> t + (** [x lsr y] = rshift x y *) - (** [x asr y = arshift x y] *) - val (asr) : t -> t -> t + val ( asr ) : t -> t -> t + (** [x asr y = arshift x y] *) end - (** Derive {!S} from the minimal implementation. *) - module Make(T : Base) : S with type t = T.t + (** Derive {!S} from the minimal implementation. *) + module Make (T : Base) : S with type t = T.t end (**/**) (** Legacy - @deprecated Definitions in this module are deprecated - **) + @deprecated Definitions in this module are deprecated **) module Legacy : sig module Monad : sig - open Core_kernel[@@warning "-D"] + open Core + module type Basic = Monad.Basic module type Basic2 = Monad.Basic2 module type Infix = Monad.Infix module type Infix2 = Monad.Infix2 module type S = Monad.S module type S2 = Monad.S2 - module Make(M : Basic) : S with type 'a t := 'a M.t - module Make2(M : Basic2) : S2 with type ('a,'s) t := ('a,'s) M.t + + module Make (M : Basic) : S with type 'a t := 'a M.t + module Make2 (M : Basic2) : S2 with type ('a, 's) t := ('a, 's) M.t + module State : sig module type S = sig - type ('a,'s) t + type ('a, 's) t type 'a result - include Monad.S2 with type ('a,'s) t := ('a,'s) t - val put : 's -> (unit,'s) t - val get : unit -> ('s,'s) t - val gets : ('s -> 'r) -> ('r,'s) t - val update : ('s -> 's) -> (unit,'s) t - val modify : ('a,'s) t -> ('s -> 's) -> ('a,'s) t - val run : ('a,'s) t -> 's -> ('a * 's) result - val eval : ('a,'s) t -> 's -> 'a result - val exec : ('a,'s) t -> 's -> 's result + + include Monad.S2 with type ('a, 's) t := ('a, 's) t + + val put : 's -> (unit, 's) t + val get : unit -> ('s, 's) t + val gets : ('s -> 'r) -> ('r, 's) t + val update : ('s -> 's) -> (unit, 's) t + val modify : ('a, 's) t -> ('s -> 's) -> ('a, 's) t + val run : ('a, 's) t -> 's -> ('a * 's) result + val eval : ('a, 's) t -> 's -> 'a result + val exec : ('a, 's) t -> 's -> 's result end - include S with type 'a result = 'a - and type ('a,'e) t = ('a,'e) Monads.Std.Monad.State.t + + include + S + with type 'a result = 'a + and type ('a, 'e) t = ('a, 'e) Monads.Std.Monad.State.t end + module T : sig module Option : sig - module Make (M : S ) : S with type 'a t = 'a option M.t - module Make2(M : S2) : S2 with type ('a,'b) t = ('a option,'b) M.t + module Make (M : S) : S with type 'a t = 'a option M.t + module Make2 (M : S2) : S2 with type ('a, 'b) t = ('a option, 'b) M.t end + module Or_error : sig - module Make (M : S ) : S with type 'a t = 'a Or_error.t M.t - module Make2(M : S2) : S2 with type ('a,'b) t = ('a Or_error.t,'b) M.t + module Make (M : S) : S with type 'a t = 'a Or_error.t M.t + + module Make2 (M : S2) : + S2 with type ('a, 'b) t = ('a Or_error.t, 'b) M.t end + module Result : sig - module Make(M : S) : S2 with type ('a,'e) t = ('a,'e) Result.t M.t + module Make (M : S) : S2 with type ('a, 'e) t = ('a, 'e) Result.t M.t end + module State : sig - module Make(M : S) : State.S with type 'a result = 'a M.t + module Make (M : S) : State.S with type 'a result = 'a M.t end end end @@ -678,557 +661,506 @@ module Std : sig (**/**) - (** Lazy sequence *) - module Seq : module type of Seq - with type 'a t = 'a Base.Sequence.t - (** type abbreviation for ['a Sequence.t] *) + module Seq : module type of Seq with type 'a t = 'a Base.Sequence.t + (** Lazy sequence *) + type 'a seq = 'a Seq.t [@@deriving bin_io, compare, sexp] + (** type abbreviation for ['a Sequence.t] *) - (** Constructs a trie *) + (** Constructs a trie *) module Trie : sig - - (** Key requirements. - Key is a sequence of tokens of the specified length. - It is better to use contiguous data structures, like - arrays as keys, otherwise you can end up with a slow - implementation (i.e., don't use lists or sequences as - keys, use strings, bitstrings, arrays, etc). *) + (** Key requirements. Key is a sequence of tokens of the specified length. + It is better to use contiguous data structures, like arrays as keys, + otherwise you can end up with a slow implementation (i.e., don't use + lists or sequences as keys, use strings, bitstrings, arrays, etc). *) module type Key = sig - (** the type of key *) type t + (** the type of key *) - (** type of token must implement bin_prot, be comparable and - sexpable *) type token [@@deriving bin_io, compare, sexp] + (** type of token must implement bin_prot, be comparable and sexpable *) - (** [length key] return the amount of tokens in a [key] *) val length : t -> int + (** [length key] return the amount of tokens in a [key] *) - (** [nth_token key n] the [n]'th token of key. Should be O(1) *) val nth_token : t -> int -> token + (** [nth_token key n] the [n]'th token of key. Should be O(1) *) - (** [hash_token tok] efficient hash function for the [token] type. - If nothing efficient came to mind, just use [Hashtbl.hash]. *) val token_hash : token -> int + (** [hash_token tok] efficient hash function for the [token] type. If + nothing efficient came to mind, just use [Hashtbl.hash]. *) end (** Prefix trie interface. - Trie is a mutable table that can be seen as a specialized - form of a hash table. + Trie is a mutable table that can be seen as a specialized form of a hash + table. - Use the [Trie.Make] functor to create modules that implement - this signature. Some modules also provide an implementation - of this signature under a [Trie] name, e.g., [Bitvector.Trie], - [Bil.Trie], [Insn.Trie], etc. See also a [Trie.String] module - below, that is a specialized implementation of a trie data - structure with string keys. - - *) + Use the [Trie.Make] functor to create modules that implement this + signature. Some modules also provide an implementation of this signature + under a [Trie] name, e.g., [Bitvector.Trie], [Bil.Trie], [Insn.Trie], + etc. See also a [Trie.String] module below, that is a specialized + implementation of a trie data structure with string keys. *) module V1 : sig module type S = sig - (** trie can store arbitrary data *) type 'a t [@@deriving bin_io, sexp] + (** trie can store arbitrary data *) - (** a key type that is used to lookup data *) type key + (** a key type that is used to lookup data *) - (** [create ()] creates new empty trie *) val create : unit -> 'a t + (** [create ()] creates new empty trie *) - (** [add trie ~key ~data] associates [data] with [key]. If - [trie] already has some value associated with [key], then - the value will be overwritten (rebound) *) val add : 'a t -> key:key -> data:'a -> unit + (** [add trie ~key ~data] associates [data] with [key]. If [trie] + already has some value associated with [key], then the value will be + overwritten (rebound) *) + val change : 'a t -> key -> ('a option -> 'a option) -> unit (** [change trie key f] if trie has [data] associated with [key] then [f] will be called with [Some data], otherwise it will be called with [None]. If [f] returns [None] then there will be no data associated with [key], if [f] returns [Some thing], then [thing] will be associated with [key] *) - val change : 'a t -> key -> ('a option -> 'a option) -> unit - (** [find trie key] finds data associated with [key] *) val find : 'a t -> key -> 'a option + (** [find trie key] finds data associated with [key] *) - (** [walk trie key ~init ~f] walks down the tree starting from the - root and ending with the last token of the key. Function [f] - is fold over values associated with all substrings of the key, - starting from a zero substring. *) val walk : 'a t -> key -> init:'b -> f:('b -> 'a option -> 'b) -> 'b + (** [walk trie key ~init ~f] walks down the tree starting from the root + and ending with the last token of the key. Function [f] is fold over + values associated with all substrings of the key, starting from a + zero substring. *) - (** [remove trie key] removes value bound with [key] if any. *) val remove : 'a t -> key -> unit + (** [remove trie key] removes value bound with [key] if any. *) - (** [longest_match trie key] find a value associated with a - longest substring of [key]. Returns a pair - a length of - matched key and the value, associated with that key. *) val longest_match : 'a t -> key -> (int * 'a) option + (** [longest_match trie key] find a value associated with a longest + substring of [key]. Returns a pair - a length of matched key and the + value, associated with that key. *) - (** [length trie] returns the number of values in [trie] *) val length : 'a t -> int + (** [length trie] returns the number of values in [trie] *) - (** [pp pp_val] creates a printer for a given value printer - [pp_val]. Example: + val pp : + (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit + (** [pp pp_val] creates a printer for a given value printer [pp_val]. + Example: [let int_trie = String.Trie.pp pp_int] will create a printer for a [String.Trie] that is populated by - integers. *) - val pp : (Format.formatter -> 'a -> unit) -> (Format.formatter -> 'a t -> unit) + integers. *) end end - module V2 : sig - - (** Extended version the [V1.S]. *) + module V2 : sig + (** Extended version the [V1.S]. *) module type S = sig include V1.S type token - - (** [fold trie init f] folds over all elements of trie. - - The function [f x key data] is applied to all elements - of [trie] which were previously added. The [key] is - represented as a list of tokens. - *) val fold : 'a t -> init:'b -> f:('b -> token list -> 'a -> 'b) -> 'b + (** [fold trie init f] folds over all elements of trie. + The function [f x key data] is applied to all elements of [trie] + which were previously added. The [key] is represented as a list of + tokens. *) + val iter : 'a t -> f:(token list -> 'a -> unit) -> unit (** [iter trie f] iterates over all element of trie. - The function [f key data] is applied to all elements - of [trie] which were previously added. The [key] is - represented as a list of tokens. + The function [f key data] is applied to all elements of [trie] which + were previously added. The [key] is represented as a list of tokens. *) - val iter : 'a t -> f:(token list -> 'a -> unit) -> unit - - - (** [make_printer print_tokens print_data] create a trie printer. - - Creates a function that will print a trie using - [print_token] to print the key in the form of a token - list, and [print_data] to print data associated with - keys. - Only those keys that have associated data are printed - (i.e., those that were added to the trie). - - Each key value pair is printed as using the following - format specification: ["@[<2>%a@ %a@]@;"]. So it is - advised to print it in a vertical box. - *) val make_printer : (Format.formatter -> token list -> unit) -> (Format.formatter -> 'a -> unit) -> - (Format.formatter -> 'a t -> unit) + Format.formatter -> + 'a t -> + unit + (** [make_printer print_tokens print_data] create a trie printer. + + Creates a function that will print a trie using [print_token] to + print the key in the form of a token list, and [print_data] to print + data associated with keys. + Only those keys that have associated data are printed (i.e., those + that were added to the trie). + + Each key value pair is printed as using the following format + specification: ["@[<2>%a@ %a@]@;"]. So it is advised to print it in + a vertical box. *) end end module type S = V1.S + (** Create a trie for a given [Key] *) + module Make (Key : Key) : + V2.S with type key = Key.t and type token = Key.token - (** Create a trie for a given [Key] *) - module Make(Key : Key) : V2.S with type key = Key.t - and type token = Key.token - - (** Minimum required interface for a token data type *) + (** Minimum required interface for a token data type *) module type Token = sig - type t [@@deriving bin_io, compare, sexp] + type t [@@deriving bin_io, compare, sexp] + val hash : t -> int end - (** Prefix and suffix tries for specified token types. *) + (** Prefix and suffix tries for specified token types. *) module Array : sig - module Prefix(Tok : Token) : V2.S with type key = Tok.t array - and type token = Tok.t - module Suffix(Tok : Token) : V2.S with type key = Tok.t array - and type token = Tok.t + module Prefix (Tok : Token) : + V2.S with type key = Tok.t array and type token = Tok.t + + module Suffix (Tok : Token) : + V2.S with type key = Tok.t array and type token = Tok.t end - (** Predefined prefix and suffix string tries. *) + (** Predefined prefix and suffix string tries. *) module String : sig module Prefix : V2.S with type key = string and type token = char module Suffix : V2.S with type key = string and type token = char end end - (** Balanced Interval Tree. - Interval trees are used to build efficient mappings from - intervals to arbitrary data. + Interval trees are used to build efficient mappings from intervals to + arbitrary data. - The interval tree may contain overlapping intervals and allows - inserting and removing elements. + The interval tree may contain overlapping intervals and allows inserting + and removing elements. The interval tree is implemented using AVL trees. - @since 1.4 - *) + @since 1.4 *) module Interval_tree : sig - - (** Abstract Interval. - Abstractly an interval is a pair of points, with one point - being the lower bound and another is the upper bound. The upper - bound shall be greater or equal than the lower bound, i.e., + Abstractly an interval is a pair of points, with one point being the + lower bound and another is the upper bound. The upper bound shall be + greater or equal than the lower bound, i.e., [compare (lower x) (upper x) <= 0] for all intervals [x]. - The interval [x] represents a set of points that are greater - or equal than the [lower x] and less than or equal than [upper x]. - (Thus, empty intervals are not representable). - *) + The interval [x] represents a set of points that are greater or equal + than the [lower x] and less than or equal than [upper x]. (Thus, empty + intervals are not representable). *) module type Interval = sig - - (** interval representation *) type t [@@deriving compare, sexp_of] + (** interval representation *) - (** point representation *) type point [@@deriving compare, sexp_of] + (** point representation *) - (** the lower bound of an interval *) val lower : t -> point + (** the lower bound of an interval *) - (** the upper bound of an interval *) val upper : t -> point + (** the upper bound of an interval *) end (** The Interval Tree Interface. - Interval tree is a mapping from intervals to arbitrary - values. The intervals are allowed to intersect. Thus a - single point may belong to more than one - interval. Unlike a regular map, when an association is extract - by using a key value, the interval tree uses notions of - domination and intersection to extract values associated with - all intervals that either dominate (i.e., are super sets) or - intersects with the provided key. In that sense an interval tree is - a multimap. - *) + Interval tree is a mapping from intervals to arbitrary values. The + intervals are allowed to intersect. Thus a single point may belong to + more than one interval. Unlike a regular map, when an association is + extract by using a key value, the interval tree uses notions of + domination and intersection to extract values associated with all + intervals that either dominate (i.e., are super sets) or intersects with + the provided key. In that sense an interval tree is a multimap. *) module type S = sig - - (** interval tree abstract representation *) type 'a t [@@deriving sexp_of] + (** interval tree abstract representation *) - (** the interval *) type key + (** the interval *) - (** an element of the interval *) type point + (** an element of the interval *) - (** [empty x] an empty interval tree *) val empty : 'a t + (** [empty x] an empty interval tree *) - (** [singleton k x] creates an interval tree that has only one - mapping - from the key [k] to data [x] *) val singleton : key -> 'a -> 'a t + (** [singleton k x] creates an interval tree that has only one mapping - + from the key [k] to data [x] *) + val least : 'a t -> point option (** [least t] returns the least bound of the tree [t]. Returns [None] if [t] is empty. *) - val least : 'a t -> point option + val greatest : 'a t -> point option (** [greatest t] returns the greatest bound of the tree [t]. Returns [None] if [t] is empty. *) - val greatest : 'a t -> point option - (** [min_bining t] returns the least binding in the tree *) val min_binding : 'a t -> (key * 'a) option + (** [min_bining t] returns the least binding in the tree *) - (** [max_binding t] returns the greatest binding in the tree *) val max_binding : 'a t -> (key * 'a) option + (** [max_binding t] returns the greatest binding in the tree *) - (** [add t k x] adds a new binding (k,x) to the mapping. *) val add : 'a t -> key -> 'a -> 'a t + (** [add t k x] adds a new binding (k,x) to the mapping. *) - (** [dominators t k] returns all intervals and their associated - values that include [k]. *) val dominators : 'a t -> key -> (key * 'a) Sequence.t + (** [dominators t k] returns all intervals and their associated values + that include [k]. *) - (** [intersections t k] returns all intervals and their associated - values that intersects with [k] *) val intersections : 'a t -> key -> (key * 'a) Sequence.t - - (** [intersects t k] is [true] iff [t] contains an interval + (** [intersections t k] returns all intervals and their associated values that intersects with [k] *) + val intersects : 'a t -> key -> bool + (** [intersects t k] is [true] iff [t] contains an interval that + intersects with [k] *) - (** [dominates t k] is [true] iff all intervals in [t] are - included in [k]. *) val dominates : 'a t -> key -> bool + (** [dominates t k] is [true] iff all intervals in [t] are included in + [k]. *) - (** [contains t p] is [true] if [p] belongs to at least one - interval in [t] *) val contains : 'a t -> point -> bool + (** [contains t p] is [true] if [p] belongs to at least one interval in + [t] *) - (** [lookup t p] returns bindings of all intervals that - contain the given point *) val lookup : 'a t -> point -> (key * 'a) Sequence.t + (** [lookup t p] returns bindings of all intervals that contain the given + point *) - (** [map k ~f] maps all data values with the function [f] *) val map : 'a t -> f:('a -> 'b) -> 'b t + (** [map k ~f] maps all data values with the function [f] *) - (** [mapi k ~f] maps all bindings with the function [f] *) val mapi : 'a t -> f:(key -> 'a -> 'b) -> 'b t + (** [mapi k ~f] maps all bindings with the function [f] *) - (** [filter t ~f] returns a tree where all elements for which - [f] returned [false] are removed. *) val filter : 'a t -> f:('a -> bool) -> 'a t + (** [filter t ~f] returns a tree where all elements for which [f] returned + [false] are removed. *) - (** [filter t ~f] returns a tree where all elements for which - [f] returned [None] are removed and all others are mapped. *) val filter_map : 'a t -> f:('a -> 'b option) -> 'b t + (** [filter t ~f] returns a tree where all elements for which [f] returned + [None] are removed and all others are mapped. *) - (** [filter t ~f] returns a tree where all elements for which - [f] returned [None] are removed and all others are mapped. *) val filter_mapi : 'a t -> f:(key -> 'a -> 'b option) -> 'b t + (** [filter t ~f] returns a tree where all elements for which [f] returned + [None] are removed and all others are mapped. *) - (** [remove t k] removes all bindings to the key [k] *) val remove : 'a t -> key -> 'a t + (** [remove t k] removes all bindings to the key [k] *) - (** [remove_intersections t k] removes all bindings that - intersect with the key [k]. *) val remove_intersections : 'a t -> key -> 'a t + (** [remove_intersections t k] removes all bindings that intersect with + the key [k]. *) - (** [remove_dominators t k] removes all bindings that are - included (dominated by) in the interval [k] *) val remove_dominators : 'a t -> key -> 'a t + (** [remove_dominators t k] removes all bindings that are included + (dominated by) in the interval [k] *) - (** [to_sequence t] returns all bindings in [t] *) val to_sequence : 'a t -> (key * 'a) Sequence.t + (** [to_sequence t] returns all bindings in [t] *) - - (** Interval Trees implement common container interface *) include Container.S1 with type 'a t := 'a t + (** Interval Trees implement common container interface *) end + (** [Make(Interval)] create an abstract interval tree data type that uses + abstract [Interval]. *) + module Make (Interval : Interval) : + S with type key := Interval.t and type point := Interval.point - (** [Make(Interval)] create an abstract interval tree data type - that uses abstract [Interval]. - *) - module Make(Interval : Interval) : S - with type key := Interval.t - and type point := Interval.point - - (** Binable Abstract Interval. + (** Binable Abstract Interval. - An extension of the Interval signature with the - Binable interface. - *) + An extension of the Interval signature with the Binable interface. *) module type Interval_binable = sig type t [@@deriving bin_io, compare, sexp] type point [@@deriving bin_io, compare, sexp] + include Interval with type t := t and type point := point end (** Binable Interval Tree. - An extension of the Interval tree signature with the - Binable interface. + An extension of the Interval tree signature with the Binable interface. *) module type S_binable = sig type 'a t [@@deriving bin_io, compare, sexp] + include S with type 'a t := 'a t end - (** [Make_binable(Interval)] create an abstract interval tree data type - that uses abstract [Interval] and can be serialized via the Binable - interface. - *) - module Make_binable(Interval : Interval_binable) : S_binable - with type key := Interval.t - and type point := Interval.point - + (** [Make_binable(Interval)] create an abstract interval tree data type that + uses abstract [Interval] and can be serialized via the Binable + interface. *) + module Make_binable (Interval : Interval_binable) : + S_binable with type key := Interval.t and type point := Interval.point end - type value [@@deriving bin_io, compare, sexp] - type dict [@@deriving bin_io, compare, sexp] + type value [@@deriving bin_io, compare, sexp] + type dict [@@deriving bin_io, compare, sexp] - (** Type to represent machine word *) type word [@@deriving bin_io, compare, sexp] + (** Type to represent machine word *) - (** A synonym for [word], that should be used for words - that are addresses *) type addr = word [@@deriving bin_io, compare, sexp] + (** A synonym for [word], that should be used for words that are addresses *) - (** Type safe operand and register sizes. *) + (** Type safe operand and register sizes. *) module Size : sig - (** Defines possible sizes for operations operands *) - type all = [ - | `r8 - | `r16 - | `r32 - | `r64 - | `r128 - | `r256 - ] [@@deriving variants] - - type 'a p = 'a constraint 'a = [< all] - [@@deriving bin_io, compare, sexp] + type all = [ `r8 | `r16 | `r32 | `r64 | `r128 | `r256 ] + [@@deriving variants] + (** Defines possible sizes for operations operands *) - type t = all p - [@@deriving bin_io, compare, sexp] + type 'a p = 'a constraint 'a = [< all ] [@@deriving bin_io, compare, sexp] + type t = all p [@@deriving bin_io, compare, sexp] (** {3 Lifting from int} *) - (** [of_int n] return [Ok `rn] if [`rn] exists, [Error] - otherwise. *) val of_int : int -> t Or_error.t + (** [of_int n] return [Ok `rn] if [`rn] exists, [Error] otherwise. *) - (** [of_int_exn n] the same as [of_int], but raises exception - instead of returning [Error] *) val of_int_exn : int -> t + (** [of_int_exn n] the same as [of_int], but raises exception instead of + returning [Error] *) - (** [of_int_opt n] the same as [of_int] but uses [option] type - instead of [Or_error.t] *) val of_int_opt : int -> t option + (** [of_int_opt n] the same as [of_int] but uses [option] type instead of + [Or_error.t] *) - (** [addr_of_int n] return [Ok `rn] if [`rn] exists, [Error] - otherwise. *) val addr_of_int : int -> [ `r32 | `r64 ] Or_error.t + (** [addr_of_int n] return [Ok `rn] if [`rn] exists, [Error] otherwise. *) + val addr_of_int_exn : int -> [ `r32 | `r64 ] (** [addr_of_int_exn n] the same as [addr_of_int], but raises exception instead of returning [Error] *) - val addr_of_int_exn : int -> [ `r32 | `r64 ] + val addr_of_int_opt : int -> [ `r32 | `r64 ] option (** [addr_of_int_opt n] the same as [addr_of_int] but uses [option] type instead of [Or_error.t] *) - val addr_of_int_opt : int -> [ `r32 | `r64 ] option val addr_of_word_size : Word_size.t -> [ `r32 | `r64 ] - val word_of_addr_size : [ `r32 | `r64 ] -> Word_size.t - val to_addr_size : t -> [ `r32 | `r64 ] Or_error.t + val in_bits : 'a p -> int (** [in_bits size] returns size in bits. *) - val in_bits : 'a p -> int - (** [in_bytes sz] returns size in bytes *) val in_bytes : 'a p -> int + (** [in_bytes sz] returns size in bytes *) include Regular.S with type t := t end - (** size of operand *) - type size = Size.t - [@@deriving bin_io, compare, sexp] + type size = Size.t [@@deriving bin_io, compare, sexp] + (** size of operand *) - (** size of address *) - type addr_size = [ `r32 | `r64 ] Size.p - [@@deriving bin_io, compare, sexp] + type addr_size = [ `r32 | `r64 ] Size.p [@@deriving bin_io, compare, sexp] + (** size of address *) (** Bitvector -- an integer with modular arithmentics. - {2 Overview } + {2 Overview} - A numeric value with the 2-complement binary representation. It - is good for representing addresses, offsets and other arithmetic - values. + A numeric value with the 2-complement binary representation. It is good + for representing addresses, offsets and other arithmetic values. - Each value is attributed by a bitwidth and signedness. All - arithmetic operations over values are done modulo their - widths. It is an error to apply arithmetic operation to values - with different widths. Default implementations will raise a an - exception, however there exists a family of modules that provide - arithmetic operations lifted to an [Or_error.t] monad. It is - suggested to use them, if you know what kind of operands you're - expecting. + Each value is attributed by a bitwidth and signedness. All arithmetic + operations over values are done modulo their widths. It is an error to + apply arithmetic operation to values with different widths. Default + implementations will raise a an exception, however there exists a family + of modules that provide arithmetic operations lifted to an [Or_error.t] + monad. It is suggested to use them, if you know what kind of operands + you're expecting. {2:bv_signs Clarification on signs} - By default, all are numbers represented with bitvectors are - considered unsigned. This includes the ordering, e.g., [of_int - (-1) ~width:32] is greater than [of_int 0 ~width:32]. If you - need to perform a signed operation, you can use the [signed] - operator create a signed word with the same value. + By default, all are numbers represented with bitvectors are considered + unsigned. This includes the ordering, e.g., [of_int (-1) ~width:32] is + greater than [of_int 0 ~width:32]. If you need to perform a signed + operation, you can use the [signed] operator create a signed word with the + same value. - If any operand of a binary operation is signed, then a signed - version of an operation is used, i.e., the other operand is - upcasted to the signed kind. + If any operand of a binary operation is signed, then a signed version of + an operation is used, i.e., the other operand is upcasted to the signed + kind. Remember to use explicit casts, whenever you really need a signed representation. Examples: {[ let x = of_int ~-6 ~width:8 - let y = to_int x (* y = 250 *) + let y = to_int x (* y = 250 *) let z = to_int (signed x) (* z = ~-6 *) let zero = of_int 0 ~width:8 - let p = x < zero (* p = false *) - let q = signed x < zero (* p = true *) + let p = x < zero (* p = false *) + let q = signed x < zero (* p = true *) ]} - {2:bv_sizes Clarification on size-morphism } + {2:bv_sizes Clarification on size-morphism} - Size-monomorphic operations (as opposed to size-polymorphic) - expect operands of the same size. When applied to operands of - different sizes they either raise exceptions or return - an [Error] variant as the result. All arithmetic operations are - size-monomorphic and we provide interface that use either - exceptions or [Result.t] to indicate the outcome. + Size-monomorphic operations (as opposed to size-polymorphic) expect + operands of the same size. When applied to operands of different sizes + they either raise exceptions or return an [Error] variant as the result. + All arithmetic operations are size-monomorphic and we provide interface + that use either exceptions or [Result.t] to indicate the outcome. - The comparison operation is size-polymorphic by default and - takes the size of the bitvector into account. Bitvectors - with equal values but different sizes are unequal. The precise - order matches with the order of pairs, where the first - constituent is the bitvector value, and the second is its size, - for example, the following sequence is in an ascending order: + The comparison operation is size-polymorphic by default and takes the size + of the bitvector into account. Bitvectors with equal values but different + sizes are unequal. The precise order matches with the order of pairs, + where the first constituent is the bitvector value, and the second is its + size, for example, the following sequence is in an ascending order: - {[ 0x0:1, 0x0:32, 0x0:64, 0x1:1, 0x1:32, 0xD:4, 0xDEADBEEF:32]}. + {[ + 0x0:1, 0x0:32, 0x0:64, 0x1:1, 0x1:32, 0xD:4, 0xDEADBEEF:32 + ]} + . - A size-monomorphic interfaced is exposed in a [Mono] submodule. So - if you want a monomorphic map, then just use [Mono.Map] module. - Note, [Mono] submodule doesn't provide [Table], since we cannot - guarantee that all keys in a hash-table have equal size. The - order functions provided by the Mono module will raise an - exception when applied to bitvectors with different sizes. + A size-monomorphic interfaced is exposed in a [Mono] submodule. So if you + want a monomorphic map, then just use [Mono.Map] module. Note, [Mono] + submodule doesn't provide [Table], since we cannot guarantee that all keys + in a hash-table have equal size. The order functions provided by the Mono + module will raise an exception when applied to bitvectors with different + sizes. - In the default and [Mono] orders, if either of two values is - signed (see {!bv_signs}) then the values will be ordered as - 2-complement signed integers. + In the default and [Mono] orders, if either of two values is signed (see + {!bv_signs}) then the values will be ordered as 2-complement signed + integers. Another alternative orders are [Signed_value_order], - [Unsigned_value_order], and [Literal_order]. They will be - briefly described below. - - [Signed_value_order] is size-polymoprhic and it simply - ignores the sizes of bitvectors and orders them by values, e.g., - the following bitvectors are ordered in the [Value.Signed] - order, [FF:8; 0:1; 0F:8; FF:32], and [0:1] is equal to - [0:32]. See {!bv_sizes} for more details on the signedness of - operations. Note, that the size of a word still affects the + [Unsigned_value_order], and [Literal_order]. They will be briefly + described below. + + [Signed_value_order] is size-polymoprhic and it simply ignores the sizes + of bitvectors and orders them by values, e.g., the following bitvectors + are ordered in the [Value.Signed] order, [FF:8; 0:1; 0F:8; FF:32], and + [0:1] is equal to [0:32]. See {!bv_sizes} for more details on the + signedness of operations. Note, that the size of a word still affects the order since it defines the position of the most significant bit. - [Unsigned_value_order] ignores the sign and the size of - words and compares them by the unsigned order of their values. - he following numbers are ordered with the [Unsigned_value_order] - order, [0:1, 1:32, 0F:8 FF:8], and [FF:32] is equal to [FF:8]. - [Unsigned_value_order] is faster than then any previously - described order and is useful when the size of the words should - be ignored (or is known to be equal and therefore could be - ignored). + [Unsigned_value_order] ignores the sign and the size of words and compares + them by the unsigned order of their values. he following numbers are + ordered with the [Unsigned_value_order] order, [0:1, 1:32, 0F:8 FF:8], and + [FF:32] is equal to [FF:8]. [Unsigned_value_order] is faster than then any + previously described order and is useful when the size of the words should + be ignored (or is known to be equal and therefore could be ignored). - [Literal_order] is the fastest order that takes into account - all constituents of bitvectors, like if we will treat a - bitvector as triple of its value, size, and sign and order - bitvectors using the lexicographical order. + [Literal_order] is the fastest order that takes into account all + constituents of bitvectors, like if we will treat a bitvector as triple of + its value, size, and sign and order bitvectors using the lexicographical + order. - - {2:bv_string Clarification on string representation } + {2:bv_string Clarification on string representation} As a part of [Identifiable] interface bitvector provides a pair of complement functions: [to_string] and [of_string], that provides - facilities to store bitvector as a human readable string, and to - restore it from string. The format of the representation is the - following (in EBNF): + facilities to store bitvector as a human readable string, and to restore + it from string. The format of the representation is the following (in + EBNF): {[ repr = [sign], [base], digit, {digit}, ":", size, [kind] sign = "+" | "-"; @@ -1241,38 +1173,34 @@ module Std : sig kind = u | s ]} - Examples: - [0x5D:32s, 0b0101:16u, 5:64, +5:8, +0x5D:16]. + Examples: [0x5D:32s, 0b0101:16u, 5:64, +5:8, +0x5D:16]. - If [base] is omitted base-10 is assumed. If the kind is omitted, - then the usigned kind is assumed. The output format is always in - a hex representation with a full prefix. . *) + If [base] is omitted base-10 is assumed. If the kind is omitted, then the + usigned kind is assumed. The output format is always in a hex + representation with a full prefix. . *) module Bitvector : sig - - (** [word] is an abbreviation to [Bitvector.t] *) type t = word + (** [word] is an abbreviation to [Bitvector.t] *) + include Regular.S with type t := t (** {2 Common Interfaces} - A bitvector is a value, first of all, so it supports a common - set of a value interface: it can be stored, compared, it can - be a key in a dictionary, etc. Moreover, being a number it can - be compared with zero and applied to a common set of integer - operations. *) - include Regular.S with type t := t + A bitvector is a value, first of all, so it supports a common set of a + value interface: it can be stored, compared, it can be a key in a + dictionary, etc. Moreover, being a number it can be compared with zero + and applied to a common set of integer operations. *) - (** Bitvector implements a common set of operations that are - expected from integral values. *) include Integer.S with type t := t + (** Bitvector implements a common set of operations that are expected from + integral values. *) - (** The comparable interface with size-monomorphic comparison. *) module Mono : Comparable with type t := t + (** The comparable interface with size-monomorphic comparison. *) (** Compare by value, ignore size, but take into account the sign. See {!bv_sizes} for more information. - @since 2.5.0 - *) + @since 2.5.0 *) module Signed_value_order : sig include Binable.S with type t = t include Comparable.S_binable with type t := t @@ -1299,140 +1227,128 @@ module Std : sig include Hashable.S_binable with type t := t end - (** Specifies the order of bytes in a word. *) type endian = - | LittleEndian (** least significant byte comes first *) - | BigEndian (** most significant byte comes first *) + | LittleEndian (** least significant byte comes first *) + | BigEndian (** most significant byte comes first *) [@@deriving bin_io, compare, sexp] (** {2 Constructors} *) - - (** [create v w] creates a word from bitvector [v] of width [w].*) val create : Bitvec.t -> int -> t + (** [create v w] creates a word from bitvector [v] of width [w].*) - + val code_addr : Theory.Target.t -> Bitvec.t -> t (** [code_addr t x] uses target's address size to create a word. Same as [create x (Theory.Target.code_addr_size t)]. - @since 2.2.0 - *) - val code_addr : Theory.Target.t -> Bitvec.t -> t - + @since 2.2.0 *) + val data_addr : Theory.Target.t -> Bitvec.t -> t (** [data_addr t x] uses target's code address size to create a word. Same as [create x (Theory.Target.data_addr_size t)]. - @since 2.2.0 - *) - val data_addr : Theory.Target.t -> Bitvec.t -> t + @since 2.2.0 *) + val data_word : Theory.Target.t -> Bitvec.t -> t (** [data_word t x] uses target's word size to create a word. Same as [create x (Theory.Target.bits t)]. - @since 2.2.0 - *) - val data_word : Theory.Target.t -> Bitvec.t -> t - + @since 2.2.0 *) - (** [of_string s] parses a bitvector from a string representation - defined in section {!bv_string}. *) val of_string : string -> t + (** [of_string s] parses a bitvector from a string representation defined in + section {!bv_string}. *) - (** [of_bool x] is a bitvector with length [1] and value [b0] if - [x] is false and [b1] otherwise. *) - val of_bool : bool -> t + val of_bool : bool -> t + (** [of_bool x] is a bitvector with length [1] and value [b0] if [x] is + false and [b1] otherwise. *) - (** [of_int ~width n] creates a bitvector of the specified - bit-[width] with the value equal to [n]. If bits of the [n] - that doesn't fit into [width] are ignored. *) - val of_int : width:int -> int -> t + val of_int : width:int -> int -> t + (** [of_int ~width n] creates a bitvector of the specified bit-[width] with + the value equal to [n]. If bits of the [n] that doesn't fit into [width] + are ignored. *) - (** [of_int32 ?width n] creates a bitvector of the specified - bit-[width] with the value equal to [n]. If bits of the [n] - that doesn't fit into [width] are ignored. Parameter [width] - defaults to [32]. *) val of_int32 : ?width:int -> int32 -> t + (** [of_int32 ?width n] creates a bitvector of the specified bit-[width] + with the value equal to [n]. If bits of the [n] that doesn't fit into + [width] are ignored. Parameter [width] defaults to [32]. *) - (** [of_int32 ?width n] creates a bitvector of the specified - bit-[width] with the value equal to [n]. If bits of the [n] - that doesn't fit into [width] are ignored. Parameter [width] - defaults to [32]. *) val of_int64 : ?width:int -> int64 -> t + (** [of_int32 ?width n] creates a bitvector of the specified bit-[width] + with the value equal to [n]. If bits of the [n] that doesn't fit into + [width] are ignored. Parameter [width] defaults to [32]. *) - (** {2 Some predefined constant constructors } *) + (** {2 Some predefined constant constructors} *) - (** [b0 = of_bool false] *) val b0 : t + (** [b0 = of_bool false] *) - (** [b1 = of_bool true] *) val b1 : t + (** [b1 = of_bool true] *) - (** {2 Helpful shortcuts } *) + (** {2 Helpful shortcuts} *) + val one : int -> t (** [one width] number one with a specified [width], is a shortcut for [of_int 1 ~width]*) - val one: int -> t + val zero : int -> t (** [zero width] zero with a specified [width], is a shortcut for [of_int 0 ~width]*) - val zero: int -> t - (** [ones width] is a number with a specified [width], and all bits - set to 1. It is a shortcut for [lnot (zero width)]*) val ones : int -> t + (** [ones width] is a number with a specified [width], and all bits set to + 1. It is a shortcut for [lnot (zero width)]*) + val of_binary : ?width:int -> endian -> string -> t (** [of_binary ?width endian num] creates a bitvector from a string interpreted as a sequence of bytes in a specified order. - The result is always positive and unsigned. The [num] argument is - not shared. [width] defaults to the length of [num] in bits, - i.e. [8 * String.length num]. *) - val of_binary : ?width:int -> endian -> string -> t + The result is always positive and unsigned. The [num] argument is not + shared. [width] defaults to the length of [num] in bits, i.e. + [8 * String.length num]. *) - (** {2 Conversions to OCaml built in integer types } *) + (** {2 Conversions to OCaml built in integer types} *) - (** [to_bitvec x] returns a Bitvec represenation of [x] *) val to_bitvec : t -> Bitvec.t + (** [to_bitvec x] returns a Bitvec represenation of [x] *) - (** [to_int x] projects [x] in to OCaml [int]. *) - val to_int : t -> int Or_error.t + val to_int : t -> int Or_error.t + (** [to_int x] projects [x] in to OCaml [int]. *) - (** [to_int32 x] projects [x] in to [int32] *) val to_int32 : t -> int32 Or_error.t + (** [to_int32 x] projects [x] in to [int32] *) - (** [to_int64 x] projects [x] in to [int64] *) val to_int64 : t -> int64 Or_error.t + (** [to_int64 x] projects [x] in to [int64] *) + val to_int_exn : t -> int (** [to_int_exn x] projects [x] in to OCaml [int]. @since 1.3 *) - val to_int_exn : t -> int + val to_int32_exn : t -> int32 (** [to_int32_exn x] projects [x] in to [int32] @since 1.3 *) - val to_int32_exn : t -> int32 + val to_int64_exn : t -> int64 (** [to_int64_exn x] projects [x] in to [int64] @since 1.3 *) - val to_int64_exn : t -> int64 - (** [printf "%a" pp x] prints [x] into a formatter. This is - a default printer, controlled by - [set_default_printer]. Multiple formats are available, see the - [available_writers] for the actual list of formats and - a format description. Out of box it defaults to [pp_hex_full]. - Note, the [printf] function from examples refers to the - [Format.printf], thus it is assumed that the [Format] module - is open in the scope. *) val pp : Format.formatter -> t -> unit + (** [printf "%a" pp x] prints [x] into a formatter. This is a default + printer, controlled by [set_default_printer]. Multiple formats are + available, see the [available_writers] for the actual list of formats + and a format description. Out of box it defaults to [pp_hex_full]. Note, + the [printf] function from examples refers to the [Format.printf], thus + it is assumed that the [Format] module is open in the scope. *) + val pp_hex : Format.formatter -> t -> unit (** [printf "%a" pp_hex x] prints [x] in the hexadecimal format omitting - suffixes, and the prefix if it is not necessary. - Example, + suffixes, and the prefix if it is not necessary. Example, {[ # printf "%a\n" pp_hex (Word.of_int32 0xDEADBEEFl);; @@ -1440,13 +1356,11 @@ module Std : sig # printf "%a\n" pp_hex (Word.of_int32 0x1);; 1 ]} - @since 1.3 - *) - val pp_hex : Format.formatter -> t -> unit + @since 1.3 *) + val pp_dec : Format.formatter -> t -> unit (** [printf "%a" pp_dec x] prints [x] in the decimal format omitting - suffixes and prefixes. - Example, + suffixes and prefixes. Example, {[ # printf "%a\n" pp_dec (Word.of_int32 0xDEADBEEFl);; @@ -1454,13 +1368,11 @@ module Std : sig # printf "%a\n" pp_dec (Word.of_int32 0x1);; 1 ]} - @since 1.3 - *) - val pp_dec : Format.formatter -> t -> unit + @since 1.3 *) - (** [printf "%a" pp_oct x] prints [x] in the octal format omitting - suffixes, and the prefix if it is not necessary. - Example, + val pp_oct : Format.formatter -> t -> unit + (** [printf "%a" pp_oct x] prints [x] in the octal format omitting suffixes, + and the prefix if it is not necessary. Example, {[ # printf "%a\n" pp_oct (Word.of_int32 0xDEADBEEFl);; @@ -1468,11 +1380,10 @@ module Std : sig # printf "%a\n" pp_oct (Word.of_int32 0x1);; 1 ]} *) - val pp_oct : Format.formatter -> t -> unit - (** [printf "%a" pp_bin x] prints [x] in the binary (0 and 1) format omitting - suffixes, and the prefix if it is not necessary. - Example, + val pp_bin : Format.formatter -> t -> unit + (** [printf "%a" pp_bin x] prints [x] in the binary (0 and 1) format + omitting suffixes, and the prefix if it is not necessary. Example, {[ # printf "%a\n" pp_bin (Word.of_int32 0xDEADBEEFl);; @@ -1480,13 +1391,11 @@ module Std : sig # printf "%a\n" pp_bin (Word.of_int32 0x1);; 1 ]} - @since 1.3 - *) - val pp_bin : Format.formatter -> t -> unit + @since 1.3 *) + val pp_hex : Format.formatter -> t -> unit (** [printf "%a" pp_hex x] prints [x] in the hexadecimal format omitting - suffixes, and the prefix if it is not necessary. - Example, + suffixes, and the prefix if it is not necessary. Example, {[ # printf "%a\n" pp_hex (Word.of_int32 0xDEADBEEFl);; @@ -1494,11 +1403,10 @@ module Std : sig # printf "%a\n" pp_hex (Word.of_int32 0x1);; 1 ]} *) - val pp_hex : Format.formatter -> t -> unit + val pp_dec : Format.formatter -> t -> unit (** [printf "%a" pp_dec x] prints [x] in the decimal format omitting - suffixes and prefixes. - Example, + suffixes and prefixes. Example, {[ # printf "%a\n" pp_dec (Word.of_int32 0xDEADBEEFl);; @@ -1506,13 +1414,11 @@ module Std : sig # printf "%a\n" pp_dec (Word.of_int32 0x1);; 1 ]} - @since 1.3 - *) - val pp_dec : Format.formatter -> t -> unit + @since 1.3 *) - (** [printf "%a" pp_oct x] prints [x] in the octal format omitting - suffixes, and the prefix if it is not necessary. - Example, + val pp_oct : Format.formatter -> t -> unit + (** [printf "%a" pp_oct x] prints [x] in the octal format omitting suffixes, + and the prefix if it is not necessary. Example, {[ # printf "%a\n" pp_oct (Word.of_int32 0xDEADBEEFl);; @@ -1520,13 +1426,11 @@ module Std : sig # printf "%a\n" pp_oct (Word.of_int32 0x1);; 1 ]} - @since 1.3 - *) - val pp_oct : Format.formatter -> t -> unit + @since 1.3 *) - (** [printf "%a" pp_bin x] prints [x] in the binary (0 and 1) - format omitting suffixes, and the prefix if it is not necessary. - Example, + val pp_bin : Format.formatter -> t -> unit + (** [printf "%a" pp_bin x] prints [x] in the binary (0 and 1) format + omitting suffixes, and the prefix if it is not necessary. Example, {[ # printf "%a\n" pp_bin (Word.of_int32 0xDEADBEEFl);; @@ -1534,13 +1438,11 @@ module Std : sig # printf "%a\n" pp_bin (Word.of_int32 0x1);; 1 ]} - @since 1.3 - *) - val pp_bin : Format.formatter -> t -> unit + @since 1.3 *) + val pp_hex_full : Format.formatter -> t -> unit (** [printf "%a" pp_hex_full x] prints [x] in the hexadecimal format with - suffixes, and the prefix if it is necessary. - Example, + suffixes, and the prefix if it is necessary. Example, {[ # printf "%a\n" pp_hex_full (Word.of_int32 0xDEADBEEFl);; @@ -1548,11 +1450,10 @@ module Std : sig # printf "%a\n" pp_hex_full (Word.of_int32 0x1);; 1:32u ]} *) - val pp_hex_full : Format.formatter -> t -> unit + val pp_dec_full : Format.formatter -> t -> unit (** [printf "%a" pp_dec_full x] prints [x] in the decimal format with - suffixes and prefixes. - Example, + suffixes and prefixes. Example, {[ # printf "%a\n" pp_dec_full (Word.of_int32 0xDEADBEEFl);; @@ -1560,13 +1461,11 @@ module Std : sig # printf "%a\n" pp_dec_full (Word.of_int32 0x1);; 1:32u ]} - @since 1.3 - *) - val pp_dec_full : Format.formatter -> t -> unit + @since 1.3 *) + val pp_oct_full : Format.formatter -> t -> unit (** [printf "%a" pp_oct_full x] prints [x] in the octal format with - suffixes, and the prefix if it is necessary. - Example, + suffixes, and the prefix if it is necessary. Example, {[ # printf "%a\n" pp_oct_full (Word.of_int32 0xDEADBEEFl);; @@ -1574,11 +1473,10 @@ module Std : sig # printf "%a\n" pp_oct_full (Word.of_int32 0x1);; 1:32u ]} *) - val pp_oct_full : Format.formatter -> t -> unit - (** [printf "%a" pp_bin_full x] prints [x] in the binary (0 and 1) - format omitting suffixes, and the prefix if it is necessary. - Example, + val pp_bin_full : Format.formatter -> t -> unit + (** [printf "%a" pp_bin_full x] prints [x] in the binary (0 and 1) format + omitting suffixes, and the prefix if it is necessary. Example, {v # printf "%a\n" pp_bin_full (Word.of_int32 0xDEADBEEFl);; @@ -1586,217 +1484,210 @@ module Std : sig # printf "%a\n" pp_bin_full (Word.of_int32 0x1);; 1:32u v} *) - val pp_bin_full : Format.formatter -> t -> unit - (** [pp_generic ?case ?prefix ?suffix ?format ppf x] - a printer - to suit all tastes. - - Note: this is a generic printer factory that should be used if - none of the nine preinstantiated suits you. - - @param prefix defines whether or not a number is prefixed: - - [`auto] (default) - a prefix that corresponds to the chosen - format is printed if it is necessary to disambiguate a - number from a decimal representation; - - [`base] - a corresponding prefix is always printed; - - [`none] - the prefix is never printed; - - [`this p] - the user specified prefix [p] is always - printed; - - @param suffix defines how the suffix should be printed: - - [`none] (default) - the suffix is never printed; - - [`full] - a full suffix that denotes size and signedness - is printed, e.g., [0xDE:32s] is a signed integer modulo [32]. - - [`size] - only the modulo is printed, e.g., [0xDE:32s] is - printed as [0xDE:32] - - @param format defines the textual representation format: - - [hex] (default) - hexadecimal - - [dec] - decimal - - [oct] - octal - - [bin] - binary (0 and 1). - - @param case defines the case of hexadecimal letters - *) val pp_generic : - ?case:[`upper | `lower ] -> - ?prefix:[`auto | `base | `none | `this of string ] -> - ?suffix:[`none | `full | `size ] -> - ?format:[`hex | `dec | `oct | `bin ] -> - Format.formatter -> t -> unit - - (** [string_of_value ?hex x] returns a textual representation of - the [x] value, i.e., ignores size and signedness. If [hex] is - [true] (default), then it is in the hexadecimal - representation, otherwise the decimal representation is - used. The returned value is not prefixed. No leading zeros are - printed. If a value is signed and negative, then a leading - negative sign is printed. Hexadecimal letter literals are - printed in a lowercase format. *) + ?case:[ `upper | `lower ] -> + ?prefix:[ `auto | `base | `none | `this of string ] -> + ?suffix:[ `none | `full | `size ] -> + ?format:[ `hex | `dec | `oct | `bin ] -> + Format.formatter -> + t -> + unit + (** [pp_generic ?case ?prefix ?suffix ?format ppf x] - a printer to suit all + tastes. + + Note: this is a generic printer factory that should be used if none of + the nine preinstantiated suits you. + + @param prefix + defines whether or not a number is prefixed: + - [`auto] (default) - a prefix that corresponds to the chosen format + is printed if it is necessary to disambiguate a number from a + decimal representation; + - [`base] - a corresponding prefix is always printed; + - [`none] - the prefix is never printed; + - [`this p] - the user specified prefix [p] is always printed; + + @param suffix + defines how the suffix should be printed: + - [`none] (default) - the suffix is never printed; + - [`full] - a full suffix that denotes size and signedness is printed, + e.g., [0xDE:32s] is a signed integer modulo [32]. + - [`size] - only the modulo is printed, e.g., [0xDE:32s] is printed as + [0xDE:32] + + @param format + defines the textual representation format: + - [hex] (default) - hexadecimal + - [dec] - decimal + - [oct] - octal + - [bin] - binary (0 and 1). + + @param case defines the case of hexadecimal letters *) + val string_of_value : ?hex:bool -> t -> string + (** [string_of_value ?hex x] returns a textual representation of the [x] + value, i.e., ignores size and signedness. If [hex] is [true] (default), + then it is in the hexadecimal representation, otherwise the decimal + representation is used. The returned value is not prefixed. No leading + zeros are printed. If a value is signed and negative, then a leading + negative sign is printed. Hexadecimal letter literals are printed in a + lowercase format. *) - (** [signed t] casts t to a signed type, so that any operations - applied on [t] will be signed. *) val signed : t -> t + (** [signed t] casts t to a signed type, so that any operations applied on + [t] will be signed. *) - (** [unsigned t] casts [t] to an unsigned type, so that any - operations applied to it will interpret [t] as an unsigned - word. @since 1.3 *) val unsigned : t -> t + (** [unsigned t] casts [t] to an unsigned type, so that any operations + applied to it will interpret [t] as an unsigned word. + @since 1.3 *) - (** [is_zero bv] is true iff all bits are set to zero. *) val is_zero : t -> bool + (** [is_zero bv] is true iff all bits are set to zero. *) - (** [is_ones bv] is true if the least significant bit is equal to one *) val is_one : t -> bool + (** [is_ones bv] is true if the least significant bit is equal to one *) - (** [bitwidth bv] return a bit-width, i.e., the amount of bits *) val bitwidth : t -> int + (** [bitwidth bv] return a bit-width, i.e., the amount of bits *) - (** [extract bv ~hi ~lo] extracts a subvector from [bv], starting - from bit [hi] and ending with [lo]. Bits are enumerated from - right to left (from least significant to most), starting from - zero. [hi] maybe greater than [size]. + val extract : ?hi:int -> ?lo:int -> t -> t Or_error.t + (** [extract bv ~hi ~lo] extracts a subvector from [bv], starting from bit + [hi] and ending with [lo]. Bits are enumerated from right to left (from + least significant to most), starting from zero. [hi] maybe greater than + [size]. - [hi] defaults to [width bv - 1] - [lo] defaults to [0]. + [hi] defaults to [width bv - 1] [lo] defaults to [0]. Example: - [extract (of_int 17 ~width:8) ~hi:4 ~lo:3] - will result in a two bit vector consisting of the forth and - third bits, i.e., equal to a number [2]. + [extract (of_int 17 ~width:8) ~hi:4 ~lo:3] will result in a two bit + vector consisting of the forth and third bits, i.e., equal to a number + [2]. - [lo] and [hi] should be non-negative numbers. [lo] must be less - then a [width bv] and [hi >= lo]. *) - val extract : ?hi:int -> ?lo:int -> t -> t Or_error.t + [lo] and [hi] should be non-negative numbers. [lo] must be less then a + [width bv] and [hi >= lo]. *) - (** [extract_exn bv ~hi ~lo] is the same as [extract], but will raise - an exception on error. *) val extract_exn : ?hi:int -> ?lo:int -> t -> t + (** [extract_exn bv ~hi ~lo] is the same as [extract], but will raise an + exception on error. *) - (** [concat b1 b2] concatenates two bitvectors *) val concat : t -> t -> t + (** [concat b1 b2] concatenates two bitvectors *) + val ( @. ) : t -> t -> t (** [b1 @. b2] is [concat b1 b2] *) - val (@.): t -> t -> t + val succ : t -> t (** [succ n] returns next value after [n]. It is not guaranteed that [signed (succ n) > signed n]*) - val succ : t -> t - (** [pred n] returns a value preceding [n]. *) val pred : t -> t + (** [pred n] returns a value preceding [n]. *) - (** [nsucc m n] is [Fn.apply_n_times ~n succ m], but more - efficient. *) val nsucc : t -> int -> t + (** [nsucc m n] is [Fn.apply_n_times ~n succ m], but more efficient. *) - (** [npred m n] is [Fn.apply_n_times ~n pred addr], but more - efficient. *) val npred : t -> int -> t + (** [npred m n] is [Fn.apply_n_times ~n pred addr], but more efficient. *) - (** [a ++ n] is [nsucc a n] *) - val (++) : t -> int -> t + val ( ++ ) : t -> int -> t + (** [a ++ n] is [nsucc a n] *) - (** [a -- n] is [npred a n] *) - val (--) : t -> int -> t + val ( -- ) : t -> int -> t + (** [a -- n] is [npred a n] *) - (** [gcd x y] is the greatest common divisor of [x] and [y] - in the integers. Note that this is not always the greatest - common divisor in the bitvectors of fixed length. For example, - in the 32-bit unsigned integers, [2 = 2 + 2^32 = 2(1 + 2^31)]. - Thus, [1 + 2^31] is a divisor of [2], even though [gcd 2 2 = 2]. - Two properties that still hold are: - 1. Both [x] and [y] are multiples of [gcd x y], and - 2. [gcd x y <= min (abs x) (abs y)] *) val gcd : t -> t -> t Or_error.t + (** [gcd x y] is the greatest common divisor of [x] and [y] in the integers. + Note that this is not always the greatest common divisor in the + bitvectors of fixed length. For example, in the 32-bit unsigned + integers, [2 = 2 + 2^32 = 2(1 + 2^31)]. Thus, [1 + 2^31] is a divisor of + [2], even though [gcd 2 2 = 2]. Two properties that still hold are: 1. + Both [x] and [y] are multiples of [gcd x y], and 2. + [gcd x y <= min (abs x) (abs y)] *) - (** [lcm x y] is the least common multiple of [x] and [y] - in the integers. Note that, like [gcd x y], this is not - always the least common multiple of [x] and [y] in the fixed- - length bitvectors. See the [gcd] documentation for an example. - The result of this function will always be some common multiple - of the inputs, even in the fixed-width bitvectors. *) val lcm : t -> t -> t Or_error.t + (** [lcm x y] is the least common multiple of [x] and [y] in the integers. + Note that, like [gcd x y], this is not always the least common multiple + of [x] and [y] in the fixed- length bitvectors. See the [gcd] + documentation for an example. The result of this function will always be + some common multiple of the inputs, even in the fixed-width bitvectors. + *) - (** [gcdext x y] returns [(g, s, t)] where [g = gcd x y] and - [g = s*x + t*y]. See the documentation for [gcd x y] for - why this function is tricky to use. *) val gcdext : t -> t -> (t * t * t) Or_error.t + (** [gcdext x y] returns [(g, s, t)] where [g = gcd x y] and + [g = s*x + t*y]. See the documentation for [gcd x y] for why this + function is tricky to use. *) - (** [gcd_exn x y] is the same as [gcd], but will raise - an exception on error. *) val gcd_exn : t -> t -> t + (** [gcd_exn x y] is the same as [gcd], but will raise an exception on + error. *) - (** [lcm_exn x y] is the same as [lcm], but will raise - an exception on error. *) val lcm_exn : t -> t -> t + (** [lcm_exn x y] is the same as [lcm], but will raise an exception on + error. *) - (** [gcdext_exn x y] is the same as [gcdext], but will raise - an exception on error. *) val gcdext_exn : t -> t -> t * t * t + (** [gcdext_exn x y] is the same as [gcdext], but will raise an exception on + error. *) - (** {2 Iteration over bitvector components } *) + (** {2 Iteration over bitvector components} *) - (** [enum_bytes x order] returns a sequence of bytes of [x] in a - specified [order]. Each byte is represented as a [bitvector] - itself. *) - val enum_bytes : t -> endian -> t seq + val enum_bytes : t -> endian -> t seq + (** [enum_bytes x order] returns a sequence of bytes of [x] in a specified + [order]. Each byte is represented as a [bitvector] itself. *) - (** [enum_bytes x order] returns bytes of [x] in a specified [order], - with bytes represented by [char] type *) val enum_chars : t -> endian -> char seq + (** [enum_bytes x order] returns bytes of [x] in a specified [order], with + bytes represented by [char] type *) - (** [enum_bits x order] returns bits of [x] in a specified [order]. - [order] defines only the ordering of words in a bitvector, bits - will always be in MSB first order. The length of the sequence - is always a power of [8]. *) - val enum_bits : t -> endian -> bool seq + val enum_bits : t -> endian -> bool seq + (** [enum_bits x order] returns bits of [x] in a specified [order]. [order] + defines only the ordering of words in a bitvector, bits will always be + in MSB first order. The length of the sequence is always a power of [8]. + *) (** {3 Comparison with zero} - Note, we're not including [With_zero] interface, since - it refers to the `Sign` module, that is available only - in core_kernel >= 113.33.00. - *) + Note, we're not including [With_zero] interface, since it refers to the + `Sign` module, that is available only in core >= 113.33.00. *) - (** [validate_positive] validates that a value is positive. *) - val validate_positive : t Validate.check + val validate_positive : t Validate.check + (** [validate_positive] validates that a value is positive. *) - (** [validate_non_negative] validates that a value is non negative. *) val validate_non_negative : t Validate.check + (** [validate_non_negative] validates that a value is non negative. *) - (** [validate_negative] validates that a value is negative. *) - val validate_negative : t Validate.check + val validate_negative : t Validate.check + (** [validate_negative] validates that a value is negative. *) - (** [validate_non_positive] validates that a value is not positive. *) val validate_non_positive : t Validate.check + (** [validate_non_positive] validates that a value is not positive. *) - (** [is_positive x] is true if [x] is greater than zero. Always - true if [x] is unsigned. *) - val is_positive : t -> bool + val is_positive : t -> bool + (** [is_positive x] is true if [x] is greater than zero. Always true if [x] + is unsigned. *) - (** [is_non_negative x] is true if [x] is greater than or equal to - zero. Tautology if [x] is unsigned. *) val is_non_negative : t -> bool + (** [is_non_negative x] is true if [x] is greater than or equal to zero. + Tautology if [x] is unsigned. *) - (** [is_negative x] is true if [x] is strictly less than zero. It - is a contradiction if [x] is not signed. *) - val is_negative : t -> bool + val is_negative : t -> bool + (** [is_negative x] is true if [x] is strictly less than zero. It is a + contradiction if [x] is not signed. *) - (** [is_non_positive x] is true if [x] is less than zero. It is a - contradiction if [x] is not signed. *) val is_non_positive : t -> bool + (** [is_non_positive x] is true if [x] is less than zero. It is a + contradiction if [x] is not signed. *) - (** {2 Arithmetic raised into [Or_error] monad } + (** {2 Arithmetic raised into [Or_error] monad} - All binary integer operations are only well defined on operands - with equal sizes. + All binary integer operations are only well defined on operands with + equal sizes. - Module [Int] provides a set of integer operations that do not - raise exceptions, but return values raised to an Or_error - monad. + Module [Int] provides a set of integer operations that do not raise + exceptions, but return values raised to an Or_error monad. Example: @@ -1806,59 +1697,58 @@ module Std : sig [Z.(!$v1 + !$v2 / !$v3)]. *) module Int_err : sig + val ( !$ ) : t -> t Or_error.t + (** [!$v] lifts [v] to an Or_error monad. It is, essentially, the same as + [Ok v] *) - (** [!$v] lifts [v] to an Or_error monad. It is, essentially, the - same as [Ok v] *) - val (!$): t -> t Or_error.t + (** The following lifter will check that their operand has a corresponding + width. *) - (** The following lifter will check that their operand has a - corresponding width. *) + val i1 : t -> t Or_error.t + (** [i1 x] is [Ok x] iff [bitwidth x = 1] *) - (** [i1 x] is [Ok x] iff [bitwidth x = 1] *) - val i1 : t -> t Or_error.t + val i4 : t -> t Or_error.t + (** [i4 x] is [Ok x] iff [bitwidth x = 4] *) - (** [i4 x] is [Ok x] iff [bitwidth x = 4] *) - val i4 : t -> t Or_error.t + val i8 : t -> t Or_error.t + (** [i8 x] is [Ok x] iff [bitwidth x = 8] *) - (** [i8 x] is [Ok x] iff [bitwidth x = 8] *) - val i8 : t -> t Or_error.t - - (** [i16 x] is [Ok x] iff [bitwidth x = 16] *) val i16 : t -> t Or_error.t + (** [i16 x] is [Ok x] iff [bitwidth x = 16] *) - (** [i32 x] is [Ok x] iff [bitwidth x = 32] *) val i32 : t -> t Or_error.t + (** [i32 x] is [Ok x] iff [bitwidth x = 32] *) - (** [i64 x] is [Ok x] iff [bitwidth x = 64] *) val i64 : t -> t Or_error.t + (** [i64 x] is [Ok x] iff [bitwidth x = 64] *) - (** [int w v] will be [Ok] if [v] has width [w] *) val int : int -> t -> t Or_error.t + (** [int w v] will be [Ok] if [v] has width [w] *) - (** [of_word_size w] creates a lifter for a specified word size - [w], i.e. either [i64] or [i32] *) val of_word_size : Word_size.t -> t -> t Or_error.t + (** [of_word_size w] creates a lifter for a specified word size [w], i.e. + either [i64] or [i32] *) include Integer.S with type t = t Or_error.t include Legacy.Monad.Infix with type 'a t := 'a Or_error.t end + module Int_exn : Integer.S with type t = t (** Arithmetic that raises exceptions. - This module exposes a common integer interface with - operations not lifted into [Or_error] monad, but raising - [Width] exception if operands sizes mismatch. - *) - module Int_exn : Integer.S with type t = t + This module exposes a common integer interface with operations not + lifted into [Or_error] monad, but raising [Width] exception if operands + sizes mismatch. *) + module Unsafe : Integer.S with type t = t (** Arithmetic operations that doesn't check the widths.*) - module Unsafe : Integer.S with type t = t - (** Stable marshaling interface. *) + (** Stable marshaling interface. *) module Stable : sig module V1 : sig type nonrec t = t [@@deriving bin_io, compare, sexp] end + module V2 : sig type nonrec t = t [@@deriving bin_io, compare, sexp] end @@ -1868,812 +1758,752 @@ module Std : sig Bitvector comes with 4 predefined prefix trees: - - [Trie.Big.Bits] - big endian prefix tree, where each - token is a bit, and bitvector is tokenized from msb to lsb. + - [Trie.Big.Bits] - big endian prefix tree, where each token is a bit, + and bitvector is tokenized from msb to lsb. - - [Trie.Big.Byte] - big endian prefix tree, where each token - is a byte, and bitvector is tokenized from most significant - byte to less significant + - [Trie.Big.Byte] - big endian prefix tree, where each token is a byte, + and bitvector is tokenized from most significant byte to less + significant - [Trie.Little.Bits] - is a little endian bit tree. - [Trie.Little.Byte] - is a little endian byte tree. *) module Trie : sig module Big : sig - module Bits : Trie.S with type key = t + module Bits : Trie.S with type key = t module Bytes : Trie.S with type key = t end + module Little : sig - module Bits : Trie.S with type key = t + module Bits : Trie.S with type key = t module Bytes : Trie.S with type key = t end end end - (** Expose [endian] constructors to [Bap.Std] namespace *) - type endian = Bitvector.endian = - LittleEndian | BigEndian + (** Expose [endian] constructors to [Bap.Std] namespace *) + type endian = Bitvector.endian = LittleEndian | BigEndian [@@deriving sexp, bin_io, compare] - (** Shortcut for bitvectors that represent words *) - module Word : module type of Bitvector - with type t = word - and type endian = endian - and type comparator_witness = Bitvector.comparator_witness - - (** Shortcut for bitvectors that represent addresses *) - module Addr : sig - include module type of Bitvector - with type t = addr + (** Shortcut for bitvectors that represent words *) + module Word : + module type of Bitvector + with type t = word and type endian = endian and type comparator_witness = Bitvector.comparator_witness - (** [memref ?disp ?index ?scale base] mimics a memory reference syntax - in gas assembler, [dis(base,index,scale)] - assembler operation. It returns address at - [base + index * scale + dis]. + (** Shortcut for bitvectors that represent addresses *) + module Addr : sig + include + module type of Bitvector + with type t = addr + and type endian = endian + and type comparator_witness = Bitvector.comparator_witness + + val memref : ?disp:int -> ?index:int -> ?scale:size -> addr -> addr + (** [memref ?disp ?index ?scale base] mimics a memory reference syntax in + gas assembler, [dis(base,index,scale)] assembler operation. It returns + address at [base + index * scale + dis]. @param disp stands for displacement and defaults to [0] @param index defaults for [0] @param scale defaults to [`r8] - All operations are taken modulo {% $2^n$ %}, - where [n = bitwidth base]. *) - val memref : ?disp:int -> ?index:int -> ?scale:size -> addr -> addr + All operations are taken modulo [$2^n$], where [n = bitwidth base]. *) end (** Main BIL module. The module specifies Binary Instruction Language (BIL). A language to - define a semantics of instructions. The semantics of the BIL - language is defined at [[1]]. - - The language is defined using algebraic types. For each BIL - constructor a smart constructor is defined with the same (if - syntax allows) name. This allows to use BIL as a DSL embedded - into OCaml: - - {[Bil.([ - v := src lsr i32 1; - r := src; - s := i32 31; - while_ (var v <> i32 0) [ - r := var r lsl i32 1; - r := var r lor (var v land i32 1); - v := var v lsr i32 1; - s := var s - i32 1; - ]; - dst := var r lsl var s; - ])]} - - where [i32] is defined as - [let i32 x = Bil.int (Word.of_int ~width:32 x)] - and [v,r,s] are some variables of type [var]; and - [src, dst] are expressions of type [exp]. - - @see - - [[1]]: BIL Semantics. - *) + define a semantics of instructions. The semantics of the BIL language is + defined at [[1]]. + + The language is defined using algebraic types. For each BIL constructor a + smart constructor is defined with the same (if syntax allows) name. This + allows to use BIL as a DSL embedded into OCaml: + + {[ + Bil. + [ + v := src lsr i32 1; + r := src; + s := i32 31; + while_ + (var v <> i32 0) + [ + r := var r lsl i32 1; + r := var r lor (var v land i32 1); + v := var v lsr i32 1; + s := var s - i32 1; + ]; + dst := var r lsl var s; + ] + ]} + + where [i32] is defined as [let i32 x = Bil.int (Word.of_int ~width:32 x)] + and [v,r,s] are some variables of type [var]; and [src, dst] are + expressions of type [exp]. + + @see + [[1]]: BIL Semantics. *) module Bil : sig module Types : sig type var (** Different forms of casting *) type cast = - | UNSIGNED (** 0-padding widening cast. *) - | SIGNED (** Sign-extending widening cast. *) - | HIGH (** Narrowing cast. Keeps the high bits. *) - | LOW (** Narrowing cast. Keeps the low bits. *) + | UNSIGNED (** 0-padding widening cast. *) + | SIGNED (** Sign-extending widening cast. *) + | HIGH (** Narrowing cast. Keeps the high bits. *) + | LOW (** Narrowing cast. Keeps the low bits. *) [@@deriving bin_io, compare, sexp] (** Binary operations implemented in the BIL *) type binop = - | PLUS (** Integer addition. (commutative, associative) *) - | MINUS (** Subtract second integer from first. *) - | TIMES (** Integer multiplication. (commutative, associative) *) + | PLUS (** Integer addition. (commutative, associative) *) + | MINUS (** Subtract second integer from first. *) + | TIMES (** Integer multiplication. (commutative, associative) *) | DIVIDE (** Unsigned integer division. *) - | SDIVIDE (** Signed integer division. *) - | MOD (** Unsigned modulus. *) - | SMOD (** Signed modulus. *) + | SDIVIDE (** Signed integer division. *) + | MOD (** Unsigned modulus. *) + | SMOD (** Signed modulus. *) | LSHIFT (** Left shift. *) | RSHIFT (** Right shift, zero padding. *) - | ARSHIFT (** Right shift, sign extend. *) - | AND (** Bitwise and. (commutative, associative) *) - | OR (** Bitwise or. (commutative, associative) *) - | XOR (** Bitwise xor. (commutative, associative) *) - | EQ (** Equals. (commutative) (associative on booleans) *) - | NEQ (** Not equals. (commutative) (associative on booleans) *) - | LT (** Unsigned less than. *) - | LE (** Unsigned less than or equal to. *) - | SLT (** Signed less than. *) - | SLE (** Signed less than or equal to. *) + | ARSHIFT (** Right shift, sign extend. *) + | AND (** Bitwise and. (commutative, associative) *) + | OR (** Bitwise or. (commutative, associative) *) + | XOR (** Bitwise xor. (commutative, associative) *) + | EQ (** Equals. (commutative) (associative on booleans) *) + | NEQ (** Not equals. (commutative) (associative on booleans) *) + | LT (** Unsigned less than. *) + | LE (** Unsigned less than or equal to. *) + | SLT (** Signed less than. *) + | SLE (** Signed less than or equal to. *) [@@deriving bin_io, compare, sexp] (** Unary operations implemented in the IR *) type unop = - | NEG (** Negate. (2's complement) *) - | NOT (** Bitwise not.(1's complement) *) + | NEG (** Negate. (2's complement) *) + | NOT (** Bitwise not.(1's complement) *) [@@deriving bin_io, compare, sexp] - (** BIL expression variants *) + (** BIL expression variants *) type exp = - | Load of exp * exp * endian * size (** load from memory *) - | Store of exp * exp * exp * endian * size (** store to memory *) - | BinOp of binop * exp * exp (** binary operation *) - | UnOp of unop * exp (** unary operation *) - | Var of var (** variable *) - | Int of word (** immediate value *) - | Cast of cast * int * exp (** casting *) - | Let of var * exp * exp (** let-binding *) - | Unknown of string * typ (** unknown or undefined value *) - | Ite of exp * exp * exp (** if-then-else expression *) - | Extract of int * int * exp (** extract portion of word *) - | Concat of exp * exp (** concatenate two words *) + | Load of exp * exp * endian * size (** load from memory *) + | Store of exp * exp * exp * endian * size (** store to memory *) + | BinOp of binop * exp * exp (** binary operation *) + | UnOp of unop * exp (** unary operation *) + | Var of var (** variable *) + | Int of word (** immediate value *) + | Cast of cast * int * exp (** casting *) + | Let of var * exp * exp (** let-binding *) + | Unknown of string * typ (** unknown or undefined value *) + | Ite of exp * exp * exp (** if-then-else expression *) + | Extract of int * int * exp (** extract portion of word *) + | Concat of exp * exp (** concatenate two words *) + and typ = - | Imm of int (** [Imm n] - n-bit immediate *) - | Mem of addr_size * size (** [Mem (a,t)] memory with a specifed addr_size *) + | Imm of int (** [Imm n] - n-bit immediate *) + | Mem of addr_size * size + (** [Mem (a,t)] memory with a specifed addr_size *) | Unk [@@deriving bin_io, compare, sexp] type stmt = - | Move of var * exp (** assign value of expression to variable *) - | Jmp of exp (** jump to absolute address *) - | Special of string (** Statement with semantics not expressible in BIL *) - | While of exp * stmt list (** while loops *) - | If of exp * stmt list * stmt list (** if/then/else statement *) - | CpuExn of int (** CPU exception *) + | Move of var * exp (** assign value of expression to variable *) + | Jmp of exp (** jump to absolute address *) + | Special of string + (** Statement with semantics not expressible in BIL *) + | While of exp * stmt list (** while loops *) + | If of exp * stmt list * stmt list (** if/then/else statement *) + | CpuExn of int (** CPU exception *) [@@deriving bin_io, compare, sexp] end - (** include all constructors into Bil namespace *) open Types - include module type of Types with type cast = cast - and type binop = binop - and type unop = unop - and type typ = typ - and type var = var - and type exp = exp - and type stmt = stmt - type t = stmt list - [@@deriving bin_io, compare, sexp] + (** include all constructors into Bil namespace *) + include + module type of Types + with type cast = cast + and type binop = binop + and type unop = unop + and type typ = typ + and type var = var + and type exp = exp + and type stmt = stmt + + type t = stmt list [@@deriving bin_io, compare, sexp] type var_compare - type vars = (var,var_compare) Set.t + type vars = (var, var_compare) Set.t include Printable.S with type t := t - include Data.S with type t := t - + include Data.S with type t := t + val domain : stmt list Knowledge.domain (** Bil is an instance of Domain. - A flat domain with the empty Bil program being the empty element. - *) - val domain : stmt list Knowledge.domain + A flat domain with the empty Bil program being the empty element. *) - (** Instance of the persistence class *) val persistent : stmt list Knowledge.persistent + (** Instance of the persistence class *) - (** The denotation of the program semantics as a BIL program. *) val slot : (Theory.Program.Semantics.cls, stmt list) Knowledge.slot + (** The denotation of the program semantics as a BIL program. *) + val code : (Theory.program, stmt list) KB.slot (** The representation of the program as a BIL program. @since 2.3.0 *) - val code : (Theory.program, stmt list) KB.slot - (** [printf "%a" pp_binop op] prints a binary operation [op]. *) val pp_binop : Format.formatter -> binop -> unit + (** [printf "%a" pp_binop op] prints a binary operation [op]. *) - (** [printf "%a" pp_unop op] prints an unary operation [op] *) val pp_unop : Format.formatter -> unop -> unit + (** [printf "%a" pp_unop op] prints an unary operation [op] *) - (** [printf "%a" pp_cast t] prints a cast type [t] - @since 1.3 - *) val pp_cast : Format.formatter -> cast -> unit + (** [printf "%a" pp_cast t] prints a cast type [t] + @since 1.3 *) - (** [string_of_binop op] is a textual representation of [op]. - @since 1.3 - *) val string_of_binop : binop -> string + (** [string_of_binop op] is a textual representation of [op]. + @since 1.3 *) - (** [string_of_unop op] is a textual representation of [op]. - @since 1.3 - *) val string_of_unop : unop -> string + (** [string_of_unop op] is a textual representation of [op]. + @since 1.3 *) - (** [string_of_cast t] is a textual representation of a cast type - @since 1.3 - *) val string_of_cast : cast -> string + (** [string_of_cast t] is a textual representation of a cast type + @since 1.3 *) - (** Infix operators *) + (** Infix operators *) module Infix : sig - - (** [x := y -> Move (x,y)] *) - val (:=) : var -> exp -> stmt + val ( := ) : var -> exp -> stmt + (** [x := y -> Move (x,y)] *) (** {2 Arithmetic operations} *) - (** [x + y -> BinOp (PLUS,x,y)] *) - val ( + ) : exp -> exp -> exp + val ( + ) : exp -> exp -> exp + (** [x + y -> BinOp (PLUS,x,y)] *) - (** [x - y -> BinOp(MINUS,x,y)] *) - val ( - ) : exp -> exp -> exp + val ( - ) : exp -> exp -> exp + (** [x - y -> BinOp(MINUS,x,y)] *) - (** [x * y -> BinOp(TIMES,x,y)] *) - val ( * ) : exp -> exp -> exp + val ( * ) : exp -> exp -> exp + (** [x * y -> BinOp(TIMES,x,y)] *) - (** [x / y -> BinOp(DIVIDE,x,y)] *) - val ( / ) : exp -> exp -> exp + val ( / ) : exp -> exp -> exp + (** [x / y -> BinOp(DIVIDE,x,y)] *) - (** [x /$ y -> BinOp(SDIVIDE,x,y)] *) - val ( /$ ) : exp -> exp -> exp + val ( /$ ) : exp -> exp -> exp + (** [x /$ y -> BinOp(SDIVIDE,x,y)] *) - (** [x mod y -> BinOp (MOD,x,y)] *) val ( mod ) : exp -> exp -> exp + (** [x mod y -> BinOp (MOD,x,y)] *) - (** [x %$ y -> BinOp (SMOD,x,y)] *) - val ( %$ ) : exp -> exp -> exp + val ( %$ ) : exp -> exp -> exp + (** [x %$ y -> BinOp (SMOD,x,y)] *) (** {2 Bit operations} *) - (** [x lsl y = BinOp (LSHIFT,x,y)] *) val ( lsl ) : exp -> exp -> exp + (** [x lsl y = BinOp (LSHIFT,x,y)] *) - (** [x lsr y = BinOp (RSHIFT,x,y)] *) val ( lsr ) : exp -> exp -> exp + (** [x lsr y = BinOp (RSHIFT,x,y)] *) - (** [x asr y = BinOp (ARSHIFT,x,y)] *) val ( asr ) : exp -> exp -> exp + (** [x asr y = BinOp (ARSHIFT,x,y)] *) - (** [x land y = BinOp (AND,x,y)] *) - val ( land) : exp -> exp -> exp + val ( land ) : exp -> exp -> exp + (** [x land y = BinOp (AND,x,y)] *) - (** [x lor y = BinOp (OR,x,y)] *) val ( lor ) : exp -> exp -> exp + (** [x lor y = BinOp (OR,x,y)] *) - (** [x lxor y = BinOp (XOR,x,y)] *) - val ( lxor) : exp -> exp -> exp + val ( lxor ) : exp -> exp -> exp + (** [x lxor y = BinOp (XOR,x,y)] *) - (** [lnot x = UnOp (NOT,x,y)] *) - val lnot : exp -> exp + val lnot : exp -> exp + (** [lnot x = UnOp (NOT,x,y)] *) (** {2 Equality tests} *) - (** [x = y -> BinOp(EQ,x,y)] *) - val ( = ) : exp -> exp -> exp + val ( = ) : exp -> exp -> exp + (** [x = y -> BinOp(EQ,x,y)] *) - (** [x = y -> BinOp(NEQ,x,y)] *) - val ( <> ) : exp -> exp -> exp + val ( <> ) : exp -> exp -> exp + (** [x = y -> BinOp(NEQ,x,y)] *) - (** [x < y -> BinOp(LT,x,y)] *) - val ( < ) : exp -> exp -> exp + val ( < ) : exp -> exp -> exp + (** [x < y -> BinOp(LT,x,y)] *) - (** [x > y -> Binop(LT,y,x) ] *) - val ( > ) : exp -> exp -> exp + val ( > ) : exp -> exp -> exp + (** [x > y -> Binop(LT,y,x) ] *) - (** [x <= y -> Binop(LE,x,y)] *) - val ( <= ) : exp -> exp -> exp + val ( <= ) : exp -> exp -> exp + (** [x <= y -> Binop(LE,x,y)] *) - (** [x <= y -> Binop(LE,y,x)] *) - val ( >= ) : exp -> exp -> exp + val ( >= ) : exp -> exp -> exp + (** [x <= y -> Binop(LE,y,x)] *) - (** {3 Signed comparison} *) + (** {3 Signed comparison} *) - (** [x <$ x -> Binop(SLT,x,y)] *) - val ( <$ ) : exp -> exp -> exp + val ( <$ ) : exp -> exp -> exp + (** [x <$ x -> Binop(SLT,x,y)] *) - (** [x >$ x -> Binop(SLT,y,x)] *) - val ( >$ ) : exp -> exp -> exp + val ( >$ ) : exp -> exp -> exp + (** [x >$ x -> Binop(SLT,y,x)] *) - (** [x <=$ x -> Binop(SLE,x,y)] *) val ( <=$ ) : exp -> exp -> exp + (** [x <=$ x -> Binop(SLE,x,y)] *) - (** [x >=$ x -> Binop(SLE,y,x)] *) val ( >=$ ) : exp -> exp -> exp + (** [x >=$ x -> Binop(SLE,y,x)] *) (** {2 Misc operations} *) + val ( ^ ) : exp -> exp -> exp (** [a ^ b -> Concat (a,b)] *) - val ( ^ ) : exp -> exp -> exp end - (** Brings infix operations into scope of the [Bil] module. *) include module type of Infix + (** Brings infix operations into scope of the [Bil] module. *) - (** {2 Functional constructors} *) + (** {2 Functional constructors} *) - (** [move v x -> Move (v,x)] *) val move : var -> exp -> stmt + (** [move v x -> Move (v,x)] *) - (** [jmp x -> Jmp x] *) val jmp : exp -> stmt + (** [jmp x -> Jmp x] *) - (** [special msg -> Special msg] *) val special : string -> stmt + (** [special msg -> Special msg] *) - - - (** [while_ cond stmts -> While (cond,stmts)] *) val while_ : exp -> stmt list -> stmt + (** [while_ cond stmts -> While (cond,stmts)] *) - (** [if_ cond s1 s2 -> If(cond,s1,s2)] *) val if_ : exp -> stmt list -> stmt list -> stmt + (** [if_ cond s1 s2 -> If(cond,s1,s2)] *) - (** [cpuexn number -> CpuExn number] *) val cpuexn : int -> stmt + (** [cpuexn number -> CpuExn number] *) - (** [unsigned -> UNSIGNED] *) val unsigned : cast + (** [unsigned -> UNSIGNED] *) - (** [signed -> SIGNED] *) val signed : cast + (** [signed -> SIGNED] *) - (** [high -> HIGH] *) val high : cast + (** [high -> HIGH] *) - (** [low -> LOW] *) val low : cast + (** [low -> LOW] *) - (** [plus -> PLUS] *) val plus : binop + (** [plus -> PLUS] *) - (** [minus -> MINUS] *) val minus : binop + (** [minus -> MINUS] *) - (** [times -> TIMES] *) val times : binop + (** [times -> TIMES] *) - (** [divide -> DIVIDE] *) val divide : binop + (** [divide -> DIVIDE] *) - (** [sdivide -> SDIVIDE] *) val sdivide : binop + (** [sdivide -> SDIVIDE] *) - (** [modulo -> MOD] *) val modulo : binop + (** [modulo -> MOD] *) - (** [smodulo -> SMOD] *) val smodulo : binop + (** [smodulo -> SMOD] *) - (** [lshift -> LSHIFT] *) val lshift : binop + (** [lshift -> LSHIFT] *) - (** [rshift -> RSHIFT] *) val rshift : binop + (** [rshift -> RSHIFT] *) - (** [arshift -> ARSHIFT] *) val arshift : binop + (** [arshift -> ARSHIFT] *) - (** [bit_and -> AND] *) val bit_and : binop + (** [bit_and -> AND] *) - (** [bit_or -> OR] *) - val bit_or : binop + val bit_or : binop + (** [bit_or -> OR] *) - (** [bit_xor -> XOR] *) val bit_xor : binop + (** [bit_xor -> XOR] *) - (** [eq -> EQ] *) val eq : binop + (** [eq -> EQ] *) - (** [neq -> NEQ] *) val neq : binop + (** [neq -> NEQ] *) - (** [lt -> LT] *) val lt : binop + (** [lt -> LT] *) - (** [le -> LE] *) val le : binop + (** [le -> LE] *) - (** [slt -> SLT] *) val slt : binop + (** [slt -> SLT] *) - (** [sle -> SLE] *) val sle : binop + (** [sle -> SLE] *) - (** [neg -> NEG] *) val neg : unop + (** [neg -> NEG] *) - (** [not -> NOT] *) val not : unop + (** [not -> NOT] *) - (** [load ~mem ~addr endian size -> Load (mem,addr,endian,size)] *) val load : mem:exp -> addr:exp -> endian -> size -> exp + (** [load ~mem ~addr endian size -> Load (mem,addr,endian,size)] *) - (** [store ~mem ~addr exp endian size -> Store(mem,addr,endian,size)] *) val store : mem:exp -> addr:exp -> exp -> endian -> size -> exp + (** [store ~mem ~addr exp endian size -> Store(mem,addr,endian,size)] *) - (** [binop op x y -> BinOp(op,x,y)] *) val binop : binop -> exp -> exp -> exp + (** [binop op x y -> BinOp(op,x,y)] *) - (** [unop op x -> UnOp(op,x)] *) val unop : unop -> exp -> exp + (** [unop op x -> UnOp(op,x)] *) - (** [var v -> Var v] *) val var : var -> exp + (** [var v -> Var v] *) - (** [int w -> Int w] *) val int : word -> exp + (** [int w -> Int w] *) - (** [cast t w x -> Cast (t,w,x)] *) val cast : cast -> int -> exp -> exp + (** [cast t w x -> Cast (t,w,x)] *) - (** [let_ var value expr -> Let(var,value,expr)] *) val let_ : var -> exp -> exp -> exp + (** [let_ var value expr -> Let(var,value,expr)] *) - (** [unknown msg typ -> Unknown(msg,typ)] *) val unknown : string -> typ -> exp + (** [unknown msg typ -> Unknown(msg,typ)] *) - (** [ite ~if_:cond ~then_:e1 ~else_:e2 -> Ite (cond,e1,e2)] *) val ite : if_:exp -> then_:exp -> else_:exp -> exp + (** [ite ~if_:cond ~then_:e1 ~else_:e2 -> Ite (cond,e1,e2)] *) - (** [extract ~hi ~lo x -> Extract (hi,lo,x)] *) val extract : hi:int -> lo:int -> exp -> exp + (** [extract ~hi ~lo x -> Extract (hi,lo,x)] *) - (** [concat x y -> Concat (x,y)] *) val concat : exp -> exp -> exp + (** [concat x y -> Concat (x,y)] *) - (** {2:bil_helpers BIL Helper functions} *) + (** {2:bil_helpers BIL Helper functions} *) - (** [is_referenced x p] is [true] if [x] is referenced in some - expression or statement in program [p], before it is - assigned. *) val is_referenced : var -> stmt list -> bool + (** [is_referenced x p] is [true] if [x] is referenced in some expression or + statement in program [p], before it is assigned. *) - (** [is_assigned x p] is [true] if there exists such [Move] - statement, that [x] occurs on the left side of it. If - [strict] is true, then only unconditional assignments are - accounted. By default, [strict] is [false] *) val is_assigned : ?strict:bool -> var -> stmt list -> bool - - (** [prune_unreferenced ?physicals ?virtuals ?such_that p] remove - all assignments to variables that are not used in the program - [p]. This is a local optimization. The variable is - unreferenced if it is not referenced in its lexical scope, or if - it is referenced after the assignment. A variable is pruned - only if it matches to one of the user specified kind, - described below (no variable matches the default values, so - by default nothing is pruned): - - [such_that] matches a variable [v] for which [such_that v] is - [true]; - - [physicals] matches all physical variables (i.e., registers - and memory locations). See {!Var.is_physical} for more - information. Note: passing [true] to this option is in general - unsound, unless you're absolutely sure, that physical - variables will not live out program [p]; - - [virtuals] matches all virtual variables (i.e., such variables - that were added to a program artificially and are not - represented physically in a program). See {!Var.is_virtual} - for more information on virtual variables. + (** [is_assigned x p] is [true] if there exists such [Move] statement, that + [x] occurs on the left side of it. If [strict] is true, then only + unconditional assignments are accounted. By default, [strict] is [false] *) + val prune_unreferenced : ?such_that:(var -> bool) -> ?physicals:bool -> ?virtuals:bool -> - stmt list -> stmt list + stmt list -> + stmt list + (** [prune_unreferenced ?physicals ?virtuals ?such_that p] remove all + assignments to variables that are not used in the program [p]. This is a + local optimization. The variable is unreferenced if it is not referenced + in its lexical scope, or if it is referenced after the assignment. A + variable is pruned only if it matches to one of the user specified kind, + described below (no variable matches the default values, so by default + nothing is pruned): + + [such_that] matches a variable [v] for which [such_that v] is [true]; + + [physicals] matches all physical variables (i.e., registers and memory + locations). See {!Var.is_physical} for more information. Note: passing + [true] to this option is in general unsound, unless you're absolutely + sure, that physical variables will not live out program [p]; + + [virtuals] matches all virtual variables (i.e., such variables that were + added to a program artificially and are not represented physically in a + program). See {!Var.is_virtual} for more information on virtual + variables. *) - (** [normalize_negatives p] transform [x + y] to [x - abs(y)] if [y < 0] *) val normalize_negatives : stmt list -> stmt list + (** [normalize_negatives p] transform [x + y] to [x - abs(y)] if [y < 0] *) - (** [substitute x y p] substitutes each occurrence of expression [x] by - expression [y] in program [p]. The mnemonic to remember the - order is to recall the sed's [s/in/out] syntax. *) val substitute : exp -> exp -> stmt list -> stmt list + (** [substitute x y p] substitutes each occurrence of expression [x] by + expression [y] in program [p]. The mnemonic to remember the order is to + recall the sed's [s/in/out] syntax. *) - (** [substitute_var x y p] substitutes all free occurrences of - variable [x] in program [p] by expression [y]. A variable is - free if it is not bounded in a preceding statement or not bound - with let expression. *) val substitute_var : var -> exp -> stmt list -> stmt list + (** [substitute_var x y p] substitutes all free occurrences of variable [x] + in program [p] by expression [y]. A variable is free if it is not + bounded in a preceding statement or not bound with let expression. *) - (** [free_vars bil] returns a set of free variables in program - [bil]. Variable is considered free if it is not bound in a - preceding statement or is not bound with [let] expression *) val free_vars : stmt list -> vars + (** [free_vars bil] returns a set of free variables in program [bil]. + Variable is considered free if it is not bound in a preceding statement + or is not bound with [let] expression *) - (** [fold_consts] evaluates constant expressions and statements. *) val fold_consts : stmt list -> stmt list + (** [fold_consts] evaluates constant expressions and statements. *) - (** [fixpoint f] applies transformation [f] until fixpoint is - reached. If the transformation orbit contains non-trivial cycles, - then the transformation will stop at an arbitrary point of a - cycle. *) - val fixpoint : (stmt list -> stmt list) -> (stmt list -> stmt list) + val fixpoint : (stmt list -> stmt list) -> stmt list -> stmt list + (** [fixpoint f] applies transformation [f] until fixpoint is reached. If + the transformation orbit contains non-trivial cycles, then the + transformation will stop at an arbitrary point of a cycle. *) - (** [propagate_consts bil] propagates consts from their reaching definitions. - The implementation computes reaching definition using inference style analysis, - overapproximates while cycles (doesn't compute the meet-over-paths solution), - and ignores memory locations. - @since 1.5 *) val propagate_consts : stmt list -> stmt list - - (** [prune_dead_virtuals bil] removes definitions of virtual variables that are - not live in the provided [bil] program. We assume that virtual variables are used - to represent temporaries, thus their removal is safe. The analysis over-approximates - the while loops, and won't remove any definition that occurs in a while loop body, - or which depends on it. The analysis doesn't track memory locations. + (** [propagate_consts bil] propagates consts from their reaching + definitions. The implementation computes reaching definition using + inference style analysis, overapproximates while cycles (doesn't compute + the meet-over-paths solution), and ignores memory locations. @since 1.5 *) - val prune_dead_virtuals : stmt list -> stmt list + val prune_dead_virtuals : stmt list -> stmt list + (** [prune_dead_virtuals bil] removes definitions of virtual variables that + are not live in the provided [bil] program. We assume that virtual + variables are used to represent temporaries, thus their removal is safe. + The analysis over-approximates the while loops, and won't remove any + definition that occurs in a while loop body, or which depends on it. The + analysis doesn't track memory locations. + @since 1.5 *) (** {3 BIL Special values} - The [Special] statement enables encoding of arbitrary - semantics using [encode attr values] and [decode attr] - to get the values back. The meaning of the [attr] and - [values] is specific to the user domain. - - Example, [encode call "malloc"], where - [call] is the BIL attribute that denotes a call to a - function. See {!call} for more information. - - *) + The [Special] statement enables encoding of arbitrary semantics using + [encode attr values] and [decode attr] to get the values back. The + meaning of the [attr] and [values] is specific to the user domain. + Example, [encode call "malloc"], where [call] is the BIL attribute that + denotes a call to a function. See {!call} for more information. *) (** BIL attributes. - BIL attributes serve the role of constructor for encoding - values as special statements. The attribute defines methods - for encoding and decoding values as a string as well as a - unique attribute name. + BIL attributes serve the role of constructor for encoding values as + special statements. The attribute defines methods for encoding and + decoding values as a string as well as a unique attribute name. - @since 2.3.0 - *) + @since 2.3.0 *) module Attribute : sig - - (** the type of attributes *) type 'a t + (** the type of attributes *) - - (** [declare ?package name ~encode ~decode] declares a new attribute. - - The attribute [package], [name] pair should be unique. If an - attribute with the given name is already registered the - registration will fail. *) val declare : ?package:string -> encode:('a -> string) -> decode:(string -> 'a) -> string -> 'a t - end - + (** [declare ?package name ~encode ~decode] declares a new attribute. - (** [encode attr value] encodes [value] as a special statement. + The attribute [package], [name] pair should be unique. If an attribute + with the given name is already registered the registration will fail. + *) + end - @since 2.3.0 *) val encode : 'a Attribute.t -> 'a -> stmt + (** [encode attr value] encodes [value] as a special statement. + @since 2.3.0 *) - (** [decode attr s] is [Some v] if [s] is [encode attr v]. - - @since 2.3.0 - *) val decode : 'a Attribute.t -> stmt -> 'a option + (** [decode attr s] is [Some v] if [s] is [encode attr v]. - (** [call] is the attribute name for encoding calls. + @since 2.3.0 *) - @since 2.3.0 - *) val call : string Attribute.t + (** [call] is the attribute name for encoding calls. + @since 2.3.0 *) + val intrinsic : string Attribute.t (** [intrinsic] is the attribute for intrinsic calls. - An intrinsic is a low-level, usually microarchitectural - operation. - - @since 2.3.0 - *) - val intrinsic: string Attribute.t - + An intrinsic is a low-level, usually microarchitectural operation. - (** [label] a named code location. + @since 2.3.0 *) - @since 2.4.0 *) val label : string Attribute.t + (** [label] a named code location. + @since 2.4.0 *) + val goto : string Attribute.t (** [goto] represents a control-flow transfer to a named label. @since 2.4.0 *) - val goto : string Attribute.t - - (** Core Theory specification of BIL. *) + (** Core Theory specification of BIL. *) module Theory : sig - - - (** [parser] the parser enables reflection of the bil statements - into core theory terms. To reflect a bil program [prog] into - the theory [Theory], use + val parser : (exp, _, stmt) Theory.Parser.t + (** [parser] the parser enables reflection of the bil statements into core + theory terms. To reflect a bil program [prog] into the theory + [Theory], use {[ - let module Parser = Theory.Parser.Make(Theory) in + let module Parser = Theory.Parser.Make (Theory) in Parser.run Bil.Theory.parser bil - ]} - *) - val parser : (exp,_,stmt) Theory.Parser.t + ]} *) end (** Maps BIL operators to bitvectors. - @since 1.3 - *) + @since 1.3 *) module Apply : sig - + val binop : binop -> word -> word -> word (** [binop op x y] applies [op] to [x] and [y]. - @before 2.5.0 precondition: the expression [BinOp(op,Intx,Int y)] - shall be well-typed. + @before 2.5.0 + precondition: the expression [BinOp(op,Intx,Int y)] shall be + well-typed. - @after 2.5.0 if [x] and [y] have different widths then they - are extended to the same width, which is the width of the - largest operand. If an operator is signed, then it will be - correctly sign-extended. - *) - val binop : binop -> word -> word -> word + after 2.5.0 if [x] and [y] have different widths then they are + extended to the same width, which is the width of the largest operand. + If an operator is signed, then it will be correctly sign-extended. *) - (** [unop op x] applies the unary operation [op] to [x]. *) val unop : unop -> word -> word + (** [unop op x] applies the unary operation [op] to [x]. *) - (** [cast t s x] casts [x] using the cast type [t] to the given - size [s]. - - precondition: the expression [Cast(t,s,Int x)] shall be - well-typed. - *) val cast : cast -> int -> word -> word + (** [cast t s x] casts [x] using the cast type [t] to the given size [s]. + + precondition: the expression [Cast(t,s,Int x)] shall be well-typed. *) end - (** Result of a computation. - @deprecated Use the Primus Framework. - *) type result - [@@deprecated "[since 2018-03] in favor of the Primus Framework"] + (** Result of a computation. + @deprecated Use the Primus Framework. *) (** An interface to a memory storage. - A storage is a mapping from addresses to bytes. For - consistency and efficiency bytes are still reprented with - bitvectors. + A storage is a mapping from addresses to bytes. For consistency and + efficiency bytes are still reprented with bitvectors. - Storages should not take care of aliasing or endianness, as they - are byte addressable. All memory operations are normalized by - Bili. + Storages should not take care of aliasing or endianness, as they are + byte addressable. All memory operations are normalized by Bili. - @deprecated Use the Primus Framework. - *) - class type storage = object('s) - - (** [load a] loads a byte from a a given address [a] *) + @deprecated Use the Primus Framework. *) + class type storage = object ('s) method load : addr -> word option + (** [load a] loads a byte from a a given address [a] *) - (** [save a w] stores byte [w] at address [a] *) method save : addr -> word -> 's + (** [save a w] stores byte [w] at address [a] *) end - [@@deprecated "[since 2018-03] in favor of the Primus Framework"] (** Predefined storage classes - @deprecated Use the Primus Framework - *) + @deprecated Use the Primus Framework *) module Storage : sig - - (** linear storage literally implements operational - semantics, but has O(N) lookup and uses space - very ineffectively, as it is implemented as a list - of assignments. *) class linear : storage + (** linear storage literally implements operational semantics, but has + O(N) lookup and uses space very ineffectively, as it is implemented as + a list of assignments. *) - (** sparse storage is slightly more efficient storage, - in comparison with linear. It uses balanced tree - data structure, and provides logarithmic lookup and - update method. *) class sparse : storage + (** sparse storage is slightly more efficient storage, in comparison with + linear. It uses balanced tree data structure, and provides logarithmic + lookup and update method. *) end [@@deprecated "[since 2018-03] in favor of the Primus Framework"] + (** Value of a result. We slightly diverge from an operational semantics by + allowing a user to provide its own storage implementation. - (** Value of a result. - We slightly diverge from an operational semantics by allowing - a user to provide its own storage implementation. - - In operational semantics a storage is represented - syntactically as + In operational semantics a storage is represented syntactically as {v v1 with [v2,ed] : nat <- v3, - v} - where v1 may be either a [Bot] value, representing an empty - memory (or an absence of knowledge), or another storage. So a - well typed memory object is defined inductively as: + v} + where v1 may be either a [Bot] value, representing an empty memory (or + an absence of knowledge), or another storage. So a well typed memory + object is defined inductively as: {v Inductive memory := | bot : memory | store : (mem : memory) (addr : value) (data : value). - v} + v} - That is equivalent to an assoc list. Although we provide an - assoc list as storage variant (see {!Storage.linear}), the - default storage is implemented slightly more effective, and - uses linear space and provides $log(N)$ lookup and update - methods. Users are encouraged to provide more efficient - storage implementations, for interpreters that rely heave on + That is equivalent to an assoc list. Although we provide an assoc list + as storage variant (see {!Storage.linear}), the default storage is + implemented slightly more effective, and uses linear space and provides + $log(N)$ lookup and update methods. Users are encouraged to provide more + efficient storage implementations, for interpreters that rely heave on memory throughput. - @deprecated Use the Primus Framework - *) + @deprecated Use the Primus Framework *) type value = - | Imm of word (** immediate value *) - | Mem of storage (** memory storage *) - | Bot (** undefined value *) - [@@deprecated "[since 2018-03] in favor of the Primus Framework"] + | Imm of word (** immediate value *) + | Mem of storage (** memory storage *) + | Bot (** undefined value *) (** Result of computation. - Result of an expression evaluation depends on a context. - Thus, each result has a unique identifier, associated with it, - that is usually provided by a context. The result is a - concrete value, that is created whenever an expression is - evaluated under a given context. Since, context is changed - during the evaluation (at least because a new result is - allocated), two consecutive evaluations of the same expression - will give different results. (This property is preserved by - Expi.context class, that provides methods for creating values - of type result). - - Since [Result.Id] is a regular type, it is possible to - associate arbitrary information (like taint information, - formulae, etc) with each result, using associative data - structures, like maps and hash tables. - - @deprecated Use the Primus Framework - *) - module Result : sig + Result of an expression evaluation depends on a context. Thus, each + result has a unique identifier, associated with it, that is usually + provided by a context. The result is a concrete value, that is created + whenever an expression is evaluated under a given context. Since, + context is changed during the evaluation (at least because a new result + is allocated), two consecutive evaluations of the same expression will + give different results. (This property is preserved by Expi.context + class, that provides methods for creating values of type result). - (** result identifier *) + Since [Result.Id] is a regular type, it is possible to associate + arbitrary information (like taint information, formulae, etc) with each + result, using associative data structures, like maps and hash tables. + + @deprecated Use the Primus Framework *) + module Result : sig type id + (** result identifier *) type t = result - (** State monad that evaluates to result *) - type 'a r = (result,'a) Monad.State.t + type 'a r = (result, 'a) Monad.State.t + (** State monad that evaluates to result *) - (** State monad that evaluates to unit *) - type 'a u = (unit,'a) Monad.State.t + type 'a u = (unit, 'a) Monad.State.t + (** State monad that evaluates to unit *) - (** [undefined id] creates a result with the given [id] and - undefined value *) val undefined : id -> t + (** [undefined id] creates a result with the given [id] and undefined + value *) - (** [storage s id] creates a result with the given [id] and - storage [s] as a value *) val storage : storage -> id -> t + (** [storage s id] creates a result with the given [id] and storage [s] as + a value *) - (** [word w id] creates a result with the given [id] and - word [w] as a value *) val word : word -> id -> t + (** [word w id] creates a result with the given [id] and word [w] as a + value *) - (** returns result's identifier *) val id : t -> id + (** returns result's identifier *) - (** returns result's value *) val value : t -> value + (** returns result's value *) - (** Result identifier. - Result is totally ordered regular value. *) + (** Result identifier. Result is totally ordered regular value. *) module Id : sig include Regular.S with type t = id - (** [zero] identifier *) val zero : t + (** [zero] identifier *) - (** [succ x] successor *) val succ : t -> t + (** [succ x] successor *) end module Value : Printable.S with type t = value @@ -2686,29 +2516,26 @@ module Std : sig Bil provides two prefix tries trees. The default one is not normalized and will compare bil statements - literally. This means that comparison is sensitive to variable - names and immediate values. Depending on your context it may be - find or not. For example, two [SP] variables may compare as different - if one of them was obtained from different compilation (and met - the other one through some persistent storage, e.g., file on hard - disk). Moreover, BIL obtained from different lifters will have - different names for the same registers. All this issues are - addressed in normalized [Trie]. *) + literally. This means that comparison is sensitive to variable names and + immediate values. Depending on your context it may be find or not. For + example, two [SP] variables may compare as different if one of them was + obtained from different compilation (and met the other one through some + persistent storage, e.g., file on hard disk). Moreover, BIL obtained + from different lifters will have different names for the same registers. + All this issues are addressed in normalized [Trie]. *) module Trie : sig type normalized_bil - (** [normalize ?subst bil] normalize BIL. If [subst] is provided, - then substitute each occurrence of the fst expression to the - snd expression before the normalization. The effect of - normalization is the following: - - 1. All immediate values are compared equal - 2. All variables are compared nominally - 3. BIL is simplified to reduce the syntactic differences - (but the comparison is still syntactic, and (x + 2) will - be compared differently to (2 + x). - *) val normalize : ?subst:(exp * exp) list -> stmt list -> normalized_bil + (** [normalize ?subst bil] normalize BIL. If [subst] is provided, then + substitute each occurrence of the fst expression to the snd expression + before the normalization. The effect of normalization is the + following: + + 1. All immediate values are compared equal 2. All variables are + compared nominally 3. BIL is simplified to reduce the syntactic + differences (but the comparison is still syntactic, and (x + 2) will + be compared differently to (2 + x). *) module Normalized : Trie.S with type key = normalized_bil include Trie.S with type key = stmt list @@ -2716,50 +2543,53 @@ module Std : sig type pass - (** [register_pass ~desc name pass] provides a pass to the BIL transformation pipeline. - The BIL transformation pipeline is applied after the lifting procedure, - i.e., it is embedded into each [lift] function of all Target modules. - (You can selectively register passes based on architecture by subscribing - to the [Project.Info.arch] variable). All passes that were in the selection - provided to the [select_passes] are applied in the order of the selection - until the fixed point is reached or a loop is detected. By default, no passes - are selected. The [bil] plugin provides a user interface for passes selection, - as well as some useful passes. - @since 1.5 *) val register_pass : ?desc:string -> string -> (t -> t) -> pass + (** [register_pass ~desc name pass] provides a pass to the BIL + transformation pipeline. The BIL transformation pipeline is applied + after the lifting procedure, i.e., it is embedded into each [lift] + function of all Target modules. (You can selectively register passes + based on architecture by subscribing to the [Project.Info.arch] + variable). All passes that were in the selection provided to the + [select_passes] are applied in the order of the selection until the + fixed point is reached or a loop is detected. By default, no passes are + selected. The [bil] plugin provides a user interface for passes + selection, as well as some useful passes. + @since 1.5 *) - (** [select_passes passes] select the [passes] for the BIL transformation pipeline. - See {!register_pass} for more information about the BIL transformation pipeline. - @since 1.5 - *) val select_passes : pass list -> unit + (** [select_passes passes] select the [passes] for the BIL transformation + pipeline. See {!register_pass} for more information about the BIL + transformation pipeline. + @since 1.5 *) + val passes : unit -> pass list (** [passes ()] returns all currently registered passes. @since 1.5 *) - val passes : unit -> pass list (** A BIL analysis pass @since 1.5 *) module Pass : sig - (** [name p] returns the name of the given pass. *) val name : pass -> string + (** [name p] returns the name of the given pass. *) + include Printable.S with type t := pass end end - type typ = Bil.typ [@@deriving bin_io, compare, sexp] - type var = Bil.var [@@deriving bin_io, compare, sexp] - type bil = Bil.t [@@deriving bin_io, compare, sexp] - type binop = Bil.binop [@@deriving bin_io, compare, sexp] - type cast = Bil.cast [@@deriving bin_io, compare, sexp] - type exp = Bil.exp [@@deriving bin_io, compare, sexp] - type stmt = Bil.stmt [@@deriving bin_io, compare, sexp] - type unop = Bil.unop [@@deriving bin_io, compare, sexp] + type typ = Bil.typ [@@deriving bin_io, compare, sexp] + type var = Bil.var [@@deriving bin_io, compare, sexp] + type bil = Bil.t [@@deriving bin_io, compare, sexp] + type binop = Bil.binop [@@deriving bin_io, compare, sexp] + type cast = Bil.cast [@@deriving bin_io, compare, sexp] + type exp = Bil.exp [@@deriving bin_io, compare, sexp] + type stmt = Bil.stmt [@@deriving bin_io, compare, sexp] + type unop = Bil.unop [@@deriving bin_io, compare, sexp] + (** The type of a BIL expression. - Each BIL expression is either an immediate value of a given - width, or a chunk of memory of a give size. The following - predefined constructors are brought to the scope: + Each BIL expression is either an immediate value of a given width, or a + chunk of memory of a give size. The following predefined constructors are + brought to the scope: - {{!bool_t}bool_t}; - {{!reg8_t}reg8_t}; @@ -2769,279 +2599,253 @@ module Std : sig - {{!reg128_t}reg128_t}; - {{!reg256_t}reg256_t}; - {{!mem32_t}mem32_t}; - - {{!mem64_t}mem64_t}. - *) + - {{!mem64_t}mem64_t}. *) module Type : sig (** type is either an immediate value or a storage *) - type t = Bil.typ = - | Imm of int - | Mem of addr_size * size - | Unk + type t = Bil.typ = Imm of int | Mem of addr_size * size | Unk [@@deriving variants] - (** type error *) type error [@@deriving bin_io, compare, sexp] + (** type error *) - (** [imm n] denotes a type of bitvectors of the given bitwidth *) val imm : int -> t + (** [imm n] denotes a type of bitvectors of the given bitwidth *) - (** [mem n m] denotes a type of memory storages with - the element type [imm m] and the index type [imm n] *) val mem : addr_size -> size -> t + (** [mem n m] denotes a type of memory storages with the element type + [imm m] and the index type [imm n] *) - (** [infer exp] is [Ok t] if [exp] is well-typed and has type [t] - otherwise [Error e]. - @since 1.3 - *) - val infer : exp -> (t,error) Result.t + val infer : exp -> (t, error) Result.t + (** [infer exp] is [Ok t] if [exp] is well-typed and has type [t] otherwise + [Error e]. + @since 1.3 *) - (** [infer_exn t] is the same as [ok_exn @@ infer_exn t]. - @since 1.3 - *) val infer_exn : exp -> t + (** [infer_exn t] is the same as [ok_exn @@ infer_exn t]. + @since 1.3 *) - (** [check bil] is [Ok ()] if [bil] is well-typed, otherwise the - first type error [e] is returned as [Error e]. - @since 1.3 - *) - val check : bil -> (unit,error) Result.t + val check : bil -> (unit, error) Result.t + (** [check bil] is [Ok ()] if [bil] is well-typed, otherwise the first type + error [e] is returned as [Error e]. + @since 1.3 *) + val sort : t -> unit Theory.Value.sort (** [sort t] translates the type [t] into a Core Theory sort. - @since 2.6.0 - *) - val sort : t -> unit Theory.Value.sort + @since 2.6.0 *) (** BIL type errors. - Not all syntactically correct expressions make sense. A - well-formed expression that doesn't have defined semantics is - called an ill-typed expression. We further distinguish between - different ill-typed expression to help the error diagnosis. + Not all syntactically correct expressions make sense. A well-formed + expression that doesn't have defined semantics is called an ill-typed + expression. We further distinguish between different ill-typed + expression to help the error diagnosis. - A [bad_mem] ill-typed expression is an expression that during - the evaluation may load or store from a bitvector. + A [bad_mem] ill-typed expression is an expression that during the + evaluation may load or store from a bitvector. - A [bad_imm] ill-typed expression is an expression that during - the evaluation may apply a integer operation on a storage - value. + A [bad_imm] ill-typed expression is an expression that during the + evaluation may apply a integer operation on a storage value. A [bad_type ~exp:t ~got:u] ill-typed expression may evaluate an - expression of type [u], where an expression of type [t] is - expected. For example, when a load address is evaluated to a - type that is different from a type of the memory index, or when - an integer operation is applied to expressions of different - types. + expression of type [u], where an expression of type [t] is expected. For + example, when a load address is evaluated to a type that is different + from a type of the memory index, or when an integer operation is applied + to expressions of different types. - Finally, a [bad_cast] expression is an expression that may - evaluate a bitvector of improper size or when a cast arguments - of a cast expression doesn't make sense. + Finally, a [bad_cast] expression is an expression that may evaluate a + bitvector of improper size or when a cast arguments of a cast expression + doesn't make sense. - @since 1.3 - *) + @since 1.3 *) module Error : sig - type t = error [@@deriving bin_io, compare, sexp] exception T of t [@@deriving sexp] - (** [bad_mem] error occurs when a value of type [mem] was expected *) val bad_mem : t + (** [bad_mem] error occurs when a value of type [mem] was expected *) - (** [bad_imm] error occurs when a value of type [imm] was expected *) val bad_imm : t + (** [bad_imm] error occurs when a value of type [imm] was expected *) - (** [bad_cast] error occurs when parameters to the cast operation - are not valid, or if a store operand is not of a word size *) val bad_cast : t + (** [bad_cast] error occurs when parameters to the cast operation are not + valid, or if a store operand is not of a word size *) - (** [bad_type ~exp ~got] error happens when we expect a value of - type [exp] but got a value of type [got]. *) val bad_type : exp:typ -> got:typ -> t + (** [bad_type ~exp ~got] error happens when we expect a value of type + [exp] but got a value of type [got]. *) - (** [expect_mem ()] raises [T bad_mem] *) val expect_mem : unit -> 'a + (** [expect_mem ()] raises [T bad_mem] *) - (** [expect_imm ()] raises [T bad_imm] *) val expect_imm : unit -> 'a + (** [expect_imm ()] raises [T bad_imm] *) - (** [wrong_cast ()] raises [T bad_cast] *) val wrong_cast : unit -> 'a + (** [wrong_cast ()] raises [T bad_cast] *) - (** [expect t ~got:u] raises [T (bap_type ~exp:t ~got:t)] *) val expect : typ -> got:typ -> 'a + (** [expect t ~got:u] raises [T (bap_type ~exp:t ~got:t)] *) include Regular.S with type t := t - end - (** BIL type is regular *) + include Regular.S with type t := t + (** BIL type is regular *) end - val bool_t : typ (** one bit *) - val reg8_t : typ (** 8-bit width value *) - val reg16_t : typ (** 16-bit width value *) - val reg32_t : typ (** 32-bit width value *) - val reg64_t : typ (** 64-bit width value *) - val reg128_t: typ (** 128-bit width value *) - val reg256_t: typ (** 256-bit width value *) + val bool_t : typ (* one bit *) + val reg8_t : typ (* 8-bit width value *) + val reg16_t : typ (* 16-bit width value *) + val reg32_t : typ (* 32-bit width value *) + val reg64_t : typ (* 64-bit width value *) + val reg128_t : typ (* 128-bit width value *) + val reg256_t : typ (* 256-bit width value *) - (** [mem32_t size] creates a type for memory with [32]-bit addresses - and elements of the given [size]. *) val mem32_t : size -> typ + (** [mem32_t size] creates a type for memory with [32]-bit addresses and + elements of the given [size]. *) - (** [mem64_t size] creates a type for memory with [64]-bit addresses - and elements of the given [size]. *) val mem64_t : size -> typ + (** [mem64_t size] creates a type for memory with [64]-bit addresses and + elements of the given [size]. *) (** BIL variable. - A variable is a symbolic name, that may have different values - during program evaluation. A variable may be virtual, in the - sense that it doesn't correspond to some physical location, or it - can be physical if a variable is a some physical location, e.g., - a register. All variables have types that designate a set of - values over which a variable ranges. + A variable is a symbolic name, that may have different values during + program evaluation. A variable may be virtual, in the sense that it + doesn't correspond to some physical location, or it can be physical if a + variable is a some physical location, e.g., a register. All variables have + types that designate a set of values over which a variable ranges. - BIL variables are regular values. Variables can have - indices. Usually the index is used to represent the same - variable but at different time or space (control flow path). - This is particularly useful for representing variables in SSA - form. + BIL variables are regular values. Variables can have indices. Usually the + index is used to represent the same variable but at different time or + space (control flow path). This is particularly useful for representing + variables in SSA form. - By default, comparison function takes indices into account. In - order to compare two variables regardless their index use [same] - function, or compare with [base x]. + By default, comparison function takes indices into account. In order to + compare two variables regardless their index use [same] function, or + compare with [base x]. {2 Printing} - A default pretty printer doesn't print zero indices and never - prints types. - *) + A default pretty printer doesn't print zero indices and never prints + types. *) module Var : sig - type t = var - - (** [reify v] reifies a core theory variable into the Bil variable. *) val reify : 'a Theory.var -> t + (** [reify v] reifies a core theory variable into the Bil variable. *) - (** [ident v] is the identifier of the variable [v] *) val ident : t -> Theory.Var.ident + (** [ident v] is the identifier of the variable [v] *) - (** [sort v] returns a core theory sort of the variable [v]. *) - val sort : t -> Theory.Value.Sort.Top.t + val sort : t -> Theory.Value.Sort.Top.t + (** [sort v] returns a core theory sort of the variable [v]. *) - (** [create ?register ?fresh name typ] creates a variable with - a given [name] and [typ]e. + val create : ?is_virtual:bool -> ?fresh:bool -> string -> typ -> t + (** [create ?register ?fresh name typ] creates a variable with a given + [name] and [typ]e. A newly created variable has version equal to 0. - If [fresh] is [true] (defaults to [false]), then a unique salt - is mixed to the name of variable, making it unique. + If [fresh] is [true] (defaults to [false]), then a unique salt is mixed + to the name of variable, making it unique. - If [is_virtual] is [true] (defaults to [false]), then a - variable is virtual, i.e., it doesn't correspond to some - physical register or memory location and was added to a program - artificially. - *) - val create : ?is_virtual:bool -> ?fresh:bool -> string -> typ -> t + If [is_virtual] is [true] (defaults to [false]), then a variable is + virtual, i.e., it doesn't correspond to some physical register or memory + location and was added to a program artificially. *) - (** [name var] returns a name associated with variable *) val name : t -> string + (** [name var] returns a name associated with variable *) - (** [typ var] returns a type associated with variable *) val typ : t -> typ + (** [typ var] returns a type associated with variable *) - (** [is_physical v] is [true] if [v] represents a contents of a - physical register. *) val is_physical : t -> bool + (** [is_physical v] is [true] if [v] represents a contents of a physical + register. *) - (** [is_virtual v] is [true] if [v] is not physical *) val is_virtual : t -> bool + (** [is_virtual v] is [true] if [v] is not physical *) - (** [with_index v i] returns a variable, that is identical to - [v], but with the index [i] *) val with_index : t -> int -> t + (** [with_index v i] returns a variable, that is identical to [v], but with + the index [i] *) - (** [index v] is an index of [v] *) val index : t -> int + (** [index v] is an index of [v] *) - (** [base v] returns an original variable. Essentially, - identical to [with_index v 0]. *) val base : t -> t + (** [base v] returns an original variable. Essentially, identical to + [with_index v 0]. *) - (** [same x y] compares variables ignoring indices, i.e., for - variables [x] and [y] the [same x y] is [true] iff [equal - (base x) (base y)] *) val same : t -> t -> bool + (** [same x y] compares variables ignoring indices, i.e., for variables [x] + and [y] the [same x y] is [true] iff [equal (base x) (base y)] *) - (** implements [Regular] interface *) - include Regular.S with type t := t - and type comparator_witness = Bil.var_compare - + (** implements [Regular] interface *) + include + Regular.S with type t := t and type comparator_witness = Bil.var_compare end (** Base class for evaluation contexts. - All interpreters evaluate terms under a given context, - wrapped into a state monad. All context types must be structural - subtypes of the [Context.t]. + All interpreters evaluate terms under a given context, wrapped into a + state monad. All context types must be structural subtypes of the + [Context.t]. The base context is just a mapping from variables to values. - Other than a type [Context.t] this module has n class [t] that - provides a logarithmic implementation for lookup and update - methods. + Other than a type [Context.t] this module has n class [t] that provides a + logarithmic implementation for lookup and update methods. - Since context, for any interpreter must be a structural subtype - of [Context.t] it is not required that this particular should be used. - Any implementation that has matching interface will work. + Since context, for any interpreter must be a structural subtype of + [Context.t] it is not required that this particular should be used. Any + implementation that has matching interface will work. - @deprecated Use the Primus Framework - *) + @deprecated Use the Primus Framework *) module Context : sig - - class t : object('s) - - (** [self#lookup var] evaluate variable [var] to a value that was - previously bound to it. Returns [None] if it is unbound. *) + class t : object ('s) method lookup : var -> Bil.result option + (** [self#lookup var] evaluate variable [var] to a value that was + previously bound to it. Returns [None] if it is unbound. *) - (** [self#update var x] bind variable [var] to value [x]. Returns a a - context updated with the new binding. *) method update : var -> Bil.result -> 's + (** [self#update var x] bind variable [var] to value [x]. Returns a a + context updated with the new binding. *) - (** [self#bindings] returns a current list of bindings. Useful, - for debugging and introspection. *) method bindings : (var * Bil.result) seq + (** [self#bindings] returns a current list of bindings. Useful, for + debugging and introspection. *) end - end [@@deprecated "[since 2018-03] in favor of the Primus Framework"] + end + [@@deprecated "[since 2018-03] in favor of the Primus Framework"] module Type_error : module type of Type.Error with type t = Type.Error.t - (** A BIL type error *) type type_error = Type_error.t [@@deriving bin_io, compare, sexp] + (** A BIL type error *) (** Basic and generic expression evaluator. - The module provides functors that derive base classes and class - types for Expi, Bili, and Biri. + The module provides functors that derive base classes and class types for + Expi, Bili, and Biri. - Note, this is a low-level interface that can be used if you want - to build your own evaluators (interpreters). If you want to use - already existing interpreter without drastically changing the - semantics of BIL consider using the Primus Framework. + Note, this is a low-level interface that can be used if you want to build + your own evaluators (interpreters). If you want to use already existing + interpreter without drastically changing the semantics of BIL consider + using the Primus Framework. - @since 1.3 - *) + @since 1.3 *) module Eval : sig - - (** An evaluator interface parametrized by a [T1] monad. *) - module T1(M : T1) : sig + (** An evaluator interface parametrized by a [T1] monad. *) + module T1 (M : T1) : sig type 'a m = 'a M.t - (** interface that describes semantics of an expression *) + (** interface that describes semantics of an expression *) class type ['r] semantics = object method eval_exp : exp -> 'r m method eval_var : var -> 'r m @@ -3049,8 +2853,8 @@ module Std : sig method eval_load : mem:exp -> addr:exp -> endian -> size -> 'r m method eval_store : mem:exp -> addr:exp -> exp -> endian -> size -> 'r m method eval_binop : binop -> exp -> exp -> 'r m - method eval_unop : unop -> exp -> 'r m - method eval_cast : cast -> int -> exp -> 'r m + method eval_unop : unop -> exp -> 'r m + method eval_cast : cast -> int -> exp -> 'r m method eval_let : var -> exp -> exp -> 'r m method eval_ite : cond:exp -> yes:exp -> no:exp -> 'r m method eval_concat : exp -> exp -> 'r m @@ -3059,7 +2863,7 @@ module Std : sig end (** interface of the evaluation value domain *) - class type virtual ['r,'s] domain = object + class type virtual ['r, 's] domain = object method private virtual undefined : 'r m method private virtual value_of_word : word -> 'r m method private virtual word_of_value : 'r -> word option m @@ -3067,324 +2871,315 @@ module Std : sig end (** interface of the computation effects *) - class type virtual ['r,'s] eff = object + class type virtual ['r, 's] eff = object method virtual lookup : var -> 'r m method virtual update : var -> 'r -> unit m - method virtual load : 's -> addr -> 'r m - method virtual store : 's -> addr -> word -> 'r m + method virtual load : 's -> addr -> 'r m + method virtual store : 's -> addr -> word -> 'r m end end - (** An evaluator parametrized by a [T2] monad. *) - module T2(M : T2) : sig - type ('a,'e) m = ('a,'e) M.t - - (** interface that describes semantics of an expression *) - class type ['a,'r] semantics = object - method eval_exp : exp -> ('r,'a) m - method eval_var : var -> ('r,'a) m - method eval_int : word -> ('r,'a) m - method eval_load : mem:exp -> addr:exp -> endian -> size -> ('r,'a) m - method eval_store : mem:exp -> addr:exp -> exp -> endian -> size -> ('r,'a) m - method eval_binop : binop -> exp -> exp -> ('r,'a) m - method eval_unop : unop -> exp -> ('r,'a) m - method eval_cast : cast -> int -> exp -> ('r,'a) m - method eval_let : var -> exp -> exp -> ('r,'a) m - method eval_ite : cond:exp -> yes:exp -> no:exp -> ('r,'a) m - method eval_concat : exp -> exp -> ('r,'a) m - method eval_extract : int -> int -> exp -> ('r,'a) m - method eval_unknown : string -> typ -> ('r,'a) m + (** An evaluator parametrized by a [T2] monad. *) + module T2 (M : T2) : sig + type ('a, 'e) m = ('a, 'e) M.t + + (** interface that describes semantics of an expression *) + class type ['a, 'r] semantics = object + method eval_exp : exp -> ('r, 'a) m + method eval_var : var -> ('r, 'a) m + method eval_int : word -> ('r, 'a) m + method eval_load : mem:exp -> addr:exp -> endian -> size -> ('r, 'a) m + + method eval_store : + mem:exp -> addr:exp -> exp -> endian -> size -> ('r, 'a) m + + method eval_binop : binop -> exp -> exp -> ('r, 'a) m + method eval_unop : unop -> exp -> ('r, 'a) m + method eval_cast : cast -> int -> exp -> ('r, 'a) m + method eval_let : var -> exp -> exp -> ('r, 'a) m + method eval_ite : cond:exp -> yes:exp -> no:exp -> ('r, 'a) m + method eval_concat : exp -> exp -> ('r, 'a) m + method eval_extract : int -> int -> exp -> ('r, 'a) m + method eval_unknown : string -> typ -> ('r, 'a) m end (** interface of the evaluation value domain *) - class type virtual ['a,'r,'s] domain = object - method private virtual undefined : ('r,'a) m - method private virtual value_of_word : word -> ('r,'a) m - method private virtual word_of_value : 'r -> (word option,'a) m - method private virtual storage_of_value : 'r -> ('s option,'a) m + class type virtual ['a, 'r, 's] domain = object + method private virtual undefined : ('r, 'a) m + method private virtual value_of_word : word -> ('r, 'a) m + method private virtual word_of_value : 'r -> (word option, 'a) m + method private virtual storage_of_value : 'r -> ('s option, 'a) m end (** interface of the computation effects *) - class type virtual ['a,'r,'s] eff = object - method virtual lookup : var -> ('r,'a) m - method virtual update : var -> 'r -> (unit,'a) m - method virtual load : 's -> addr -> ('r,'a) m - method virtual store : 's -> addr -> word -> ('r,'a) m + class type virtual ['a, 'r, 's] eff = object + method virtual lookup : var -> ('r, 'a) m + method virtual update : var -> 'r -> (unit, 'a) m + method virtual load : 's -> addr -> ('r, 'a) m + method virtual store : 's -> addr -> word -> ('r, 'a) m end end - (** An interface of a basic evaluator in a [T1] monad *) + (** An interface of a basic evaluator in a [T1] monad *) module type S = sig type 'a m + module M : T1 with type 'a t = 'a m class type ['r] semantics = ['r] T1(M).semantics - class type virtual ['r,'s] domain = ['r,'s] T1(M).domain - class type virtual ['r,'s] eff = ['r,'s] T1(M).eff + class type virtual ['r, 's] domain = ['r, 's] T1(M).domain + class type virtual ['r, 's] eff = ['r, 's] T1(M).eff - (** a virtual base class for all evaluators *) - class virtual ['r,'s] t : object - inherit ['r,'s] domain - inherit ['r,'s] eff + (** a virtual base class for all evaluators *) + class virtual ['r, 's] t : object + inherit ['r, 's] domain + inherit ['r, 's] eff inherit ['r] semantics method type_error : type_error -> 'r m method division_by_zero : unit -> 'r m end end - (** An interface of a basic evaluator in a [T1] monad *) + (** An interface of a basic evaluator in a [T1] monad *) module type S2 = sig - type ('a,'e) m - module M : T2 with type ('a,'e) t = ('a,'e) m - - class type ['a,'r] semantics = ['a,'r] T2(M).semantics - class type virtual ['a,'r,'s] domain = ['a,'r,'s] T2(M).domain - class type virtual ['a,'r,'s] eff = ['a,'r,'s] T2(M).eff - - (** a virtual base class for all evaluators *) - class virtual ['a,'r,'s] t : object - inherit ['a,'r,'s] domain - inherit ['a,'r,'s] eff - inherit ['a,'r] semantics - method type_error : type_error -> ('r,'a) m - method division_by_zero : unit -> ('r,'a) m + type ('a, 'e) m + + module M : T2 with type ('a, 'e) t = ('a, 'e) m + + class type ['a, 'r] semantics = ['a, 'r] T2(M).semantics + class type virtual ['a, 'r, 's] domain = ['a, 'r, 's] T2(M).domain + class type virtual ['a, 'r, 's] eff = ['a, 'r, 's] T2(M).eff + + (** a virtual base class for all evaluators *) + class virtual ['a, 'r, 's] t : object + inherit ['a, 'r, 's] domain + inherit ['a, 'r, 's] eff + inherit ['a, 'r] semantics + method type_error : type_error -> ('r, 'a) m + method division_by_zero : unit -> ('r, 'a) m end end - (** [Make2(M)] provides an implementation of the [S2] interface - lifted into the monad [M]. *) - module Make2(M : Monad.S2) : S2 with type ('a,'e) m := ('a,'e) M.t - and module M := M + (** [Make2(M)] provides an implementation of the [S2] interface lifted into + the monad [M]. *) + module Make2 (M : Monad.S2) : + S2 with type ('a, 'e) m := ('a, 'e) M.t and module M := M - (** [Make(M)] provides an implementation of the [S2] interface - lifted into the monad [M]. *) - module Make(M : Monad.S) : S with type 'a m := 'a M.t - and module M := M + (** [Make(M)] provides an implementation of the [S2] interface lifted into + the monad [M]. *) + module Make (M : Monad.S) : S with type 'a m := 'a M.t and module M := M end (** Expression Language Interpreter. - @deprecated Use the Primus Framework - *) + @deprecated Use the Primus Framework *) module Expi : sig - open Bil.Result - (** - - An extensible interpreter for BIL expressions. + (** An extensible interpreter for BIL expressions. - Note: before diving into the deepness of Expi module consider - [Exp.eval] function, that expose an easy interface to concrete - evaluation of expressions. + Note: before diving into the deepness of Expi module consider [Exp.eval] + function, that expose an easy interface to concrete evaluation of + expressions. - Expi implements an operational semantics described in [[1]]. + Expi implements an operational semantics described in [[1]]. - @see - - [[1]]: BIL Semantics. - *) + @see + [[1]]: BIL Semantics. *) (** Context for expression evaluation. - Context provides a unique identifier for each freshly created - value. *) - class context : object('s) + Context provides a unique identifier for each freshly created value. *) + class context : object ('s) inherit Context.t - (** creates a fresh new result, containing an undefined value, - and returns it with a modified context. *) method create_undefined : 's * Bil.result + (** creates a fresh new result, containing an undefined value, and returns + it with a modified context. *) - (** creates a fresh new result, containing a given word, - and returns it with a modified context. *) method create_word : word -> 's * Bil.result + (** creates a fresh new result, containing a given word, and returns it + with a modified context. *) - (** creates a fresh new result, containing a given storage, - and returns it with a modified context. *) method create_storage : Bil.storage -> 's * Bil.result + (** creates a fresh new result, containing a given storage, and returns it + with a modified context. *) end module type S = sig + type ('a, 'e) state + type 'a u = (unit, 'a) state + type 'a r = (Bil.result, 'a) state - type ('a,'e) state - type 'a u = (unit,'a) state - type 'a r = (Bil.result,'a) state - - module M : T2 with type ('a,'e) t = ('a,'e) state + module M : T2 with type ('a, 'e) t = ('a, 'e) state - (** @since 1.3 *) - module Eval : Eval.S2 with type ('a,'e) m := ('a,'e) state - and module M := M + (** @since 1.3 *) + module Eval : + Eval.S2 with type ('a, 'e) m := ('a, 'e) state and module M := M (** Expression interpreter. - Expi is a base class for all other interpreters (see {!bili} - and {!biri}, that do all the hard work. Expi recognizes a - language defined by [exp] type. It evaluates arbitrary - expressions under provided {{!Context}context}. + Expi is a base class for all other interpreters (see {!bili} and + {!biri}, that do all the hard work. Expi recognizes a language defined + by [exp] type. It evaluates arbitrary expressions under provided + {{!Context}context}. To create new interpreter use operator [new]: {v - let expi = new expi;; - val expi : _#Expi.context expi = - v} + let expi = new expi;; + val expi : _#Expi.context expi = + v} Note: The type [_#Expi.context] is weakly polymorphic subtype of [Expi.context][1]. Basically, this means, that the type is not - generalized and will be instantiated when used and fixed - afterwards. + generalized and will be instantiated when used and fixed afterwards. {v - let r = expi#eval_exp Bil.(int Word.b0 lor int Word.b1);; - val r : _#Expi.context Bil.Result.r = - v} + let r = expi#eval_exp Bil.(int Word.b0 lor int Word.b1);; + val r : _#Expi.context Bil.Result.r = + v} - The returned value is a state monad parametrized by a subtype - of class [Expi.context]. The state monad is a chain of - computations, where each computation is merely a function from - state to a state paired with the result of computation. The - state is accessible inside the computation and can be - changed. + The returned value is a state monad parametrized by a subtype of class + [Expi.context]. The state monad is a chain of computations, where each + computation is merely a function from state to a state paired with the + result of computation. The state is accessible inside the computation + and can be changed. - To run the computation use [Monad.State.eval] function, that - accepts a state monad and an initial value. Here we can - provide any subtype of [Expi.context] as an initial - value. Let start with a [Expi.context] as a first approximation: + To run the computation use [Monad.State.eval] function, that accepts a + state monad and an initial value. Here we can provide any subtype of + [Expi.context] as an initial value. Let start with a [Expi.context] as + a first approximation: {v - let x = Monad.State.eval r (new Expi.context);; - val x : Bil.result = [0x3] true - v} + let x = Monad.State.eval r (new Expi.context);; + val x : Bil.result = [0x3] true + v} - The expression evaluates to [true], and the result is tagged - with an identifier [[0x3]]. The [Exp.context] assigns a unique - identifier for each freshly created result. Tag [[0x3]] means - that this was the third value created under provided context. + The expression evaluates to [true], and the result is tagged with an + identifier [[0x3]]. The [Exp.context] assigns a unique identifier for + each freshly created result. Tag [[0x3]] means that this was the third + value created under provided context. - If the only thing, that you need is just to evaluate an - expression, then you can just use [Exp.eval] function: + If the only thing, that you need is just to evaluate an expression, + then you can just use [Exp.eval] function: {v - Exp.eval Bil.(int Word.b0 lor int Word.b1);; - - : Bil.value = true - v} + Exp.eval Bil.(int Word.b0 lor int Word.b1);; + - : Bil.value = true + v} - The main strength of [expi] is its extensibility. Let's write - a expression evaluator that will record a trace of evaluation: + The main strength of [expi] is its extensibility. Let's write a + expression evaluator that will record a trace of evaluation: {[ - class context = object - inherit Expi.context - val events : (exp * Bil.result) list = [] - method add_event exp res = {< events = (exp,res) :: events >} - method show_events = List.rev events - end + class context = + object + inherit Expi.context + val events : (exp * Bil.result) list = [] + method add_event exp res = {} + method show_events = List.rev events + end ]} {[ - class ['a] exp_tracer = object - constraint 'a = #context - inherit ['a] expi as super - method! eval_exp e = - let open Monad.State in - super#eval_exp e >>= fun r -> - get () >>= fun ctxt -> - put (ctxt#add_event e r) >>= fun () -> - return r - end;; + class ['a] exp_tracer = + object + constraint 'a = #context + inherit ['a] expi as super + + method! eval_exp e = + let open Monad.State in + super#eval_exp e >>= fun r -> + get () >>= fun ctxt -> + put (ctxt#add_event e r) >>= fun () -> return r + end ]} - Note : We made our [exp_tracer] class polymorphic as a - courtesy to our fellow programmer, that may want to reuse it. - We can define it by inheriting from [expi] parametrized with - our context type, like this: [inherit [context] expi] + Note : We made our [exp_tracer] class polymorphic as a courtesy to our + fellow programmer, that may want to reuse it. We can define it by + inheriting from [expi] parametrized with our context type, like this: + [inherit [context] expi] - Also, there is no need to write a [constraint], as it will be - inferred automatically. + Also, there is no need to write a [constraint], as it will be inferred + automatically. - Now, let's try to use our tracer. We will use - [Monad.State.run] function, that returns both, the evaluated - value and the context. (We can also use [Monad.State.exec], if - we're not interested in value at all): + Now, let's try to use our tracer. We will use [Monad.State.run] + function, that returns both, the evaluated value and the context. (We + can also use [Monad.State.exec], if we're not interested in value at + all): {v - let expi = new exp_tracer;; - val expi : _#context exp_tracer = - # let r = expi#eval_exp Bil.(int Word.b0 lor int Word.b1);; - val r : _#context Bil.Result.r = - # let r,ctxt = Monad.State.run r (new context) ;; - val r : Bil.result = [0x3] true - val ctxt : context = - ctxt#events;; - - : (exp * Bil.result) list = - [(false, [0x1] false); (true, [0x2] true); (false | true, [0x3] true)] - v} + let expi = new exp_tracer;; + val expi : _#context exp_tracer = + # let r = expi#eval_exp Bil.(int Word.b0 lor int Word.b1);; + val r : _#context Bil.Result.r = + # let r,ctxt = Monad.State.run r (new context) ;; + val r : Bil.result = [0x3] true + val ctxt : context = + ctxt#events;; + - : (exp * Bil.result) list = + [(false, [0x1] false); (true, [0x2] true); (false | true, [0x3] true)] + v} - [1]: The weakness of the type variable is introduced by - a value restriction and can't be relaxed since it is invariant - in state monad. + [1]: The weakness of the type variable is introduced by a value + restriction and can't be relaxed since it is invariant in state monad. *) class ['a] t : object constraint 'a = #context + inherit ['a, Bil.result] Eval.semantics (** {2 Interaction with environment} *) - (** creates an empty storage. If you want to provide - your own implementation of storage, then it is definitely - the right place. *) - method empty : Bil.storage + method empty : Bil.storage + (** creates an empty storage. If you want to provide your own + implementation of storage, then it is definitely the right place. *) - (** a variable is looked up in a context *) method lookup : var -> 'a r + (** a variable is looked up in a context *) - (** a variable is bind to a value.*) method update : var -> Bil.result -> 'a u + (** a variable is bind to a value.*) - (** a byte is loaded from a given address *) - method load : Bil.storage -> addr -> 'a r + method load : Bil.storage -> addr -> 'a r + (** a byte is loaded from a given address *) - (** a byte is stored to a a given address *) - method store : Bil.storage -> addr -> word -> 'a r + method store : Bil.storage -> addr -> word -> 'a r + (** a byte is stored to a a given address *) - (** {2 Error conditions} *) + (** {2 Error conditions} *) - (** a given typing error has occurred *) method type_error : type_error -> 'a r + (** a given typing error has occurred *) - (** we can't do this! *) method division_by_zero : unit -> 'a r + (** we can't do this! *) - (** called when storage doesn't contain the addr *) method undefined_addr : addr -> 'a r + (** called when storage doesn't contain the addr *) - (** called when context doesn't know the variable *) - method undefined_var : var -> 'a r + method undefined_var : var -> 'a r + (** called when context doesn't know the variable *) end end - module Make(M : Monad.State.S2) : S - with type ('a,'e) state = ('a,'e) M.t - - include S with type ('a,'e) state = ('a,'e) Monad.State.t - end [@@deprecated "[since 2018-03] in favor of the Primus Framework"] + module Make (M : Monad.State.S2) : S with type ('a, 'e) state = ('a, 'e) M.t + include S with type ('a, 'e) state = ('a, 'e) Monad.State.t + end + [@@deprecated "[since 2018-03] in favor of the Primus Framework"] - (** Expression {{!Expi}interpreter} - @deprecated Use the Primus Framework - *) class ['a] expi : ['a] Expi.t - [@@deprecated "[since 2018-03] in favor of the Primus Framework"] + (** Expression {{!Expi}interpreter} + @deprecated Use the Primus Framework *) (** BIL Interpreter. - [bili] extends [expi] with methods for evaluating BIL - statements, thus allowing one to interpret BIL AST. To - interpret BIL in the intermediate representation use - {{!Biri}biri}. + [bili] extends [expi] with methods for evaluating BIL statements, thus + allowing one to interpret BIL AST. To interpret BIL in the intermediate + representation use {{!Biri}biri}. - Also, if you don't need to change the default behavior - of the interpreter, then you may use {!Stmt.eval} that - exposes an easier interface for BIL evaluation. For example, + Also, if you don't need to change the default behavior of the interpreter, + then you may use {!Stmt.eval} that exposes an easier interface for BIL + evaluation. For example, {v let x = Var.create "x" bool_t;; @@ -3393,30 +3188,27 @@ module Std : sig val ctxt : Bili.context = ctxt#bindings |> Seq.to_list;; - : (var * Bil.result) list = [(x, [0x1] false)] - v} + v} - @deprecated Use the Primus Framework - *) + @deprecated Use the Primus Framework *) module Bili : sig - open Bil.Result - (** [Bili.context] extends [Expi.context] with PC (Program - Counter). *) - class context : object('s) + (** [Bili.context] extends [Expi.context] with PC (Program Counter). *) + class context : object ('s) inherit Expi.context method pc : Bil.value method with_pc : Bil.value -> 's end module type S = sig - type ('a,'e) state - type 'a u = (unit,'a) state - type 'a r = (Bil.result,'a) state + type ('a, 'e) state + type 'a u = (unit, 'a) state + type 'a r = (Bil.result, 'a) state - module Expi : Expi.S with type ('a,'e) state = ('a,'e) state + module Expi : Expi.S with type ('a, 'e) state = ('a, 'e) state - (** Base class for BIL interpreters *) + (** Base class for BIL interpreters *) class ['a] t : object constraint 'a = #context inherit ['a] Expi.t @@ -3431,103 +3223,95 @@ module Std : sig end end - module Make(M : Monad.State.S2) : S with type ('a,'e) state = ('a,'e) M.t - include S with type ('a,'e) state = ('a,'e) Monad.State.t - end [@@deprecated "[since 2018-03] in favor of the Primus Framework"] - + module Make (M : Monad.State.S2) : S with type ('a, 'e) state = ('a, 'e) M.t + include S with type ('a, 'e) state = ('a, 'e) Monad.State.t + end + [@@deprecated "[since 2018-03] in favor of the Primus Framework"] - (** BIL {{!Bili}interpreter} - @deprecated Use the Primus Framework - *) class ['a] bili : ['a] Bili.t - [@@deprecated "[since 2018-03] in favor of the Primus Framework"] + (** BIL {{!Bili}interpreter} + @deprecated Use the Primus Framework *) (** Effect analysis. - Effect analysis describes how an expression computation - interacts with the outside world. By the outside world we - understand the whole of the CPU state (including the hidden - state) and the memory. We distinguish, so far, between the - following sorts of effects: + Effect analysis describes how an expression computation interacts with the + outside world. By the outside world we understand the whole of the CPU + state (including the hidden state) and the memory. We distinguish, so far, + between the following sorts of effects: - - coeffects - a value of an expression depends on the outside - world, that is further subdivided by the read effect, when an - expression reads a CPU register, and the load effect, when an - expression an expression accesses the memory. + - coeffects - a value of an expression depends on the outside world, that + is further subdivided by the read effect, when an expression reads a CPU + register, and the load effect, when an expression an expression accesses + the memory. - - effects - a value modifies the state of the world, by either - storing a value in the memory, or by raising a CPU exception - via the division by zero or accessing the memory. + - effects - a value modifies the state of the world, by either storing a + value in the memory, or by raising a CPU exception via the division by + zero or accessing the memory. - An expression that doesn't have effects or coeffects is - idempotent and can be moved arbitrary in a tree, removed or - substituted. An expression that has only [coeffects] is - generative and can be reproduced without a significant change - of semantics. + An expression that doesn't have effects or coeffects is idempotent and can + be moved arbitrary in a tree, removed or substituted. An expression that + has only [coeffects] is generative and can be reproduced without a + significant change of semantics. Examples: - [x ^ x], [x+1], [x] - have coeffects; - [x[y]] - has both effects (may raise pagefault) and coeffects; - [7 * 8], [42] - have no effects. - @since 1.3 - - *) + @since 1.3 *) module Eff : sig - - (** a set of expression effects *) type t + (** a set of expression effects *) - (** an expression doesn't have any effects *) val none : t + (** an expression doesn't have any effects *) - (** an expression reads a register (nonvirtual) variable. *) val read : t + (** an expression reads a register (nonvirtual) variable. *) - (** an expression loads a value from a memory *) val load : t + (** an expression loads a value from a memory *) - (** an expression stores a value in a memory *) val store : t + (** an expression stores a value in a memory *) - (** an expression raises a CPU exception *) val raise : t + (** an expression raises a CPU exception *) - (** [reads eff] if [read] in [eff] *) val reads : t -> bool + (** [reads eff] if [read] in [eff] *) - (** [loads eff] if [load] in [eff] *) val loads : t -> bool + (** [loads eff] if [load] in [eff] *) - (** [stores eff] if [load] in [eff] *) val stores : t -> bool + (** [stores eff] if [load] in [eff] *) - (** [raises eff] if [raise] in [eff] *) val raises : t -> bool + (** [raises eff] if [raise] in [eff] *) - (** [has_effects eff] if [stores eff] || [raises eff] *) val has_effects : t -> bool + (** [has_effects eff] if [stores eff] || [raises eff] *) - (** [has_coeffects eff] if [loads eff] || [reads eff] *) - val has_coeffects : t -> bool + val has_coeffects : t -> bool + (** [has_coeffects eff] if [loads eff] || [reads eff] *) - (** [compute x] computes a set of effects produced by [x]. The - result is a sound overapproximation of the real effects, - i.e., if an effect is computed then it may really happen, - but if it is not computed, then it is proved that it is not - possible for the expression to have this effect. + (** [compute x] computes a set of effects produced by [x]. The result is a + sound overapproximation of the real effects, i.e., if an effect is + computed then it may really happen, but if it is not computed, then it + is proved that it is not possible for the expression to have this + effect. - The analysis applies a simple abstract interpretation to - approximate arithmetic and prove an absence of the division - by zero. The load/store/read analysis is more precise than - the division by zero, as the only source of the imprecision - is a presence of conditional expressions. + The analysis applies a simple abstract interpretation to approximate + arithmetic and prove an absence of the division by zero. The + load/store/read analysis is more precise than the division by zero, as + the only source of the imprecision is a presence of conditional + expressions. Requires: normalized and simplified expression. - Warning: the above should be either relaxed or expressed in - the type system. - *) + Warning: the above should be either relaxed or expressed in the type + system. *) val compute : exp -> t end @@ -3536,22 +3320,20 @@ module Std : sig module Exp : sig type t = Bil.exp - + val slot : (Theory.Value.cls, exp) KB.slot (** the Exp.t property. This property of a value denotes it in terms of Bil expressions.*) - val slot : (Theory.Value.cls, exp) KB.slot - (** All visitors provide some information about the current - position of the visitor *) + (** All visitors provide some information about the current position of the + visitor *) class state : object - + val exps_stack : exp list (** a stack of expr, that are parents for the currently visiting expression *) - val exps_stack : exp list - (** is [true] if currently visiting entry is executed conditionally *) val under_condition : bool + (** is [true] if currently visiting entry is executed conditionally *) end (** expression visitor. @@ -3563,115 +3345,123 @@ module Std : sig For each AST constructor [C] the visitor provides three methods: [enter_C], [visit_C], [leave_C]. The default implementation for [enter_C] and [leave_C] is to return its argument. The default - implementation for [visit_C] is the following: - 1. call [enter_C] - 2. visit all children - 3. call [leave_C]. + implementation for [visit_C] is the following: 1. call [enter_C] 2. + visit all children 3. call [leave_C]. - It is recommended to override [enter_C] method if you only need - to visit [C] constructor without changing a way you're visiting - the tree. + It is recommended to override [enter_C] method if you only need to visit + [C] constructor without changing a way you're visiting the tree. - See also {!Bil.visitor} and {!Term.visitor} for visiting a - program in AST and Graph representation, respectively. - *) + See also {!Bil.visitor} and {!Term.visitor} for visiting a program in + AST and Graph representation, respectively. *) class ['a] visitor : object inherit state - method enter_exp : t -> 'a -> 'a method visit_exp : t -> 'a -> 'a method leave_exp : t -> 'a -> 'a - (** [Load (src,addr,endian,size)] *) method enter_load : mem:t -> addr:t -> endian -> size -> 'a -> 'a + (** [Load (src,addr,endian,size)] *) + method visit_load : mem:t -> addr:t -> endian -> size -> 'a -> 'a method leave_load : mem:t -> addr:t -> endian -> size -> 'a -> 'a - (** [Store (dst,addr,src,endian,size)] *) - method enter_store : mem:t -> addr:t -> exp:t -> endian -> size -> 'a -> 'a - method visit_store : mem:t -> addr:t -> exp:t -> endian -> size -> 'a -> 'a - method leave_store : mem:t -> addr:t -> exp:t -> endian -> size -> 'a -> 'a + method enter_store : + mem:t -> addr:t -> exp:t -> endian -> size -> 'a -> 'a + (** [Store (dst,addr,src,endian,size)] *) + + method visit_store : + mem:t -> addr:t -> exp:t -> endian -> size -> 'a -> 'a + + method leave_store : + mem:t -> addr:t -> exp:t -> endian -> size -> 'a -> 'a - (** [BinOp (op,e1,e2)] *) method enter_binop : binop -> t -> t -> 'a -> 'a + (** [BinOp (op,e1,e2)] *) + method visit_binop : binop -> t -> t -> 'a -> 'a method leave_binop : binop -> t -> t -> 'a -> 'a - (** [Unop (op,e)] *) method enter_unop : unop -> t -> 'a -> 'a + (** [Unop (op,e)] *) + method visit_unop : unop -> t -> 'a -> 'a method leave_unop : unop -> t -> 'a -> 'a - (** [Cast(kind,size,e)] *) method enter_cast : cast -> int -> t -> 'a -> 'a + (** [Cast(kind,size,e)] *) + method visit_cast : cast -> int -> t -> 'a -> 'a method leave_cast : cast -> int -> t -> 'a -> 'a - (** [Let (v,t,body)] *) method enter_let : var -> exp:t -> body:t -> 'a -> 'a + (** [Let (v,t,body)] *) + method visit_let : var -> exp:t -> body:t -> 'a -> 'a method leave_let : var -> exp:t -> body:t -> 'a -> 'a - (** [Ite (cond,yes,no)] *) method enter_ite : cond:t -> yes:t -> no:t -> 'a -> 'a + (** [Ite (cond,yes,no)] *) + method visit_ite : cond:t -> yes:t -> no:t -> 'a -> 'a method leave_ite : cond:t -> yes:t -> no:t -> 'a -> 'a - (** [Extract (hi,lo,e)] *) method enter_extract : hi:int -> lo:int -> t -> 'a -> 'a + (** [Extract (hi,lo,e)] *) + method visit_extract : hi:int -> lo:int -> t -> 'a -> 'a method leave_extract : hi:int -> lo:int -> t -> 'a -> 'a - (** [Concat(e1,e2)] *) method enter_concat : t -> t -> 'a -> 'a + (** [Concat(e1,e2)] *) + method visit_concat : t -> t -> 'a -> 'a method leave_concat : t -> t -> 'a -> 'a - (** {2 Leaves} *) - (** [Int w] *) method enter_int : word -> 'a -> 'a + (** {2 Leaves} + [Int w] *) + method visit_int : word -> 'a -> 'a method leave_int : word -> 'a -> 'a - (** [Var v] *) method enter_var : var -> 'a -> 'a + (** [Var v] *) + method visit_var : var -> 'a -> 'a method leave_var : var -> 'a -> 'a - (** [Unknown (str,typ)] *) method enter_unknown : string -> typ -> 'a -> 'a + (** [Unknown (str,typ)] *) + method visit_unknown : string -> typ -> 'a -> 'a method leave_unknown : string -> typ -> 'a -> 'a end - (** A visitor with a shortcut. - Finder is a specialization of a visitor, that uses [return] as its - folding argument. At any time you can stop the traversing by - calling [return] function of the provided argument (which is by - itself is a record with one field - a function accepting argument + (** A visitor with a shortcut. Finder is a specialization of a visitor, that + uses [return] as its folding argument. At any time you can stop the + traversing by calling [return] function of the provided argument (which + is by itself is a record with one field - a function accepting argument of type ['a option]).*) class ['a] finder : object inherit ['a option return] visitor method find : t -> 'a option end - (** Exp mapper. - By default performs deep identity mapping. Non-leaf methods - deconstructs terms, calls corresponding methods on its parts - and the constructs it back. So if you're overriding a non-leaf - method, then make sure that you called the parent method if - you want a normal traversal. + (** Exp mapper. By default performs deep identity mapping. Non-leaf methods + deconstructs terms, calls corresponding methods on its parts and the + constructs it back. So if you're overriding a non-leaf method, then make + sure that you called the parent method if you want a normal traversal. A usual template for method overriding is: {[ - object(self) + object (self) inherit mapper as super - method map_X arg= + + method map_X arg = let x = super#map_X arg in do_mapping x end - ]} - *) + ]} *) class mapper : object inherit state method map_exp : t -> t @@ -3690,290 +3480,280 @@ module Std : sig method map_unknown : string -> typ -> t end - (** [fold visitor ~init exp] traverse the [exp] tree with - provided [visitor]. For example, the following will collect - all address that are accessed with a load operation: - [{ - let collect_load_addresses = Exp.fold ~init:[] (object - inherit [word list] Bil.visitor - method! enter_load ~mem ~addr _ _ addrs = - match addr with - | Bil.Int addr -> addr :: addrs - | _ -> addrs - end) - }] - See also {!Bil.fold} and {!Stmt.fold} - *) val fold : 'a #visitor -> init:'a -> t -> 'a + (** [fold visitor ~init exp] traverse the [exp] tree with provided + [visitor]. For example, the following will collect all address that are + accessed with a load operation: + [{ let collect_load_addresses = Exp.fold ~init:[] (object inherit [word + list] Bil.visitor method! enter_load ~mem ~addr _ _ addrs = match addr + with | Bil.Int addr -> addr :: addrs | _ -> addrs end) }] See also + {!Bil.fold} and {!Stmt.fold} *) - (** [iter visitor exp] iterates over all terms of the [exp] using - provided visitor. See also {!Bil.iter} and {!Stmt.iter} *) val iter : unit #visitor -> t -> unit + (** [iter visitor exp] iterates over all terms of the [exp] using provided + visitor. See also {!Bil.iter} and {!Stmt.iter} *) - (** [find finder exp] returns [Some thing] if finder finds some - [thing]. See also {!Bil.find} and {!Stmt.find} *) val find : 'a #finder -> t -> 'a option + (** [find finder exp] returns [Some thing] if finder finds some [thing]. See + also {!Bil.find} and {!Stmt.find} *) - (** [map mapper exp] maps [exp] tree using provided [mapper]. - See also {!Bil.map} *) - val map : #mapper -> t -> t + val map : #mapper -> t -> t + (** [map mapper exp] maps [exp] tree using provided [mapper]. See also + {!Bil.map} *) - (** [exists finder exp] is [true] if [finder] finds - something. See also {!Bil.exists} and {Stmt.exists} *) val exists : unit #finder -> t -> bool + (** [exists finder exp] is [true] if [finder] finds something. See also + {!Bil.exists} and {!Stmt.exists} *) - (** [substitute pat rep x] subsitutes each occurrence of an - expression [pat] in [x] with an expression [rep] *) val substitute : exp -> exp -> exp -> exp + (** [substitute pat rep x] subsitutes each occurrence of an expression [pat] + in [x] with an expression [rep] *) + val normalize : exp -> exp (** [normalize x] ensures no-lets and normalized-memory (BNF2). - Inlines all let expressions, expands multibyte loads to a - concatenation of one byte loads, and expands multibyte stores - into chains of one byte stores. + Inlines all let expressions, expands multibyte loads to a concatenation + of one byte loads, and expands multibyte stores into chains of one byte + stores. The function may duplicate expressions even those that are not generative, thus breaking the semantics of the expression. Precondition: [x] is well-typed and in BNF1. - See {!Stmt.normalize} for the definition of the BNF1 and - BNF2. - - @since 1.3 - *) - val normalize : exp -> exp + See {!Stmt.normalize} for the definition of the BNF1 and BNF2. + @since 1.3 *) - (** [simpl ~ignore:effects x] iff expression [x] is well-typed, - then returns an expression with the same semantics as [x], - that might smaller according to some metrics. A subexression - is removed from [x] if it doesn't manifest any effects other - than those that are specified with the [~ignore:effects] - parameter (defaults to an empty list). + val simpl : ?ignore:Eff.t list -> exp -> exp + (** [simpl ~ignore:effects x] iff expression [x] is well-typed, then returns + an expression with the same semantics as [x], that might smaller + according to some metrics. A subexression is removed from [x] if it + doesn't manifest any effects other than those that are specified with + the [~ignore:effects] parameter (defaults to an empty list). The following code simplification are applied: - - constant folding: if an expression can be computed - statically then it is substituted with the result of - computation, e.g., [1 + 2 -> 3] + - constant folding: if an expression can be computed statically then it + is substituted with the result of computation, e.g., [1 + 2 -> 3] - - neutral element elimination: binary operations with one of - the operands being known to be neutral, are substituted with - the other operand, e.g., [x * 1 -> x] + - neutral element elimination: binary operations with one of the + operands being known to be neutral, are substituted with the other + operand, e.g., [x * 1 -> x] - - zero element propagation: binary operations applied to a - zero element are substituted with the zero element, e.g., - [x * 0 -> 0] + - zero element propagation: binary operations applied to a zero element + are substituted with the zero element, e.g., [x * 0 -> 0] - - symbolic equality reduction: if both branches of a - comparison are syntactically equal then the comparison is - reduced to a boolean constant, e.g., [a = a -> true], - [a < a -> false]. Note, by default a read from a register is - considered as a (co)effect, hence the above transformations - wouldn't be applied, consider passing [~ignore:[Eff.reads]] - if you want such expressions to be reduced. + - symbolic equality reduction: if both branches of a comparison are + syntactically equal then the comparison is reduced to a boolean + constant, e.g., [a = a -> true], [a < a -> false]. Note, by default a + read from a register is considered as a (co)effect, hence the above + transformations wouldn't be applied, consider passing + [~ignore:[Eff.reads]] if you want such expressions to be reduced. - - double complement reduction: an odd amount of complement - operations (one and two) are reduced to one complement of - the same sort, e.g., [~~~1 -> ~1] + - double complement reduction: an odd amount of complement operations + (one and two) are reduced to one complement of the same sort, e.g., + [~~~1 -> ~1] - - binary to unary reduction: reduce a subtraction from zero - to the unary negation, e.g., [0 - x -> -x] + - binary to unary reduction: reduce a subtraction from zero to the unary + negation, e.g., [0 - x -> -x] - - exclusive disjunction reduction: reduces an exclusive - disjunction of syntactically equal expressions to zero, e.g, - [42 ^ 42 -> 0]. Note, by default a read from a register is - considered as a (co)effect, thus [xor eax eax] is not - reduced, consider passing [~ignore:[Eff.reads]] if you want - such expressions to be reduced. + - exclusive disjunction reduction: reduces an exclusive disjunction of + syntactically equal expressions to zero, e.g, [42 ^ 42 -> 0]. Note, by + default a read from a register is considered as a (co)effect, thus + [xor eax eax] is not reduced, consider passing [~ignore:[Eff.reads]] + if you want such expressions to be reduced. - @since 1.3 - *) - val simpl : ?ignore:Eff.t list -> exp -> exp + @since 1.3 *) - (** [is_referenced x exp] true if [exp] contains [Var x] on one of - its leafs. See also {!Bil.is_referenced} and {!Stmt.is_referenced} *) val is_referenced : var -> t -> bool + (** [is_referenced x exp] true if [exp] contains [Var x] on one of its + leafs. See also {!Bil.is_referenced} and {!Stmt.is_referenced} *) - (** [normalize_negatives exp] returns an exp where all negative - additions are substituted by subtractions. See - {!Bil.normalize_negatives} for more details *) val normalize_negatives : t -> t + (** [normalize_negatives exp] returns an exp where all negative additions + are substituted by subtractions. See {!Bil.normalize_negatives} for more + details *) + val fold_consts : t -> t (** [fold_consts x] performs constant folding of the expression [x]. Reduces all computable expressions to integers. See also {!Bil.fold_consts} *) - val fold_consts : t -> t - (** [fixpoint f] applies transformation [f] to [t] until it - reaches a fixpoint, i.e., such point [x] that - [f x] = [f (f x)]. - See also {!Bil.fixpoint} and {!Stmt.fixpoint} - *) - val fixpoint : (t -> t) -> (t -> t) + val fixpoint : (t -> t) -> t -> t + (** [fixpoint f] applies transformation [f] to [t] until it reaches a + fixpoint, i.e., such point [x] that [f x] = [f (f x)]. See also + {!Bil.fixpoint} and {!Stmt.fixpoint} *) - (** [free_vars exp] returns a set of all unbound variables, that - occurs in the expression [exp]. *) val free_vars : t -> Var.Set.t + (** [free_vars exp] returns a set of all unbound variables, that occurs in + the expression [exp]. *) - (** [eval x] evaluate expression [x] to a value. *) val eval : t -> Bil.value + (** [eval x] evaluate expression [x] to a value. *) include Regular.S with type t := t + val pp_adt : Format.formatter -> t -> unit end - (** [Regular] interface for BIL statements *) + (** [Regular] interface for BIL statements *) module Stmt : sig - type t = Bil.stmt - (** All visitors provide some information about the current - position of the visitor *) + (** All visitors provide some information about the current position of the + visitor *) class state : object - (** the stack of stmts that was already visited, with the last on - the top. Not including the currently visiting stmt. *) val preds : stmt list + (** the stack of stmts that was already visited, with the last on the top. + Not including the currently visiting stmt. *) - (** stmts that are not yet visited *) val succs : stmt list + (** stmts that are not yet visited *) - (** a stack of stmts that are parents for the currently visiting - entity. The top one is the one that we're currently visiting. *) val stmts_stack : stmt list + (** a stack of stmts that are parents for the currently visiting entity. + The top one is the one that we're currently visiting. *) - (** is [true] if we're visiting expression that is a jump target *) val in_jmp : bool + (** is [true] if we're visiting expression that is a jump target *) - (** is [true] if we're visiting expression that is on the left or - right side of the assignment. *) val in_move : bool + (** is [true] if we're visiting expression that is on the left or right + side of the assignment. *) - (** is [true] if currently visiting expression or statement is - executed under loop. *) val in_loop : bool + (** is [true] if currently visiting expression or statement is executed + under loop. *) end - (** Visitor. - Visits AST providing lots of hooks. + (** Visitor. Visits AST providing lots of hooks. For each AST constructor [C] the visitor provides three methods: [enter_C], [visit_C], [leave_C]. The default implementation for [enter_C] and [leave_C] is to return its argument. The default - implementation for [visit_C] is the following: - 1. call [enter_C] - 2. visit all children - 3. call [leave_C]. + implementation for [visit_C] is the following: 1. call [enter_C] 2. + visit all children 3. call [leave_C]. - It is recommended to override [enter_C] method if you only need - to visit [C] constructor without changing a way you're visiting - the tree. + It is recommended to override [enter_C] method if you only need to visit + [C] constructor without changing a way you're visiting the tree. - For example, to collect all resolved jumps one could write the - following function: + For example, to collect all resolved jumps one could write the following + function: {[ - let collect_calls bil = (object(self) - inherit [Word.t list] visitor - method! enter_int x js = if in_jmp then x :: js else js - end)#run bil [] + let collect_calls bil = + (object (self) + inherit [Word.t list] visitor + method! enter_int x js = if in_jmp then x :: js else js + end) + #run + bil [] ]} - The default entry point of the visitor is method [run], but - you can use any other method as well, for example, if you do - not have a statement at all and want to visit expression. - *) + The default entry point of the visitor is method [run], but you can use + any other method as well, for example, if you do not have a statement at + all and want to visit expression. *) class ['a] visitor : object inherit ['a] Exp.visitor inherit state - (** Default entry point *) + method run : t list -> 'a -> 'a + (** Default entry point *) - (** {2 Statements} *) method enter_stmt : t -> 'a -> 'a + (** {2 Statements} *) + method visit_stmt : t -> 'a -> 'a method leave_stmt : t -> 'a -> 'a - (** [Move(var,exp)] *) method enter_move : var -> exp -> 'a -> 'a + (** [Move(var,exp)] *) + method visit_move : var -> exp -> 'a -> 'a method leave_move : var -> exp -> 'a -> 'a - (** [Jmp exp] *) method enter_jmp : exp -> 'a -> 'a + (** [Jmp exp] *) + method visit_jmp : exp -> 'a -> 'a method leave_jmp : exp -> 'a -> 'a - (** [While (cond,bil)] *) method enter_while : cond:exp -> t list -> 'a -> 'a + (** [While (cond,bil)] *) + method visit_while : cond:exp -> t list -> 'a -> 'a method leave_while : cond:exp -> t list -> 'a -> 'a - (** [If (cond,yes,no)] *) method enter_if : cond:exp -> yes:t list -> no:t list -> 'a -> 'a + (** [If (cond,yes,no)] *) + method visit_if : cond:exp -> yes:t list -> no:t list -> 'a -> 'a method leave_if : cond:exp -> yes:t list -> no:t list -> 'a -> 'a - (** [CpuExn n] *) method enter_cpuexn : int -> 'a -> 'a + (** [CpuExn n] *) + method visit_cpuexn : int -> 'a -> 'a method leave_cpuexn : int -> 'a -> 'a - (** [Special string] *) method enter_special : string -> 'a -> 'a + (** [Special string] *) + method visit_special : string -> 'a -> 'a method leave_special : string -> 'a -> 'a end - (** A visitor with a shortcut. - Finder is a specialization of a visitor, that uses [return] as its - folding argument. At any time you can stop the traversing by - calling [return] function of the provided argument (which is by - itself is a record with one field - a function accepting argument + (** A visitor with a shortcut. Finder is a specialization of a visitor, that + uses [return] as its folding argument. At any time you can stop the + traversing by calling [return] function of the provided argument (which + is by itself is a record with one field - a function accepting argument of type ['a option]). - For example, the following function will check whether [x] - variable is assigned (i.e., occurs on the left of the - assignment operator) in the provided scope. + For example, the following function will check whether [x] variable is + assigned (i.e., occurs on the left of the assignment operator) in the + provided scope. {[ - let is_assigned x = find (object(self) - inherit [unit] finder - method! enter_move y _rhs cc = - if Var.(x = y) then cc.return (Some ()); cc - end) + let is_assigned x = + find + (object (self) + inherit [unit] finder + + method! enter_move y _rhs cc = + if Var.(x = y) then cc.return (Some ()); + cc + end) ]} - There're three [find] functions in the library, that accepts - an object of type [finder]: + There're three [find] functions in the library, that accepts an object + of type [finder]: - [Bil.finder] searches in the [stmt list] aka [bil] - [Stmt.finder] searches in [stmt] - [Exp.finder] searches in [exp]. - In addition, you can use this object directly, using one of - the two provided entry points. *) + In addition, you can use this object directly, using one of the two + provided entry points. *) class ['a] finder : object inherit ['a option return] visitor method find : t list -> 'a option end - (** AST transformation. - mapper allows one to map AST, performing some limited - amount of transformations on it. Mapper provides extra - flexibility by mapping [stmt] to [stmt list], thus allowing - to remove statements from the output (by mapping to empty list) or - to map one statement to several. This is particularly useful when - you map [if] or [while] statements. *) + (** AST transformation. mapper allows one to map AST, performing some + limited amount of transformations on it. Mapper provides extra + flexibility by mapping [stmt] to [stmt list], thus allowing to remove + statements from the output (by mapping to empty list) or to map one + statement to several. This is particularly useful when you map [if] or + [while] statements. *) class mapper : object inherit Exp.mapper inherit state - (** Default entry point. - But again, you can use any method as an entry *) method run : t list -> t list + (** Default entry point. But again, you can use any method as an entry *) + method map_stmt : t -> t list method map_move : var -> exp -> t list method map_jmp : exp -> t list @@ -3983,215 +3763,170 @@ module Std : sig method map_special : string -> t list end - (** [constant_folder] is a class that implements the [fold_consts] *) class constant_folder : mapper + (** [constant_folder] is a class that implements the [fold_consts] *) - (** [fold ~init visitor stmt] folds a [stmt] with a visitor. See - {!Bil.fold} and {!Exp.fold} for more details. *) val fold : 'a #visitor -> init:'a -> t -> 'a + (** [fold ~init visitor stmt] folds a [stmt] with a visitor. See {!Bil.fold} + and {!Exp.fold} for more details. *) - (** [iter visitor stmt] iters over a [stmt] with a visitor. See - {!Bil.iter} and {!Exp.iter} for more details. *) val iter : unit #visitor -> t -> unit + (** [iter visitor stmt] iters over a [stmt] with a visitor. See {!Bil.iter} + and {!Exp.iter} for more details. *) - (** [map mapper bil] applies [mapper] to the program [bil] *) val map : #mapper -> t list -> t list + (** [map mapper bil] applies [mapper] to the program [bil] *) - (** [find finder stmt] performs a lookup into the Bil statement. See - {!Bil.find} and {!Exp.find} for more details. *) val find : 'a #finder -> t -> 'a option + (** [find finder stmt] performs a lookup into the Bil statement. See + {!Bil.find} and {!Exp.find} for more details. *) - (** [exists finder stmt] is [true] iff [find finder stmt <> None]. - See {!Bil.exists} and {!Exp.exists} for more details. *) val exists : unit #finder -> t -> bool + (** [exists finder stmt] is [true] iff [find finder stmt <> None]. See + {!Bil.exists} and {!Exp.exists} for more details. *) - (** [is_referenced x stmt] is true is [x] is used in the [stmt] - in any place other then right hand side of the assignment. E.g., - [is_referenced x Bil.(x := var x)] is [true], but - [is_referenced x Bil.(x := var y)] is [false]. - see {!Bil.is_referenced} for more details. - *) val is_referenced : var -> t -> bool + (** [is_referenced x stmt] is true is [x] is used in the [stmt] in any place + other then right hand side of the assignment. E.g., + [is_referenced x Bil.(x := var x)] is [true], but + [is_referenced x Bil.(x := var y)] is [false]. see {!Bil.is_referenced} + for more details. *) - (** [normalize ?normalize_exp xs] produces a normalized BIL - program with the same[^1] semantics but in the BIL normalized - form (BNF). There are two normalized forms, both described - below. The first form (BNF1) is more readable, the second form - (BNF2) is more strict, but sometimes yields a code, that is hard - for a human to comprehend. The [BNF1] is the default, to request - [BNF2] pass [normalize_exp:true]. + val normalize : ?normalize_exp:bool -> stmt list -> stmt list + (** [normalize ?normalize_exp xs] produces a normalized BIL program with the + same[^1] semantics but in the BIL normalized form (BNF). There are two + normalized forms, both described below. The first form (BNF1) is more + readable, the second form (BNF2) is more strict, but sometimes yields a + code, that is hard for a human to comprehend. The [BNF1] is the default, + to request [BNF2] pass [normalize_exp:true]. Precondition: [xs] is well-typed. - The BIL First Normalized Form (BNF1) is a subset of the BIL - language, where expressions have the following properties: + The BIL First Normalized Form (BNF1) is a subset of the BIL language, + where expressions have the following properties: - Memory load expressions can be only applied to a memory. This - effectively disallows creation of temporary memory regions, - and requires all store operations to be committed via the - assignment operation. Also, this provides a guarantee, that - store expressions will not occur in integer assignments, jmp - destinations, and conditional expressions, leaving them valid - only in an assignment statement where the rhs has type mem_t. - This is effectively the same as make the [Load] constructor to - have type ([Load (var,exp,endian,size)]). - - - No load or store expressions in the following positions: - 1. the right-hand side of the let expression; - 2. address or value subexpressions of the store expression; - 3. storage or address subexpressions of the load expression; - - The BIL Second Normalized Form (BNF2) is a subset of the BNF1 - (in a sense that all BNF2 programs are also in BNF1). This form - puts the following restrictions: - - - No let expressions - new variables can be created only with - the Move instruction. - - - All memory operations have sizes equal to one byte. Thus the - size and endianness can be ignored in analysis. During the - normalization, the following rewrites are performed - {v - let x = in ... x ... => ... ... - x[a,el]:n => x[a+n-1] @ ... @ x[a] - x[a,be]:n => x[a] @ ... @ x[a+n-1] - m[a,el]:n <- x => (...((m[a] <- x<0>)[a+1] <- x<1>)...)[a+n-1] <- x - m[a,be]:n <- x => (...((m[a] <- x)[a+1] <- x)...)[a+n-1] <- x<0> - (x[a] <- b)[c] => m := x[a] <- b; m[c] - v} + effectively disallows creation of temporary memory regions, and + requires all store operations to be committed via the assignment + operation. Also, this provides a guarantee, that store expressions + will not occur in integer assignments, jmp destinations, and + conditional expressions, leaving them valid only in an assignment + statement where the rhs has type mem_t. This is effectively the same + as make the [Load] constructor to have type + ([Load (var,exp,endian,size)]). + + - No load or store expressions in the following positions: 1. the + right-hand side of the let expression; 2. address or value + subexpressions of the store expression; 3. storage or address + subexpressions of the load expression; + + The BIL Second Normalized Form (BNF2) is a subset of the BNF1 (in a + sense that all BNF2 programs are also in BNF1). This form puts the + following restrictions: + + - No let expressions - new variables can be created only with the Move + instruction. + + {ul + {- All memory operations have sizes equal to one byte. Thus the size + and endianness can be ignored in analysis. During the normalization, + the following rewrites are performed + {v + let x = in ... x ... => ... ... + x[a,el]:n => x[a+n-1] @ ... @ x[a] + x[a,be]:n => x[a] @ ... @ x[a+n-1] + m[a,el]:n <- x => (...((m[a] <- x<0>)[a+1] <- x<1>)...)[a+n-1] <- x + m[a,be]:n <- x => (...((m[a] <- x)[a+1] <- x)...)[a+n-1] <- x<0> + (x[a] <- b)[c] => m := x[a] <- b; m[c] + v} + } + } - [^1]: The normalization procedure may duplicate expressions - that might be considered non-generative. For example, + [^1]: The normalization procedure may duplicate expressions that might + be considered non-generative. For example, [let x = m[a] in x + x] - is rewritten to [m[a] + m[a]]. Given a concrete semantics of a - memory (for example, if memory is mapped to a device register - that changes every times it is read) this expression may have - different value. It will also have different effect (such as - two memory accesses, page faults etc). + is rewritten to [m[a] + m[a]]. Given a concrete semantics of a memory + (for example, if memory is mapped to a device register that changes + every times it is read) this expression may have different value. It + will also have different effect (such as two memory accesses, page + faults etc). - However, in the formal semantics of BAP we do not consider - effects, and treat all expressions as side-effect free, thus the - above transformation, are preserving the semantics. + However, in the formal semantics of BAP we do not consider effects, and + treat all expressions as side-effect free, thus the above + transformation, are preserving the semantics. - @param normalize_exp (defaults to [false]) if set to [true] then - the returned program will be in BNF2. + @param normalize_exp + (defaults to [false]) if set to [true] then the returned program will + be in BNF2. @since 1.3 *) - val normalize : ?normalize_exp:bool -> stmt list -> stmt list - (** [simpl ?ignore xs] recursively applies [Exp.simpl] and also - simplifies [if] and [while] expressions with statically known - conditionals, e.g., [if (true) xs ys] is simplified to [xs], - [while (false) xs] is simplified to [xs]. - - @since 1.3 - *) val simpl : ?ignore:Eff.t list -> t list -> t list + (** [simpl ?ignore xs] recursively applies [Exp.simpl] and also simplifies + [if] and [while] expressions with statically known conditionals, e.g., + [if (true) xs ys] is simplified to [xs], [while (false) xs] is + simplified to [xs]. + + @since 1.3 *) - (** [fixpoint f x] applies transformation [f] until it reaches - fixpoint. See {!Bil.fixpoint} and {Exp.fixpoint}. *) - val fixpoint : (t -> t) -> (t -> t) + val fixpoint : (t -> t) -> t -> t + (** [fixpoint f x] applies transformation [f] until it reaches fixpoint. See + {!Bil.fixpoint} and {!Exp.fixpoint}. *) - (** [free_vars stmt] returns a set of all unbound variables, that - occurs in [stmt]. *) val free_vars : t -> Var.Set.t + (** [free_vars stmt] returns a set of all unbound variables, that occurs in + [stmt]. *) - (** [eval prog] eval BIL program under given context. Returns the - context which contains all effects of computations. *) val eval : t list -> (#Bili.context as 'a) -> 'a + (** [eval prog] eval BIL program under given context. Returns the context + which contains all effects of computations. *) include Regular.S with type t := t val pp_adt : Format.formatter -> t -> unit end - (** Architecture *) + (** Architecture *) module Arch : sig - type x86 = [ - | `x86 - | `x86_64 - ] [@@deriving bin_io, compare, enumerate, sexp] - - type arm = [ - | `armv4 - | `armv5 - | `armv6 - | `armv7 - ] [@@deriving bin_io, compare, enumerate, sexp] - - type armeb = [ - | `armv4eb - | `armv5eb - | `armv6eb - | `armv7eb - ] [@@deriving bin_io, compare, enumerate, sexp] - - type thumb = [ - | `thumbv4 - | `thumbv5 - | `thumbv6 - | `thumbv7 - ] [@@deriving bin_io, compare, enumerate, sexp] - - type thumbeb = [ - | `thumbv4eb - | `thumbv5eb - | `thumbv6eb - | `thumbv7eb - ] [@@deriving bin_io, compare, enumerate, sexp] - - type aarch64 = [ - | `aarch64 - | `aarch64_be - ] - [@@deriving bin_io, compare, enumerate, sexp] + type x86 = [ `x86 | `x86_64 ] [@@deriving bin_io, compare, enumerate, sexp] - type ppc = [ - | `ppc - | `ppc64 - | `ppc64le - ] + type arm = [ `armv4 | `armv5 | `armv6 | `armv7 ] [@@deriving bin_io, compare, enumerate, sexp] - type mips = [ - | `mips - | `mipsel - | `mips64 - | `mips64el - ] + type armeb = [ `armv4eb | `armv5eb | `armv6eb | `armv7eb ] [@@deriving bin_io, compare, enumerate, sexp] - type sparc = [ - | `sparc - | `sparcv9 - ] + type thumb = [ `thumbv4 | `thumbv5 | `thumbv6 | `thumbv7 ] [@@deriving bin_io, compare, enumerate, sexp] - type nvptx = [ - | `nvptx - | `nvptx64 - ] + type thumbeb = [ `thumbv4eb | `thumbv5eb | `thumbv6eb | `thumbv7eb ] [@@deriving bin_io, compare, enumerate, sexp] - type hexagon = [`hexagon] + type aarch64 = [ `aarch64 | `aarch64_be ] [@@deriving bin_io, compare, enumerate, sexp] - type r600 = [`r600] + type ppc = [ `ppc | `ppc64 | `ppc64le ] [@@deriving bin_io, compare, enumerate, sexp] - type systemz = [`systemz] + type mips = [ `mips | `mipsel | `mips64 | `mips64el ] [@@deriving bin_io, compare, enumerate, sexp] - type xcore = [`xcore] + type sparc = [ `sparc | `sparcv9 ] [@@deriving bin_io, compare, enumerate, sexp] - type unknown = [`unknown] + type nvptx = [ `nvptx | `nvptx64 ] [@@deriving bin_io, compare, enumerate, sexp] - type t = [ - | aarch64 + type hexagon = [ `hexagon ] [@@deriving bin_io, compare, enumerate, sexp] + type r600 = [ `r600 ] [@@deriving bin_io, compare, enumerate, sexp] + type systemz = [ `systemz ] [@@deriving bin_io, compare, enumerate, sexp] + type xcore = [ `xcore ] [@@deriving bin_io, compare, enumerate, sexp] + type unknown = [ `unknown ] [@@deriving bin_io, compare, enumerate, sexp] + + type t = + [ aarch64 | arm | armeb | thumb @@ -4205,61 +3940,60 @@ module Std : sig | systemz | x86 | xcore - | unknown - ] [@@deriving bin_io, compare, enumerate, sexp] + | unknown ] + [@@deriving bin_io, compare, enumerate, sexp] - (** [of_string s] will try to be clever and to capture all - commonly known synonyms, e.g., [of_string "i686"] will - work *) val of_string : string -> t option + (** [of_string s] will try to be clever and to capture all commonly known + synonyms, e.g., [of_string "i686"] will work *) - (** [addr_size arch] returns an address size for a a given [arch] *) val addr_size : t -> addr_size + (** [addr_size arch] returns an address size for a a given [arch] *) - (** [endian arch] returns a word endianness of the [arch] *) val endian : t -> endian + (** [endian arch] returns a word endianness of the [arch] *) - (** the architecture (ISA) of a program. *) val slot : (Theory.program, t) Knowledge.slot + (** the architecture (ISA) of a program. *) + val unit_slot : (Theory.Unit.cls, t) Knowledge.slot (** [unit_slot] the arch property of the unit. - Use this slot to enable backward compatibility of the [Arch.t] - type with the [Theory.Target.t] by registering a promise that - translates [Theory.Target.t] to [Arch.t]. + Use this slot to enable backward compatibility of the [Arch.t] type with + the [Theory.Target.t] by registering a promise that translates + [Theory.Target.t] to [Arch.t]. Example, {[ let target = Theory.Target.declare ~package:"foo" "r600" - let () = KB.promise Arch.unit_slot @@ fun unit -> + + let () = + KB.promise Arch.unit_slot @@ fun unit -> KB.collect Theory.Unit.target >>| fun t -> - if Theory.Target.equal t target then `r600 - else `unknown - ]} - *) - val unit_slot : (Theory.Unit.cls, t) Knowledge.slot + if Theory.Target.equal t target then `r600 else `unknown + ]} *) - (** [arch] type implements [Regular] interface *) include Regular.S with type t := t + (** [arch] type implements [Regular] interface *) end - (** architecture *) - type arch = Arch.t - [@@deriving bin_io, compare, sexp] + type arch = Arch.t [@@deriving bin_io, compare, sexp] + (** architecture *) (** Universal Values. - This module creates an extensible variant type, that resembles - extensible variant types, introduced in 4.02, but even more safe - and more extensible, and, what really matters, - serializable. Basically you should think of [Value.t] as a union - type, aka sum type, that can be extended in any place, including - your plugin code. Where extending is adding new constructor. To - add new constructor, you need to register it, e.g., + This module creates an extensible variant type, that resembles extensible + variant types, introduced in 4.02, but even more safe and more extensible, + and, what really matters, serializable. Basically you should think of + [Value.t] as a union type, aka sum type, that can be extended in any + place, including your plugin code. Where extending is adding new + constructor. To add new constructor, you need to register it, e.g., {[ - let function_signature = Value.Tag.register (module String) + let function_signature = + Value.Tag.register + (module String) ~name:"function_signature" ~uuid:"2175c28c-08ca-4052-8385-3a01e1c6ab6f" ]} @@ -4270,22 +4004,22 @@ module Std : sig | Function_signature of string ]} - to existing union type. The main difference is that the [name] - shouldn't be unique (in fact [name] doesn't bear any semantic - meaning, it basically for pretty-printing). On the other hand - the [uuid] parameter must be unique across the universe, space - and time. To get the UUID with such properties, you can use - [uuidgen] program that is usually available on Linux and Mac OS. + to existing union type. The main difference is that the [name] shouldn't + be unique (in fact [name] doesn't bear any semantic meaning, it basically + for pretty-printing). On the other hand the [uuid] parameter must be + unique across the universe, space and time. To get the UUID with such + properties, you can use [uuidgen] program that is usually available on + Linux and Mac OS. - [name] and [uuid] must be strings, known at compile time, in - other words it must be string literal, not just an arbitrary - string, created dynamically. This is made intentionally, in - order to prevent the abuse of the system. + [name] and [uuid] must be strings, known at compile time, in other words + it must be string literal, not just an arbitrary string, created + dynamically. This is made intentionally, in order to prevent the abuse of + the system. - The [(module String)] syntax creates a value from the module - [String], (so called first-class module). The module should - implement [Value.S] signature, that requires pretty-printing, - comparison function and serialization. + The [(module String)] syntax creates a value from the module [String], (so + called first-class module). The module should implement [Value.S] + signature, that requires pretty-printing, comparison function and + serialization. {[ module type S = sig @@ -4295,11 +4029,10 @@ module Std : sig end ]} - The good news is that, most of the types in [Core] and [Bap] do - conform with the requirements. Usually, one can implement the - requirements very easily by using type-driven syntax extensions - (although, you still need to implement pretty-printing function - yourself): + The good news is that, most of the types in [Core] and [Bap] do conform + with the requirements. Usually, one can implement the requirements very + easily by using type-driven syntax extensions (although, you still need to + implement pretty-printing function yourself): {[ module Loc = struct @@ -4315,20 +4048,20 @@ module Std : sig ~uuid:"400e190e-ce21-488d-87b1-c101709621a8" ]} - The returned value, is a tag that can be used to constructed - values of that branch, and to deconstruct (extract) them. You - may think of it as a cipher key, that is used to package data - into the value container, and later to unpack it: + The returned value, is a tag that can be used to constructed values of + that branch, and to deconstruct (extract) them. You may think of it as a + cipher key, that is used to package data into the value container, and + later to unpack it: {[ # let main_pos = Value.create loc ("test.c", 20, 2);; val main_pos : value = test.c:20:2 ]} - You may see, that OCaml pretty-prints the value. That's neat! - Also, you may see, that the returned expression has type - [value]. That means that it can be used uniformly with other - values, for example, you can put them in one container, e.g., + You may see, that OCaml pretty-prints the value. That's neat! Also, you + may see, that the returned expression has type [value]. That means that it + can be used uniformly with other values, for example, you can put them in + one container, e.g., {[ # let main_t = Value.create function_signature @@ -4351,347 +4084,336 @@ module Std : sig - : Loc.t option = Some ("test.c", 20, 2) ]} - This will require an extra allocation of an [option] container, - and in a performance critical context it may be unacceptable. - For this special case you can use a more efficient: + This will require an extra allocation of an [option] container, and in a + performance critical context it may be unacceptable. For this special case + you can use a more efficient: - {[if Value.is loc then Value.get_exn loc main_pos]}. + {[ + if Value.is loc then Value.get_exn loc main_pos + ]} + . - Underneath the hood, the values of type [value] is just a pair - of an original value and runtime type information. + Underneath the hood, the values of type [value] is just a pair of an + original value and runtime type information. - The comparison of two values of type [value] is actually a - multi-method, as it has the following behavior: + The comparison of two values of type [value] is actually a multi-method, + as it has the following behavior: - 1. If both values has the same type, then use [compare] - function, that was provided for this type. - 2. If values are of different types, that are known to - the type system, then compare them using RTTI, and ignore the - value. - 3. If at least one of the values is of the unknown type, - (i.e., type wasn't registered in the type system), then - use polymorphic compare on a tuple of UUID and binary - representation of the values. + 1. If both values has the same type, then use [compare] function, that was + provided for this type. 2. If values are of different types, that are + known to the type system, then compare them using RTTI, and ignore the + value. 3. If at least one of the values is of the unknown type, (i.e., + type wasn't registered in the type system), then use polymorphic compare + on a tuple of UUID and binary representation of the values. - The rules above guarantee, that values with different RTTI id - are never equal. It also guarantees that the ordering will be - preserved between different builds of a program, and even - between different versions of the compiler. + The rules above guarantee, that values with different RTTI id are never + equal. It also guarantees that the ordering will be preserved between + different builds of a program, and even between different versions of the + compiler. {2 Thread safety} - The only thread unsafe function is [register], that should be - called in the module initialization time. In general programs - modules are initialized in a single thread, so this shouldn't be - an issue. The implementation by itself doesn't call [register]. - *) + The only thread unsafe function is [register], that should be called in + the module initialization time. In general programs modules are + initialized in a single thread, so this shouldn't be an issue. The + implementation by itself doesn't call [register]. *) module Value : sig - - (** a universal value *) type t = value [@@deriving bin_io, compare, sexp] + (** a universal value *) - (** Tag constructor of type ['a] *) type 'a tag + (** Tag constructor of type ['a] *) (** A required interface for the type to be lifted to value. *) module type S = sig - (** In order to construct a value with the a given type you must - provide an implementation for marshaling functions, - comparison function and pretty-printing. *) type t [@@deriving bin_io, compare, sexp] + (** In order to construct a value with the a given type you must provide + an implementation for marshaling functions, comparison function and + pretty-printing. *) + val pp : Format.formatter -> t -> unit end - (** uninhabited type *) type void + (** uninhabited type *) - (** literal string. Don't look at the right hand side of a type - equation, this is just a way to say that a string should be a - literal not a value. Compiler will automatically coerce your - string literals to this type. *) - type literal = (void,void,void) format + type literal = (void, void, void) format + (** literal string. Don't look at the right hand side of a type equation, + this is just a way to say that a string should be a literal not a value. + Compiler will automatically coerce your string literals to this type. *) - (** persistent type identifier *) type typeid [@@deriving bin_io, compare, sexp] + (** persistent type identifier *) - (** [create cons x] creates a value using constructor [cons] and - argument [x] *) val create : 'a tag -> 'a -> t + (** [create cons x] creates a value using constructor [cons] and argument + [x] *) - (** [is cons v] true if value [v] was constructed with constructor - [cons], i.e., it is true only when [is_cons t (create t x)] *) - val is : 'a tag -> t -> bool + val is : 'a tag -> t -> bool + (** [is cons v] true if value [v] was constructed with constructor [cons], + i.e., it is true only when [is_cons t (create t x)] *) - (** [get cons] extracts a value associated with a constructor [cons] - (Essentially, performs a pattern match on the specified variant - branch) *) val get : 'a tag -> t -> 'a option + (** [get cons] extracts a value associated with a constructor [cons] + (Essentially, performs a pattern match on the specified variant branch) + *) - (** [get_exn t v] extracts value created with [t] from the - variant. Raises unspecified exception if variant [v] wasn't - created with [t]. *) val get_exn : 'a tag -> t -> 'a + (** [get_exn t v] extracts value created with [t] from the variant. Raises + unspecified exception if variant [v] wasn't created with [t]. *) - (** [tagname value] returns a constructor name of the [value] *) val tagname : t -> string + (** [tagname value] returns a constructor name of the [value] *) - (** [typeid value] returns a type identifier of the [value] *) val typeid : t -> typeid + (** [typeid value] returns a type identifier of the [value] *) - (** Variants of values. *) + (** Variants of values. *) module Tag : sig type 'a t = 'a tag - (** [register ~name ~uuid (module T)] creates a new variant - constructor that accepts values of type [T.t]. Module [T] - should implement [Binable.S] and [Sexpable.S] interfaces, - provide [compare] and pretty-printing [pp] functions. This - functions will be used to print, compare and serialize - values. - - The returned value of type [T.t tag] is a special key that - can be used with [create] and [get] functions to pack and - unpack values of type [T.t] into [value]. - - Registration of a value tag, automatically adds a - property slot to the [Theory.program] class. Then property - name is [package:name] where [package] defaults to - [uuid]. - - No matter of the [package] name the [uuid] parameter is used - as a [typeid] and to serialize and de-serialize values. + val register : + ?public:bool -> + ?desc:string -> + ?package:string -> + name:string -> + uuid:string -> + (module S with type t = 'a) -> + 'a tag + (** [register ~name ~uuid (module T)] creates a new variant constructor + that accepts values of type [T.t]. Module [T] should implement + [Binable.S] and [Sexpable.S] interfaces, provide [compare] and + pretty-printing [pp] functions. This functions will be used to print, + compare and serialize values. + + The returned value of type [T.t tag] is a special key that can be used + with [create] and [get] functions to pack and unpack values of type + [T.t] into [value]. + + Registration of a value tag, automatically adds a property slot to the + [Theory.program] class. Then property name is [package:name] where + [package] defaults to [uuid]. + + No matter of the [package] name the [uuid] parameter is used as a + [typeid] and to serialize and de-serialize values. Note, this function delegates most of it work to {!register_slot}. @since 2.2.0 adds [public], [desc], and [package] parameter - @since 2.2.0 changed the defined slot name to [package:name] - *) - val register : - ?public:bool -> - ?desc:string -> - ?package:string -> name:string -> uuid:string -> - (module S with type t = 'a) -> 'a tag + @since 2.2.0 changed the defined slot name to [package:name] *) + val register_slot : + ?uuid:string -> + (Theory.program, 'a option) KB.slot -> + (module S with type t = 'a) -> + 'a tag (** [register_slot s f] registers a KB property as a value. - An existing property of the [Theory.program] class can be - also represented as BAP value and attached directly to - program attributes, memory locations, or stored in the - project dictionary. - *) - val register_slot : ?uuid:string -> - (Theory.program,'a option) KB.slot -> - (module S with type t = 'a) -> 'a tag + An existing property of the [Theory.program] class can be also + represented as BAP value and attached directly to program attributes, + memory locations, or stored in the project dictionary. *) - (** [slot tag] returns a slot associated with the tag. *) val slot : 'a t -> (Theory.program, 'a option) KB.slot + (** [slot tag] returns a slot associated with the tag. *) - (** [name cons] returns a name of a constructor. *) val name : 'a t -> string + (** [name cons] returns a name of a constructor. *) - (** [same x y] is true if tags [x] and [y] have the same type. *) val same : 'a t -> 'b t -> bool + (** [same x y] is true if tags [x] and [y] have the same type. *) - (** [same_witness x y] returns a value witnessing that value tags - [x] and [y] has the same type. *) - val same_witness : 'a t -> 'b t -> ('a,'b) Type_equal.t option + val same_witness : 'a t -> 'b t -> ('a, 'b) Type_equal.t option + (** [same_witness x y] returns a value witnessing that value tags [x] and + [y] has the same type. *) - (** [same_witness_exn x y] is the same as [same_witness] but - raises exception if [not (same x y)]. *) - val same_witness_exn : 'a t -> 'b t -> ('a,'b) Type_equal.t + val same_witness_exn : 'a t -> 'b t -> ('a, 'b) Type_equal.t + (** [same_witness_exn x y] is the same as [same_witness] but raises + exception if [not (same x y)]. *) - (** [typeid t] returns a type identifier of a type tag [t]. *) val typeid : 'a t -> typeid + (** [typeid t] returns a type identifier of a type tag [t]. *) end - (** Runtime parallel match. *) + (** Runtime parallel match. *) module Match : sig - (** This module can be used to handle several cases in parallel - instead of using a sequence of nested matches or if/then/else - chains. + type 'a t + (** This module can be used to handle several cases in parallel instead of + using a sequence of nested matches or if/then/else chains. The combinators in the module are designed to be used as follows: {[ - let lift v = Match.(begin - switch v @@ - case memory_load (fun x -> `Load x) @@ - case memory_store (fun x -> `Store x) @@ - case register_read (fun x -> `Read x) @@ - default (fun () -> `Unknown) - end) + let lift v = + Match.( + switch v + @@ case memory_load (fun x -> `Load x) + @@ case memory_store (fun x -> `Store x) + @@ case register_read (fun x -> `Read x) + @@ default (fun () -> `Unknown)) ]} - Note: in the example, the whole expression will build and - then match. In case when performance matter, and when there - is more then one match, it is recommended to evaluate a - matching object first, and return a function, that matches - values. For this there is a [select] combinator: + Note: in the example, the whole expression will build and then match. + In case when performance matter, and when there is more then one + match, it is recommended to evaluate a matching object first, and + return a function, that matches values. For this there is a [select] + combinator: {[ let lift = - Match.(begin - select @@ - case memory_load (fun x -> `Load x) @@ - case memory_store (fun x -> `Store x) @@ - case register_read (fun x -> `Read x) @@ - default (fun () -> `Unknown) - end) - ]} + Match.( + select + @@ case memory_load (fun x -> `Load x) + @@ case memory_store (fun x -> `Store x) + @@ case register_read (fun x -> `Read x) + @@ default (fun () -> `Unknown)) + ]} *) - *) - type 'a t - - (** [switch x matcher] applies [matcher] to value [x] *) val switch : value -> 's t -> 's + (** [switch x matcher] applies [matcher] to value [x] *) - (** [select matcher x] applies [matcher] to value [x]. - [select] is the same as [Fn.flip switch]. *) val select : 's t -> value -> 's + (** [select matcher x] applies [matcher] to value [x]. [select] is the + same as [Fn.flip switch]. *) - (** [case tag action matcher] adds an [action] to [matcher] that - will be invoked for values with a a given [tag] *) val case : 'a tag -> ('a -> 's) -> 's t -> 's t + (** [case tag action matcher] adds an [action] to [matcher] that will be + invoked for values with a a given [tag] *) - (** [default def] creates an empty matcher with default handler [def]. *) val default : (unit -> 's) -> 's t + (** [default def] creates an empty matcher with default handler [def]. *) end - (** Persistent type identifiers. *) module Typeid : Identifiable with type t = typeid + (** Persistent type identifiers. *) - (** Although values of type [value] implements regular interface - it is recommended to used [dict] data structure instead of - those, that are provided by [Regular] interface.x *) include Regular.S with type t := t + (** Although values of type [value] implements regular interface it is + recommended to used [dict] data structure instead of those, that are + provided by [Regular] interface.x *) end type 'a tag = 'a Value.tag - (** Universal Heterogeneous Map. *) + (** Universal Heterogeneous Map. *) module Dict : sig - (** The dictionary can store values of arbitrary type. Only one - value of a a given tag can be stored in the map. For example, if - you have tag [cconv] (calling convention) then it is - guaranteed that in map there is zero or one value with this - tag. *) + (** The dictionary can store values of arbitrary type. Only one value of a a + given tag can be stored in the map. For example, if you have tag [cconv] + (calling convention) then it is guaranteed that in map there is zero or + one value with this tag. *) - (** type of map *) type t = dict [@@deriving bin_io, compare, sexp] + (** type of map *) - (** an empty instance *) val empty : t + (** an empty instance *) - (** [is_empty map] true if is empty. *) val is_empty : t -> bool + (** [is_empty map] true if is empty. *) - (** [set map tag x] inserts or update *) val set : t -> 'a tag -> 'a -> t + (** [set map tag x] inserts or update *) - (** [mem map tag] checks membership *) val mem : t -> 'a tag -> bool + (** [mem map tag] checks membership *) - (** [find map tag] lookups value *) val find : t -> 'a tag -> 'a option + (** [find map tag] lookups value *) - (** [add map tag x] adds new value *) - val add : t -> 'a tag -> 'a -> [`Ok of t | `Duplicate] + val add : t -> 'a tag -> 'a -> [ `Ok of t | `Duplicate ] + (** [add map tag x] adds new value *) - (** [change map tag f] changes value. *) val change : t -> 'a tag -> ('a option -> 'a option) -> t + (** [change map tag f] changes value. *) - (** [remove map tag] returns a map without a value associated - with [tag] *) val remove : t -> 'a tag -> t + (** [remove map tag] returns a map without a value associated with [tag] *) - (** [to_sequence dict] is a sequence of all tid value - entries *) val to_sequence : t -> (Value.typeid * value) seq + (** [to_sequence dict] is a sequence of all tid value entries *) - (** [data dict] is a sequence of all dict elements *) val data : t -> value seq + (** [data dict] is a sequence of all dict elements *) - (** [filter dict ~f] returns a new dict, filtered with [f] *) val filter : t -> f:(value -> bool) -> t + (** [filter dict ~f] returns a new dict, filtered with [f] *) end - (** {{!Vector}Resizable array} *) type 'a vector + (** {{!Vector}Resizable array} *) (** Resizable Array. - Resizable arrays with a logarithmic push_back in the style of - C++. A user need to provide a default value (c.f., - DefaultConstructible requirement in C++ version). *) + Resizable arrays with a logarithmic push_back in the style of C++. A user + need to provide a default value (c.f., DefaultConstructible requirement in + C++ version). *) module Vector : sig - (** a type of vector holding elements of type ['a] *) type 'a t = 'a vector [@@deriving bin_io, compare, sexp] + (** a type of vector holding elements of type ['a] *) - (** [create ?capacity default] creates an empty vector with a a given - [capacity]. It is guaranteed that the default value will never - be seen by the user unless he put it into the vector explicitly - with [append] or [set]. - *) val create : ?capacity:int -> 'a -> 'a t + (** [create ?capacity default] creates an empty vector with a a given + [capacity]. It is guaranteed that the default value will never be seen + by the user unless he put it into the vector explicitly with [append] or + [set]. *) - (** [append xs x] appends [x] to the end of [xs] *) val append : 'a t -> 'a -> unit + (** [append xs x] appends [x] to the end of [xs] *) - (** [nth vec n] returns [n]'th element of vector [vec] *) val nth : 'a t -> int -> 'a option + (** [nth vec n] returns [n]'th element of vector [vec] *) - (** [get vec n] like [nth] but raises exception if index is out of - bounds *) val get : 'a t -> int -> 'a + (** [get vec n] like [nth] but raises exception if index is out of bounds *) + val set : 'a t -> int -> 'a -> unit (** [set vec n x] sets [n]'th element of a vector [vec] to [x] if [n < length vec] then raises exception *) - val set : 'a t -> int -> 'a -> unit - (** [map_to_array xs ~f] copies data from [xs] to an array applying - [f] to each element. See also [to_array] function from - [Container.S1] interface *) val map_to_array : 'a t -> f:('a -> 'b) -> 'b array + (** [map_to_array xs ~f] copies data from [xs] to an array applying [f] to + each element. See also [to_array] function from [Container.S1] interface + *) - (** [findi xs ~f] returns an index [i] and a value [x] of the first - element of [xs], for which [f i x] is [true]. *) val findi : 'a t -> f:(int -> 'a -> bool) -> (int * 'a) option + (** [findi xs ~f] returns an index [i] and a value [x] of the first element + of [xs], for which [f i x] is [true]. *) - (** [iter xs ~f] applies [f i x] for each [x_i] in [xs] *) val iteri : 'a t -> f:(int -> 'a -> unit) -> unit + (** [iter xs ~f] applies [f i x] for each [x_i] in [xs] *) - (** [foldi xs ~init:s_0 ~f] computes [f n s_n x_n], where [s_n = f - (n-1) s_[n-1] x_[n-1]] and [n] is the number of elements in - [xs] *) val foldi : 'a t -> init:'b -> f:(int -> 'b -> 'a -> 'b) -> 'b + (** [foldi xs ~init:s_0 ~f] computes [f n s_n x_n], where + [s_n = f (n-1) s_[n-1] x_[n-1]] and [n] is the number of elements in + [xs] *) - (** [index ?equal xs x] returns an index of the first element [p] of - [xs] for which [equal p x] is [true]. The [equal] parameter - defaults to the OCaml builtin polymorphic equality. *) val index : ?equal:('a -> 'a -> bool) -> 'a t -> 'a -> int option + (** [index ?equal xs x] returns an index of the first element [p] of [xs] + for which [equal p x] is [true]. The [equal] parameter defaults to the + OCaml builtin polymorphic equality. *) - (** [index_exn ?equal xs x] is the same as [index ?equal xs x] but - an exception is thrown instead of [None] *) val index_exn : ?equal:('a -> 'a -> bool) -> 'a t -> 'a -> int + (** [index_exn ?equal xs x] is the same as [index ?equal xs x] but an + exception is thrown instead of [None] *) - (** [index_with ?equal ~default xs x] same as [index] but returns - the [default] value instead of [None]. *) - val index_with : ?equal:('a -> 'a -> bool) -> default:int -> 'a t -> 'a -> int + val index_with : + ?equal:('a -> 'a -> bool) -> default:int -> 'a t -> 'a -> int + (** [index_with ?equal ~default xs x] same as [index] but returns the + [default] value instead of [None]. *) - (** implements common accessors for the array, like [find], [fold], - [iter], etc *) include Container.S1 with type 'a t := 'a t + (** implements common accessors for the array, like [find], [fold], [iter], + etc *) - (** [pp pp_elem] creates a vector printer that uses [pp_elem] to - print elements. *) - val pp : (Format.formatter -> 'a -> unit) -> (Format.formatter -> 'a t -> unit) + val pp : + (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit + (** [pp pp_elem] creates a vector printer that uses [pp_elem] to print + elements. *) end + type 'a term [@@deriving bin_io, compare, sexp] (** BAP IR. - Program is a tree of terms. - *) - type 'a term [@@deriving bin_io, compare, sexp] + Program is a tree of terms. *) type program [@@deriving bin_io, compare, sexp] type sub [@@deriving bin_io, compare, sexp] @@ -4701,188 +4423,186 @@ module Std : sig type def [@@deriving bin_io, compare, sexp] type jmp [@@deriving bin_io, compare, sexp] type nil [@@deriving bin_io, compare, sexp] - type tid = Theory.Label.t [@@deriving bin_io, compare, sexp] type call [@@deriving bin_io, compare, sexp] - (** target of control transfer *) + (** target of control transfer *) type label = - | Direct of tid (** direct jump *) - | Indirect of exp (** indirect jump *) + | Direct of tid (** direct jump *) + | Indirect of exp (** indirect jump *) [@@deriving bin_io, compare, sexp] - (** control transfer variants *) + (** control transfer variants *) type jmp_kind = - | Call of call (** call to subroutine *) - | Goto of label (** jump inside subroutine *) - | Ret of label (** return from call to label *) - | Int of int * tid (** interrupt and return to tid *) + | Call of call (** call to subroutine *) + | Goto of label (** jump inside subroutine *) + | Ret of label (** return from call to label *) + | Int of int * tid (** interrupt and return to tid *) [@@deriving bin_io, compare, sexp] - (** argument intention *) + (** argument intention *) type intent = - | In (** input argument *) - | Out (** output argument *) - | Both (** input/output *) + | In (** input argument *) + | Out (** output argument *) + | Both (** input/output *) [@@deriving bin_io, compare, sexp] - type ('a,'b) cls + type ('a, 'b) cls + + (** {4 Term type classes} *) + + val program_t : (nil, program) cls + (** program *) + + val sub_t : (program, sub) cls + (** sub *) - (** {4 Term type classes} *) + val arg_t : (sub, arg) cls + (** arg *) - val program_t : (nil, program) cls (** program *) - val sub_t : (program, sub) cls (** sub *) - val arg_t : (sub, arg) cls (** arg *) - val blk_t : (sub, blk) cls (** blk *) - val phi_t : (blk, phi) cls (** phi *) - val def_t : (blk, def) cls (** def *) - val jmp_t : (blk, jmp) cls (** jmp *) + val blk_t : (sub, blk) cls + (** blk *) + + val phi_t : (blk, phi) cls + (** phi *) + + val def_t : (blk, def) cls + (** def *) + + val jmp_t : (blk, jmp) cls + (** jmp *) (** BIR Interpreter - @deprecated Use the Primus Framework. - *) + @deprecated Use the Primus Framework. *) module Biri : sig open Bil.Result - (** Biri evaluates terms in the context of a whole program (since - terms may contain calls and jumps). - Biri also tracks for current position inside block, the block - and preceding block. + (** Biri evaluates terms in the context of a whole program (since terms may + contain calls and jumps). Biri also tracks for current position inside + block, the block and preceding block. - Note, that even if some properties do not provide setters, they - can still change during the evaluation, as other - implementations may override them and provide different behavior.*) - class context : ?main : sub term -> program term -> object('s) - inherit Expi.context + Note, that even if some properties do not provide setters, they can + still change during the evaluation, as other implementations may + override them and provide different behavior.*) + class context : ?main:sub term -> program term -> object ('s) + inherit Expi.context - (** current model of a program. *) - method program : program term + method program : program term + (** current model of a program. *) - (** the entry point of evaluation *) - method main : sub term option + method main : sub term option + (** the entry point of evaluation *) - (** list of term that were already executed (may be long) *) - method trace : tid list + method trace : tid list + (** list of term that were already executed (may be long) *) - (** Should be called when a new term is entered. This - implementation will update the trace list with the passed - argument. *) - method enter_term : tid -> 's + method enter_term : tid -> 's + (** Should be called when a new term is entered. This implementation will + update the trace list with the passed argument. *) - (** [set_next tid] set the identifier of the next term. *) - method set_next : tid option -> 's + method set_next : tid option -> 's + (** [set_next tid] set the identifier of the next term. *) - (** The [next] term identifier is the identifier of a term, - that should be executed next. If [next] is [None] then, - the interpretation will stop. The identifier must belong - to a term, that is in the [program] and is either an - identifier of a block or a subroutine. *) - method next : tid option - end + method next : tid option + (** The [next] term identifier is the identifier of a term, that should be + executed next. If [next] is [None] then, the interpretation will stop. + The identifier must belong to a term, that is in the [program] and is + either an identifier of a block or a subroutine. *) + end module type S = sig + type ('a, 'e) state + type 'a u = (unit, 'a) state + type 'a r = (Bil.result, 'a) state - type ('a,'e) state - type 'a u = (unit,'a) state - type 'a r = (Bil.result,'a) state - - module Expi : Expi.S with type ('a,'e) state = ('a,'e) state + module Expi : Expi.S with type ('a, 'e) state = ('a, 'e) state - (** base class for BIR interpreters *) + (** base class for BIR interpreters *) class ['a] t : object constraint 'a = #context inherit ['a] Expi.t - (** called for each term, just after the position is updated, - but before any side effect of term evaluation had occurred.*) - method enter_term : 't 'p . ('p,'t) cls -> 't term -> 'a u + method enter_term : 't 'p. ('p, 't) cls -> 't term -> 'a u + (** called for each term, just after the position is updated, but before + any side effect of term evaluation had occurred.*) - (** [eval cls t] evaluates a term [t] of the [cls] class. The - method implementation will call the [enter_term] method, - and then will dispatch to the [eval_XXX] method, where - [XXX] is a name of a term corresponding to [cls]. Finally, - the [leave_term] method is called. *) - method eval : 't 'p . ('p,'t) cls -> 't term -> 'a u + method eval : 't 'p. ('p, 't) cls -> 't term -> 'a u + (** [eval cls t] evaluates a term [t] of the [cls] class. The method + implementation will call the [enter_term] method, and then will + dispatch to the [eval_XXX] method, where [XXX] is a name of a term + corresponding to [cls]. Finally, the [leave_term] method is called. + *) - (** called after all side effects of the term has occurred *) - method leave_term : 't 'p . ('p,'t) cls -> 't term -> 'a u + method leave_term : 't 'p. ('p, 't) cls -> 't term -> 'a u + (** called after all side effects of the term has occurred *) + method eval_sub : sub term -> 'a u (** Evaluates a subroutine with the following algorithm: - 0. next <- first block of subroutine and goto 1 - 1. eval all in and in/out arguments and goto 2 - 2. if next is some blk then eval it and goto 2 else goto 3 - 3. if next is some sub then eval it and goto 2 else goto 4 - 4. eval all out and in/out arguments. - *) - method eval_sub : sub term -> 'a u + 0. next <- first block of subroutine and goto 1 1. eval all in and + in/out arguments and goto 2 2. if next is some blk then eval it and + goto 2 else goto 3 3. if next is some sub then eval it and goto 2 + else goto 4 4. eval all out and in/out arguments. *) - (** evaluate argument by first evaluating its right hand side, - and then assigning the result to the left hand side.*) method eval_arg : arg term -> 'a u + (** evaluate argument by first evaluating its right hand side, and then + assigning the result to the left hand side.*) - (** evaluate all terms in a given block, starting with phi - nodes, then proceeding to def nodes and finally evaluating - all jmp terms until either jump is taken or jump condition - is undefined. - After the evaluation the context#next will point next - destination. *) method eval_blk : blk term -> 'a u + (** evaluate all terms in a given block, starting with phi nodes, then + proceeding to def nodes and finally evaluating all jmp terms until + either jump is taken or jump condition is undefined. After the + evaluation the context#next will point next destination. *) - (** evaluate definition by assigning the result of the right - hand side to the definition variable *) method eval_def : def term -> 'a u + (** evaluate definition by assigning the result of the right hand side + to the definition variable *) - (** based on trace select an expression and assign its - value to the left hand side of phi node. *) method eval_phi : phi term -> 'a u + (** based on trace select an expression and assign its value to the left + hand side of phi node. *) - (** evaluate condition, and if it is false, then do nothing, - otherwise evaluate jump target (see below) *) method eval_jmp : jmp term -> 'a u + (** evaluate condition, and if it is false, then do nothing, otherwise + evaluate jump target (see below) *) - (** evaluate label, using [eval_direct] or [eval_indirect], based - on the label variant *) method eval_goto : label -> 'a u + (** evaluate label, using [eval_direct] or [eval_indirect], based on the + label variant *) - (** evaluate target label, using [eval_direct] or - [eval_indirect], based on the label variant. - Ignores return label. *) method eval_call : call -> 'a u + (** evaluate target label, using [eval_direct] or [eval_indirect], based + on the label variant. Ignores return label. *) - (** evaluate label, using [eval_direct] or [eval_indirect], based - on the label variant *) - method eval_ret : label -> 'a u + method eval_ret : label -> 'a u + (** evaluate label, using [eval_direct] or [eval_indirect], based on the + label variant *) - (** ignore arguments and set context#next to None *) - method eval_exn : int -> tid -> 'a u + method eval_exn : int -> tid -> 'a u + (** ignore arguments and set context#next to None *) - (** set context#next to the a given tid *) method eval_direct : tid -> 'a u + (** set context#next to the a given tid *) - (** ignore argument and set context#next to None *) method eval_indirect : exp -> 'a u + (** ignore argument and set context#next to None *) end end - module Make(M : Monad.State.S2) : - S with type ('a,'e) state = ('a,'e) M.t - - include S with type ('a,'e) state = ('a,'e) Monad.State.t - end [@@deprecated "[since 2018-03] in favor of the Primus Framework"] - + module Make (M : Monad.State.S2) : S with type ('a, 'e) state = ('a, 'e) M.t + include S with type ('a, 'e) state = ('a, 'e) Monad.State.t + end + [@@deprecated "[since 2018-03] in favor of the Primus Framework"] - (** BIR {{!Biri}interpreter} - @deprecated Use the Primus Framework - *) class ['a] biri : ['a] Biri.t - [@@deprecated "[since 2018-03] in favor of the Primus Framework"] + (** BIR {{!Biri}interpreter} + @deprecated Use the Primus Framework *) (** {3 Some predefined tags} *) - type color = [ - | `black + type color = + [ `black | `red | `green | `yellow @@ -4890,316 +4610,312 @@ module Std : sig | `magenta | `cyan | `white - | `gray - ] [@@deriving bin_io, compare, sexp] + | `gray ] + [@@deriving bin_io, compare, sexp] - (** Color something with a color *) val color : color tag + (** Color something with a color *) - (** print marked entity with the specified color. (the same - as color, but pretty printing function will output ascii escape - sequence of corresponding color. *) val foreground : color tag + (** print marked entity with the specified color. (the same as color, but + pretty printing function will output ascii escape sequence of + corresponding color. *) - (** print marked entity with specified color. See [foreground]. *) val background : color tag + (** print marked entity with specified color. See [foreground]. *) - (** A human readable comment *) val comment : string tag + (** A human readable comment *) - (** A command in python language *) val python : string tag + (** A command in python language *) - (** A command in shell language *) val shell : string tag + (** A command in shell language *) - (** Mark something as marked *) val mark : unit tag + (** Mark something as marked *) - (** Give a weight *) val weight : float tag + (** Give a weight *) - (** A virtual address of an entity *) val address : addr tag + (** A virtual address of an entity *) - (** A name of a file *) val filename : string tag + (** A name of a file *) - (** an image loaded into memory *) type image + (** an image loaded into memory *) - (** opaque memory *) type mem [@@deriving sexp_of] + (** opaque memory *) - (** a table from memory to ['a] *) type 'a table [@@deriving sexp_of] + (** a table from memory to ['a] *) - (** interval trees from memory regions to ['a] *) type 'a memmap [@@deriving sexp_of] + (** interval trees from memory regions to ['a] *) - (** Iterators lifted into monad *) + (** Iterators lifted into monad *) module type Memory_iterators = sig type t type 'a m - (** [fold ~word_size ~init ~f t] folds over elements of [t], - so a result is [f (... (f (f a elt_1) elt_2) ...) elt_n] *) - val fold : ?word_size:size -> t -> init:'b -> f:(word -> 'b -> 'b m) -> 'b m + val fold : ?word_size:size -> t -> init:'b -> f:(word -> 'b -> 'b m) -> 'b m + (** [fold ~word_size ~init ~f t] folds over elements of [t], so a result is + [f (... (f (f a elt_1) elt_2) ...) elt_n] *) + val iter : ?word_size:size -> t -> f:(word -> unit m) -> unit m (** [iter ~word_size ~f t] applies [f] to elements of [t] *) - val iter : ?word_size:size -> t -> f:(word -> unit m) -> unit m - (** [foldi ~word_size ~init ~f t] is like {!fold}, but also passes - an address to the [f] *) - val foldi : ?word_size:size -> t -> init:'b -> f:(addr -> word -> 'b -> 'b m) -> 'b m + val foldi : + ?word_size:size -> t -> init:'b -> f:(addr -> word -> 'b -> 'b m) -> 'b m + (** [foldi ~word_size ~init ~f t] is like {!fold}, but also passes an + address to the [f] *) - (** [iteri ~word_size ~f t] is like {!iter}, but also passes - an address to the [f] *) - val iteri : ?word_size:size -> t -> f:(addr -> word -> unit m) -> unit m + val iteri : ?word_size:size -> t -> f:(addr -> word -> unit m) -> unit m + (** [iteri ~word_size ~f t] is like {!iter}, but also passes an address to + the [f] *) - (** [exists ~word_size ~f t] checks if at least one element of [t] - satisfies the predicate [f] *) - val exists : ?word_size:size -> t -> f:(addr -> word -> bool m) -> bool m + val exists : ?word_size:size -> t -> f:(addr -> word -> bool m) -> bool m + (** [exists ~word_size ~f t] checks if at least one element of [t] satisfies + the predicate [f] *) - (** [for_all ~word_size ~f t] checks if all elements of [t] - satisfies the predicate [f] *) - val for_all : ?word_size:size -> t -> f:(addr -> word -> bool m) -> bool m + val for_all : ?word_size:size -> t -> f:(addr -> word -> bool m) -> bool m + (** [for_all ~word_size ~f t] checks if all elements of [t] satisfies the + predicate [f] *) - (** [count ~word_size ~f t] is the number of elements in [t] - that satisfies the predicate [f]. *) - val count : ?word_size:size -> t -> f:(addr -> word -> bool m) -> int m + val count : ?word_size:size -> t -> f:(addr -> word -> bool m) -> int m + (** [count ~word_size ~f t] is the number of elements in [t] that satisfies + the predicate [f]. *) - (** [find_if ~word_size ~f t] returns the first element of - [t] that satisfies the predicate [p] or None if no elements - satisfied *) - val find_if : ?word_size:size -> t -> f:(addr -> word -> bool m) -> word option m + val find_if : + ?word_size:size -> t -> f:(addr -> word -> bool m) -> word option m + (** [find_if ~word_size ~f t] returns the first element of [t] that + satisfies the predicate [p] or None if no elements satisfied *) - (** [find_map ~word_size ~f t] returns the first evaluation - of [f] that returns [Some] or None if [f] always returns [None] *) - val find_map : ?word_size:size -> t -> f:(addr -> word -> 'a option m) -> 'a option m + val find_map : + ?word_size:size -> t -> f:(addr -> word -> 'a option m) -> 'a option m + (** [find_map ~word_size ~f t] returns the first evaluation of [f] that + returns [Some] or None if [f] always returns [None] *) end - (** Memory region *) + (** Memory region *) module Memory : sig type t = mem [@@deriving sexp_of] - - (** [create ?pos ?len endian start data] creates a memory region. - - Creates a memory view of the provided [data] using the - specified byte order [endian] and mapping the first ([pos]) - byte to the [start] address. The [pos] and [len] parameters - can be used to narrow down the view, and default to [0] and - the length of the provided string, correspondingly. - - The [data] may not be copied and the returned memory view may - reference the same bigstring object. - *) val create : - ?pos:int -> (** defaults to [0] *) - ?len:int -> (** defaults to full length *) + ?pos:int -> + (* defaults to [0] *) + ?len:int -> + (* defaults to full length *) endian -> addr -> - Bigstring.t -> t Or_error.t + Bigstring.t -> + t Or_error.t + (** [create ?pos ?len endian start data] creates a memory region. + Creates a memory view of the provided [data] using the specified byte + order [endian] and mapping the first ([pos]) byte to the [start] + address. The [pos] and [len] parameters can be used to narrow down the + view, and default to [0] and the length of the provided string, + correspondingly. - (** [rebase mem addr] returns the same memory but with the new - starting address [addr]. + The [data] may not be copied and the returned memory view may reference + the same bigstring object. *) - @since 2.2.0 - *) val rebase : t -> addr -> t + (** [rebase mem addr] returns the same memory but with the new starting + address [addr]. + + @since 2.2.0 *) - (** memory representation of a program *) val slot : (Theory.program, mem option) Knowledge.slot + (** memory representation of a program *) - (** [of_file endian start name] creates a memory region from file. - Takes data stored in a file with the given [name] and maps it - to the memory region with the specified starting address [start] - and using the [endian] for storing and reading words. - *) val of_file : endian -> addr -> string -> t Or_error.t + (** [of_file endian start name] creates a memory region from file. Takes + data stored in a file with the given [name] and maps it to the memory + region with the specified starting address [start] and using the + [endian] for storing and reading words. *) - (** [view word_size ~from ~words mem] returns a new memory - that represents the specified region of memory [mem]. [copy] - function performs deep copy. - - @param addr defaults [min_addr mem] - @param words defaults to the end of the memory region. - *) val view : ?word_size:size -> ?from:addr -> ?words:int -> t -> t Or_error.t + (** [view word_size ~from ~words mem] returns a new memory that represents + the specified region of memory [mem]. [copy] function performs deep + copy. + @param addr defaults [min_addr mem] + @param words defaults to the end of the memory region. *) - (** [view_exn mem] is the same as [ok_exn @@view_exn mem] but is - slightly more efficient. - - @raise Invalid_arg in case if the arguments are not fitting - into the memory. - - @since 2.2.0 - *) val view_exn : ?word_size:size -> ?from:addr -> ?words:int -> t -> t + (** [view_exn mem] is the same as [ok_exn @@view_exn mem] but is slightly + more efficient. + @raise Invalid_arg + in case if the arguments are not fitting into the memory. + + @since 2.2.0 *) - (** [range mem a0 a1] returns a view on [mem] starting from - address [a0] and ending at [a1], bounds inclusive *) val range : t -> addr -> addr -> t Or_error.t + (** [range mem a0 a1] returns a view on [mem] starting from address [a0] and + ending at [a1], bounds inclusive *) - (** [merge m1 m2] takes two memory regions, that either intersects or - share edges (i.e., difference between [min_addr] of one of the - blocks and [max_addr] of another is less then or equal to one, and - returns memory blocks that spans memory starting from the address - {[min (min_addr m1) (min_addr m2)]} and ending with address - {[max (max_addr m1) (max_addr m2)]}. - - Will return an error, if either the above state precondition - doesn't hold, or if this two memory blocks doesn't share the same - underlying memory (i.e., bases), or if they have different - endianness. - *) val merge : t -> t -> t Or_error.t + (** [merge m1 m2] takes two memory regions, that either intersects or share + edges (i.e., difference between [min_addr] of one of the blocks and + [max_addr] of another is less then or equal to one, and returns memory + blocks that spans memory starting from the address + {[ + min (min_addr m1) (min_addr m2) + ]} + and ending with address + {[ + max (max_addr m1) (max_addr m2) + ]} + . + + Will return an error, if either the above state precondition doesn't + hold, or if this two memory blocks doesn't share the same underlying + memory (i.e., bases), or if they have different endianness. *) - (** [first_byte m] returns first byte of [m] as a memory *) val first_byte : t -> t + (** [first_byte m] returns first byte of [m] as a memory *) - (** [last_byte m] returns last byte of [m] as a memory *) val last_byte : t -> t + (** [last_byte m] returns last byte of [m] as a memory *) - (** returns the order of bytes in a word *) val endian : t -> endian + (** returns the order of bytes in a word *) - (** [get ?disp ?index ?scale ?addr mem] reads a [scale] sized word from [mem]. - - Parameters mimic the reference syntax in the gas assembler, - e.g., [dis(base,index,scale)] denotes address at [base + index * scale + dis]. + val get : + ?disp:int -> + ?index:int -> + ?scale:size -> + ?addr:addr -> + t -> + word Or_error.t + (** [get ?disp ?index ?scale ?addr mem] reads a [scale] sized word from + [mem]. - The size of the returned word is equal to [scale], bytes are read in - the [endian mem] order. + Parameters mimic the reference syntax in the gas assembler, e.g., + [dis(base,index,scale)] denotes address at [base + index * scale + dis]. + The size of the returned word is equal to [scale], bytes are read in the + [endian mem] order. @param disp is the base offset and defaults to [0] @param index defaults to [0] - @param scale defaults to [`r8] - *) - val get : ?disp:int -> ?index:int -> ?scale:size -> ?addr:addr -> t -> word Or_error.t + @param scale defaults to [`r8] *) - (** [m^n] dereferences a byte at address [n] *) - val (^) : t -> addr -> word Or_error.t + val ( ^ ) : t -> addr -> word Or_error.t + (** [m^n] dereferences a byte at address [n] *) - (** [m^.n] dereferences a byte at address [n] *) - val (^!) : t -> addr -> word + val ( ^! ) : t -> addr -> word + (** [m^.n] dereferences a byte at address [n] *) - (** [max_addr m] is an address of the last byte of [m] *) val max_addr : t -> addr + (** [max_addr m] is an address of the last byte of [m] *) - (** [min_addr m] is an address of the first byte of [m] *) val min_addr : t -> addr + (** [min_addr m] is an address of the first byte of [m] *) - (** [length m] returns a number of bytes in m *) val length : t -> int + (** [length m] returns a number of bytes in m *) - (** [contains mem addr] returns true if [mem] contains address [addr] *) val contains : t -> addr -> bool + (** [contains mem addr] returns true if [mem] contains address [addr] *) - (** [compare_with mem addr] compares memory with [addr] *) - val compare_with : t -> addr -> [ - | `addr_is_inside - | `addr_is_below - | `addr_is_above - ] - - (** A set of low level input operations. - Note: it is more effective to use above head iterators, instead - of this low level interface, since iterators do not need to check - every memory access. *) + val compare_with : + t -> addr -> [ `addr_is_inside | `addr_is_below | `addr_is_above ] + (** [compare_with mem addr] compares memory with [addr] *) + + (** A set of low level input operations. Note: it is more effective to use + above head iterators, instead of this low level interface, since + iterators do not need to check every memory access. *) module Input : sig - (** [reader mem ~pos_ref] defines a set of functions with a - common interface. Each function accepts a memory [mem] and a - [pos_ref] - a reference to a address that should be read. This - reference will be updated for the amount of bytes that was - actually read. + type 'a reader = t -> pos_ref:addr ref -> 'a Or_error.t + (** [reader mem ~pos_ref] defines a set of functions with a common + interface. Each function accepts a memory [mem] and a [pos_ref] - a + reference to a address that should be read. This reference will be + updated for the amount of bytes that was actually read. - @return a word lifted into a monad. - *) - type 'a reader = t -> pos_ref : addr ref -> 'a Or_error.t + @return a word lifted into a monad. *) - (** [word ~word_size] a reader that reads words of [word_size] *) - val word : word_size:size -> word reader + val word : word_size:size -> word reader + (** [word ~word_size] a reader that reads words of [word_size] *) - (** [int8] a signed byte reader *) - val int8 : word reader + val int8 : word reader + (** [int8] a signed byte reader *) - (** [uint8] an unsigned byte reader *) - val uint8 : word reader + val uint8 : word reader + (** [uint8] an unsigned byte reader *) - (** [int16] a signed 16-bit word reader *) - val int16 : word reader + val int16 : word reader + (** [int16] a signed 16-bit word reader *) - (** [uint16] an unsigned 16-bit word reader *) val uint16 : word reader + (** [uint16] an unsigned 16-bit word reader *) - (** [int32] a 32-bit word reader *) - val int32 : word reader + val int32 : word reader + (** [int32] a 32-bit word reader *) - (** [int64] a 64-bit word reader *) - val int64 : word reader + val int64 : word reader + (** [int64] a 64-bit word reader *) end - (** {2 Printing and outputting} *) include Printable.S with type t := t + (** {2 Printing and outputting} *) - (** [hexdump t out] outputs hexdump (as per [hexdump -C]) of the - memory to formatter [out] *) - val hexdump: t -> string + val hexdump : t -> string + (** [hexdump t out] outputs hexdump (as per [hexdump -C]) of the memory to + formatter [out] *) - (** a set of iterators, with identity monad. *) - include Memory_iterators with type t := t - and type 'a m = 'a + (** a set of iterators, with identity monad. *) + include Memory_iterators with type t := t and type 'a m = 'a - (** iterators lifter to the Or_error monad *) - module With_error : Memory_iterators with type t := t - and type 'a m = 'a Or_error.t + (** iterators lifter to the Or_error monad *) + module With_error : + Memory_iterators with type t := t and type 'a m = 'a Or_error.t - (** lifts iterators to monad [M] *) - module Make_iterators( M : Legacy.Monad.S ) - : Memory_iterators with type t := t - and type 'a m = 'a M.t + (** lifts iterators to monad [M] *) + module Make_iterators (M : Legacy.Monad.S) : + Memory_iterators with type t := t and type 'a m = 'a M.t (** {2 Interfacing with C} - The following interfaces is supposed to be used only for the - purposes of exposing memory to c programs. *) + The following interfaces is supposed to be used only for the purposes of + exposing memory to c programs. *) - (** [to_buffers mem] creates a buffer representing the memory [mem]. - It is not specified whether the returned buffer has some sharing - with underlying implementation. In other words the returned buffer - shouldn't be modified. + val to_buffer : t -> Bigsubstring.t + (** [to_buffers mem] creates a buffer representing the memory [mem]. It is + not specified whether the returned buffer has some sharing with + underlying implementation. In other words the returned buffer shouldn't + be modified. Since it is not guaranteed that memory is contiguous, a sequence of - buffers is returned, with each buffer representing a contiguous - part of memory. - - *) - val to_buffer : t -> Bigsubstring.t + buffers is returned, with each buffer representing a contiguous part of + memory. *) - (** Tries over memory *) + (** Tries over memory *) module Trie : sig module Stable : sig module V1 : sig - module R8 : Trie.V2.S with type key = t and type token = word + module R8 : Trie.V2.S with type key = t and type token = word module R16 : Trie.V2.S with type key = t and type token = word module R32 : Trie.V2.S with type key = t and type token = word module R64 : Trie.V2.S with type key = t and type token = word end + module V2 : sig - module R8 : Trie.V2.S with type key = t and type token = word + module R8 : Trie.V2.S with type key = t and type token = word module R16 : Trie.V2.S with type key = t and type token = word module R32 : Trie.V2.S with type key = t and type token = word module R64 : Trie.V2.S with type key = t and type token = word end - end - module R8 : Trie.V2.S with type key = t and type token = word + + module R8 : Trie.V2.S with type key = t and type token = word module R16 : Trie.V2.S with type key = t and type token = word module R32 : Trie.V2.S with type key = t and type token = word module R64 : Trie.V2.S with type key = t and type token = word @@ -5208,137 +4924,138 @@ module Std : sig (** Table. - Tables are used to partition memory region into a set of - non-intersecting areas. Each area is associated with arbitrary - value of type ['a] bound to the type of the table. + Tables are used to partition memory region into a set of non-intersecting + areas. Each area is associated with arbitrary value of type ['a] bound to + the type of the table. - All operations over tables are purely applicative, i.e. there is - no observable side-effects. Although, they employ some kind of - caching underneath the hood, so that they perform better if - they're build once and used many times. + All operations over tables are purely applicative, i.e. there is no + observable side-effects. Although, they employ some kind of caching + underneath the hood, so that they perform better if they're build once and + used many times. - Tables can be also linked. For example, if you have two tables - mapping the same memory region to a different sets of values, you - can create a mapping from one set of values to another. See [link] - function for mode details. *) + Tables can be also linked. For example, if you have two tables mapping the + same memory region to a different sets of values, you can create a mapping + from one set of values to another. See [link] function for mode details. + *) module Table : sig type 'a t = 'a table [@@deriving sexp_of] type 'a hashable = 'a Hashtbl.Hashable.t - (** creates an empty table *) val empty : 'a t + (** creates an empty table *) - (** creates a table containing one bindins *) val singleton : mem -> 'a -> 'a t + (** creates a table containing one bindins *) - (** [add table mem v] returns a new table with added mapping from a - mem region [mem] to a data value [v] *) val add : 'a t -> mem -> 'a -> 'a t Or_error.t + (** [add table mem v] returns a new table with added mapping from a mem + region [mem] to a data value [v] *) - (** returns a new table with all mappings from the mem region - [mem] removed *) val remove : 'a t -> mem -> 'a t - + (** returns a new table with all mappings from the mem region [mem] removed + *) + + val change : + 'a t -> + mem -> + f: + ((mem * 'a) seq -> + [ `rebind of mem * 'a (** add new mapping instead *) + | `update of mem * 'a -> 'a (** update all bindings *) + | `remove (** remove all bindings *) + | `ignore ]) (** don't touch anything *) -> + 'a t (** [change tab mem ~f] function [f] is applied to a set of all memory regions that intersects with [mem]. If function [f] evaluates to - [`remap (new_mem,y)] then all memory regions that have had - intersections with [mem] will be removed from the new map and - memory region [new_mem] will be mapped to [y]. If [f] evaluates to - [`remove], then the regions will be removed, and nothing will be - added. If it evaluates to [`skip] then the table will be returned - unchanged. Intersections are passed sorted in an ascending order. - *) - val change : 'a t -> mem -> f:((mem * 'a) seq -> [ - | `rebind of mem * 'a (** add new mapping instead *) - | `update of ((mem * 'a) -> 'a) (** update all bindings *) - | `remove (** remove all bindings *) - | `ignore]) (** don't touch anything *) - -> 'a t - - (** [length table] returns a number of entries in the table *) + [`remap (new_mem,y)] then all memory regions that have had intersections + with [mem] will be removed from the new map and memory region [new_mem] + will be mapped to [y]. If [f] evaluates to [`remove], then the regions + will be removed, and nothing will be added. If it evaluates to [`skip] + then the table will be returned unchanged. Intersections are passed + sorted in an ascending order. *) + val length : 'a t -> int + (** [length table] returns a number of entries in the table *) - (** [find table mem] finds an element mapped to the memory region [mem] *) val find : 'a t -> mem -> 'a option + (** [find table mem] finds an element mapped to the memory region [mem] *) - (** [find_addr tab addr] finds a memory region that contains a - specified [addr] *) val find_addr : 'a t -> addr -> (mem * 'a) option + (** [find_addr tab addr] finds a memory region that contains a specified + [addr] *) - (** [intersections table mem] returns all mappings in a [table] that - have intersections with [mem] *) val intersections : 'a t -> mem -> (mem * 'a) seq + (** [intersections table mem] returns all mappings in a [table] that have + intersections with [mem] *) - (** [fold_intersections table mem] folds over all regions - intersecting with [mem] *) - val fold_intersections : 'a t -> mem -> init:'b -> f:(mem -> 'a -> 'b -> 'b) -> 'b + val fold_intersections : + 'a t -> mem -> init:'b -> f:(mem -> 'a -> 'b -> 'b) -> 'b + (** [fold_intersections table mem] folds over all regions intersecting with + [mem] *) - (** [has_intersections tab mem] is true iff some portion of [mem] is - is already mapped in [tab]. *) val has_intersections : 'a t -> mem -> bool + (** [has_intersections tab mem] is true iff some portion of [mem] is is + already mapped in [tab]. *) - (** [mem table mem] is true if table contains mem region [mem] *) val mem : _ t -> mem -> bool + (** [mem table mem] is true if table contains mem region [mem] *) - (** [next table elt] returns element next to [elt], if any *) val next : 'a t -> mem -> (mem * 'a) option + (** [next table elt] returns element next to [elt], if any *) - (** [next table elt] returns element preceding to [elt], if any *) val prev : 'a t -> mem -> (mem * 'a) option + (** [next table elt] returns element preceding to [elt], if any *) - (** [min tab] return the lowest binding *) val min : 'a t -> (mem * 'a) option + (** [min tab] return the lowest binding *) - (** [max tab] return the highest binding *) val max : 'a t -> (mem * 'a) option + (** [max tab] return the highest binding *) - (** Relation multiplicity. - For a given type ['a] creates type ['m] - *) - type ('a,'m) r + type ('a, 'm) r + (** Relation multiplicity. For a given type ['a] creates type ['m] *) - (** {2 Table relations} *) + (** {2 Table relations} *) - (** [0..*] *) val many : ('a, 'a seq) r + (** [0..*] *) val at_least_one : ('a, 'a * 'a seq) r - (** [1..1] *) val one : ('a, 'a) r + (** [1..1] *) - (** [0..1] *) val maybe_one : ('a, 'a option) r + (** [0..1] *) - (** [link relation t t1 t2] takes two tables and returns a mapping - from elements of one table to elements of other table. + (** [link relation t t1 t2] takes two tables and returns a mapping from + elements of one table to elements of other table. - Parameter [t] specifies a [hashable] typeclass of the type ['a]. If - type ['a] implements [Hashable] interface, then you can obtain it - with [hashable] function, e.g. [Int.hashable] with return the - appropriate type class. If ['a] doesn't implement [Hashable], then - it can be implemented manually. + Parameter [t] specifies a [hashable] typeclass of the type ['a]. If type + ['a] implements [Hashable] interface, then you can obtain it with + [hashable] function, e.g. [Int.hashable] with return the appropriate + type class. If ['a] doesn't implement [Hashable], then it can be + implemented manually. - Relation specifies the multiplicity of the relation between - entities from table [t1] to entities from table [t2], and is - summarized below: + Relation specifies the multiplicity of the relation between entities + from table [t1] to entities from table [t2], and is summarized below: - - [one_to_many] means that a particular region from table [t1] can - span several memory regions from table [t2]. Example: segments - to symbols relation. + - [one_to_many] means that a particular region from table [t1] can span + several memory regions from table [t2]. Example: segments to symbols + relation. - - [one_to_one] means that for each value of type ['a] there is - exactly one value of type ['b]. This relation should be used with - caution, since it is quantified over _all_ values of type - ['a]. Indeed, it should be used only for cases, when it can be - guaranteed, that it is impossible to create such value of type - ['b], that has no correspondence in table [t2]. Otherwise, - [one_to_maybe_one] relation should be used. Example: llvm - machine code to assembly string relation. + - [one_to_one] means that for each value of type ['a] there is exactly + one value of type ['b]. This relation should be used with caution, + since it is quantified over _all_ values of type ['a]. Indeed, it + should be used only for cases, when it can be guaranteed, that it is + impossible to create such value of type ['b], that has no + correspondence in table [t2]. Otherwise, [one_to_maybe_one] relation + should be used. Example: llvm machine code to assembly string + relation. - [one_to_maybe_one] means that for each value in table [t1] there - exists at most one value in table [t2]. Example: function to - symbol relation. + exists at most one value in table [t2]. Example: function to symbol + relation. {3 Examples} @@ -5347,35 +5064,40 @@ module Std : sig let syms_of_sec = link one_to:many Sec.hashable secs syms ]} *) - val link : one_to:('b,'r) r -> 'a hashable -> 'a t -> 'b t -> 'a -> 'r + val link : one_to:('b, 'r) r -> 'a hashable -> 'a t -> 'b t -> 'a -> 'r - (** [rev_map arity t tab] creates a reverse mapping from values of - typeclass [t] stored in table [tab] to memory regions. + val rev_map : + one_to:(mem, 'r) r -> 'a hashable -> 'a t -> ('a -> 'r) Or_error.t + (** [rev_map arity t tab] creates a reverse mapping from values of typeclass + [t] stored in table [tab] to memory regions. - Note. not every mapping is reversible, for example, trying to obtain - a reverse of surjective mapping as a one-to-one mapping will - result in an error. But surjective mappings can be reversed - using [~one_to:many] mapping. A particular example of surjective - mapping is [symbol] tables, in a case when functions can occupy - several non-contiguous regions of memory. + Note. not every mapping is reversible, for example, trying to obtain a + reverse of surjective mapping as a one-to-one mapping will result in an + error. But surjective mappings can be reversed using [~one_to:many] + mapping. A particular example of surjective mapping is [symbol] tables, + in a case when functions can occupy several non-contiguous regions of + memory. - For example, to create a mapping from a function symbol to - sequence of memory regions with it code: + For example, to create a mapping from a function symbol to sequence of + memory regions with it code: - {[rev_map one_to:many Sym.hashable tab]} - - *) - val rev_map : one_to:(mem,'r) r -> 'a hashable -> 'a t -> ('a -> 'r) Or_error.t + {[ + rev_map one_to:many Sym.hashable tab + ]} *) + type 'a ranged = + ?start:mem (** defaults to the lowest mapped region *) -> + ?until:mem (** defaults to the highest mapped area *) -> + 'a (** {2 Iterators} - This section provides a common set of iterators. Note: name - iterator is used in a functional meaning, i.e., an iterator is a - function that takes a data structure and another function, and - applies it to all elements in some manner. + This section provides a common set of iterators. Note: name iterator is + used in a functional meaning, i.e., an iterator is a function that takes + a data structure and another function, and applies it to all elements in + some manner. - All iterators share some common part of interface that was lifted - to a ['a ranged] type. When you see + All iterators share some common part of interface that was lifted to a + ['a ranged] type. When you see [('a t -> f:('a -> bool) -> bool) ranged] @@ -5384,858 +5106,819 @@ module Std : sig [?start -> ?until -> 'a t -> f:('a -> bool) -> bool]. In other words ['f ranged] just prepends [?start -> ?until ->] to - function with type ['f] (do not forget that ['f] can be an arrow - type). - - [start] and [until] parameters narrows iteration to some - subset of table. If they are unspecified then iteration would - be performed on all table entries in an ascending order of - addresses. If they are specified, then if [start <= until], - then iteration will be performed in the same order but on a - specified subset. In the case, when [start > until], iteration - will be performed in a decreasing order. *) - type 'a ranged - = ?start:mem (** defaults to the lowest mapped region *) - -> ?until:mem (** defaults to the highest mapped area *) - -> 'a - - (** [exists ~start ~until ~f table] checks if at least one - element of [table] satisfies the predicate [f]. *) - val exists : ('a t -> f:( 'a -> bool) -> bool) ranged - - (** [for_all ~start ~until ~f table] checks if all elements - of [table] satisfies the predicate [f]. *) - val for_all : ('a t -> f:( 'a -> bool) -> bool) ranged - - (** [existsi ~start ~until ~f table] is like {!exists}, but - also passes the memory as an argument. *) - val existsi : ('a t -> f:(mem -> 'a -> bool) -> bool) ranged - - (** [for_alli ~start ~until ~f table] is like {!for_all}, but - also passes the memory as an argument. *) - val for_alli : ('a t -> f:(mem -> 'a -> bool) -> bool) ranged - - (** [count ~start ~until ~f table] returns the number of elements - [table] that satisfy the predicate [p] *) - val count : ('a t -> f:('a -> bool) -> int) ranged - - (** [find_if ~start ~until ~f table] returns the first element of - [table] that satisfies the predicate [p] or None if no elements - satisfied *) - val find_if : ('a t -> f:('a -> bool) -> 'a option) ranged - - (** [find_map ~start ~until ~f table] returns the first evaluation - of [f] that returns [Some] or None if [f] always returns [None] *) + function with type ['f] (do not forget that ['f] can be an arrow type). + + [start] and [until] parameters narrows iteration to some subset of + table. If they are unspecified then iteration would be performed on all + table entries in an ascending order of addresses. If they are specified, + then if [start <= until], then iteration will be performed in the same + order but on a specified subset. In the case, when [start > until], + iteration will be performed in a decreasing order. *) + + val exists : ('a t -> f:('a -> bool) -> bool) ranged + (** [exists ~start ~until ~f table] checks if at least one element of + [table] satisfies the predicate [f]. *) + + val for_all : ('a t -> f:('a -> bool) -> bool) ranged + (** [for_all ~start ~until ~f table] checks if all elements of [table] + satisfies the predicate [f]. *) + + val existsi : ('a t -> f:(mem -> 'a -> bool) -> bool) ranged + (** [existsi ~start ~until ~f table] is like {!exists}, but also passes the + memory as an argument. *) + + val for_alli : ('a t -> f:(mem -> 'a -> bool) -> bool) ranged + (** [for_alli ~start ~until ~f table] is like {!for_all}, but also passes + the memory as an argument. *) + + val count : ('a t -> f:('a -> bool) -> int) ranged + (** [count ~start ~until ~f table] returns the number of elements [table] + that satisfy the predicate [p] *) + + val find_if : ('a t -> f:('a -> bool) -> 'a option) ranged + (** [find_if ~start ~until ~f table] returns the first element of [table] + that satisfies the predicate [p] or None if no elements satisfied *) + val find_map : ('a t -> f:('a -> 'b option) -> 'b option) ranged + (** [find_map ~start ~until ~f table] returns the first evaluation of [f] + that returns [Some] or None if [f] always returns [None] *) - (** [fold ~start ~until ~init ~f table] returns a fold over - [table] in form [f elt_n ( ... (f elt_2 (f (elt_1 acc))) ... )] *) - val fold : ('a t -> init:'b -> f:('a -> 'b -> 'b) -> 'b) ranged + val fold : ('a t -> init:'b -> f:('a -> 'b -> 'b) -> 'b) ranged + (** [fold ~start ~until ~init ~f table] returns a fold over [table] in form + [f elt_n ( ... (f elt_2 (f (elt_1 acc))) ... )] *) - (** [iter ~start ~until ~f table] applies function [f] in turn to - elements of [table] *) - val iter : ('a t -> f:('a -> unit) -> unit) ranged + val iter : ('a t -> f:('a -> unit) -> unit) ranged + (** [iter ~start ~until ~f table] applies function [f] in turn to elements + of [table] *) - (** [find_mapi ~start ~until ~f table] is like {!find_map}, but - also passes the memory as an argument. *) val find_mapi : ('a t -> f:(mem -> 'a -> 'b option) -> 'b option) ranged + (** [find_mapi ~start ~until ~f table] is like {!find_map}, but also passes + the memory as an argument. *) - (** [foldi ~start ~until ~f table] is like {!fold}, but - also passes the memory as an argument. *) - val foldi: ('a t -> init:'b -> f:(mem -> 'a -> 'b -> 'b) -> 'b) ranged + val foldi : ('a t -> init:'b -> f:(mem -> 'a -> 'b -> 'b) -> 'b) ranged + (** [foldi ~start ~until ~f table] is like {!fold}, but also passes the + memory as an argument. *) - (** [ieri ~start ~until ~f table] is like {!iter}, but - also passes the memory as an argument. *) val iteri : ('a t -> f:(mem -> 'a -> unit) -> unit) ranged + (** [ieri ~start ~until ~f table] is like {!iter}, but also passes the + memory as an argument. *) - (** [map ~start ~until ~f table] applies [f] to elements of - [table] and builds new table with results returned by [f] *) val map : ('a t -> f:('a -> 'b) -> 'b t) ranged + (** [map ~start ~until ~f table] applies [f] to elements of [table] and + builds new table with results returned by [f] *) - (** [mapi ~start ~until ~f table] is like {!map}, but - also passes the memory as an argument. *) val mapi : ('a t -> f:(mem -> 'a -> 'b) -> 'b t) ranged + (** [mapi ~start ~until ~f table] is like {!map}, but also passes the memory + as an argument. *) - (** [filter ~start ~until ~f table] removes all mappings from - [table] that doesn't satisfies the predicate [f] *) val filter : ('a t -> f:('a -> bool) -> 'a t) ranged + (** [filter ~start ~until ~f table] removes all mappings from [table] that + doesn't satisfies the predicate [f] *) - (** [filter_map ~start ~until ~f table] return a subtable of - [table] containing only elements for which [f] returns - [Some] *) val filter_map : ('a t -> f:('a -> 'b option) -> 'b t) ranged + (** [filter_map ~start ~until ~f table] return a subtable of [table] + containing only elements for which [f] returns [Some] *) - (** [filteri ~start ~until ~f table] is like {!filter}, but - also passes the memory as an argument. *) val filteri : ('a t -> f:(mem -> 'a -> bool) -> 'a t) ranged + (** [filteri ~start ~until ~f table] is like {!filter}, but also passes the + memory as an argument. *) - (** [filter_mapi ~start ~until ~f table] is like {!filter_map}, but - also passes the memory as an argument. *) val filter_mapi : ('a t -> f:(mem -> 'a -> 'b option) -> 'b t) ranged + (** [filter_mapi ~start ~until ~f table] is like {!filter_map}, but also + passes the memory as an argument. *) - (** [to_sequence ~start ~until table] converts the [table] to a - sequence of key-value pairs. *) val to_sequence : ('a t -> (mem * 'a) seq) ranged + (** [to_sequence ~start ~until table] converts the [table] to a sequence of + key-value pairs. *) - (** [regions table] returns in an ascending order of addresses all - memory regions mapped in a [table] *) val regions : ('a t -> mem seq) ranged + (** [regions table] returns in an ascending order of addresses all memory + regions mapped in a [table] *) - (** [elements table] returns in an ascending order of addresses all - elements mapped in a [table] *) val elements : ('a t -> 'a seq) ranged + (** [elements table] returns in an ascending order of addresses all elements + mapped in a [table] *) + val pp : + (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit (** [pp printer] - creates a printer for table from value printer *) - val pp : (Format.formatter -> 'a -> unit) -> (Format.formatter -> 'a t -> unit) end - (** A locations of a chunk of memory *) + (** A locations of a chunk of memory *) module Location : sig - type t = { - addr : addr; - len : int; - } [@@deriving bin_io, compare, fields, sexp] + type t = { addr : addr; len : int } + [@@deriving bin_io, compare, fields, sexp] end - (** memory location *) type location = Location.t [@@deriving bin_io, compare, sexp] + (** memory location *) (** A backend interface. - This interface must be implemented by a backend plugin, and - registered with [Image.register] function in order to be - accessible for loading images. + This interface must be implemented by a backend plugin, and registered + with [Image.register] function in order to be accessible for loading + images. - @deprecated Use new Ogre-powered loader interface - *) + @deprecated Use new Ogre-powered loader interface *) module Backend : sig - - (** memory access permissions *) + (** memory access permissions *) type perm = R | W | X | Or of perm * perm [@@deriving bin_io, compare, sexp] - (** A named contiguous part of file with permissions. - Also, known as segment in ELF. *) + (** A named contiguous part of file with permissions. Also, known as segment + in ELF. *) module Segment : sig type t = { - name: string; - perm: perm; (** segment's permissions *) - off: int; + name : string; + perm : perm; (** segment's permissions *) + off : int; location : location; - } [@@deriving bin_io, compare, fields, sexp] + } + [@@deriving bin_io, compare, fields, sexp] end - (** Symbol definition, that can span several non-contiguous parts of - memory *) + (** Symbol definition, that can span several non-contiguous parts of memory + *) module Symbol : sig type t = { name : string; is_function : bool; is_debug : bool; locations : location * location list; - } [@@deriving bin_io, compare, fields, sexp] + } + [@@deriving bin_io, compare, fields, sexp] end - (** Just a named region of memory. *) + (** Just a named region of memory. *) module Section : sig - type t = { - name : string; - location : location; - } [@@deriving bin_io, compare, fields, sexp] + type t = { name : string; location : location } + [@@deriving bin_io, compare, fields, sexp] end - (** A Img from a backend perspective. *) + (** A Img from a backend perspective. *) module Img : sig type t = { - arch : arch; - entry : addr; + arch : arch; + entry : addr; segments : Segment.t * Segment.t list; - symbols : Symbol.t list; - sections : Section.t list; - } [@@deriving bin_io, compare, fields, sexp] + symbols : Symbol.t list; + sections : Section.t list; + } + [@@deriving bin_io, compare, fields, sexp] end - (** the actual interface to be implemented *) type t = Bigstring.t -> Img.t option - end [@@deprecated "[since 2017-08] Use new Ogre-powered loader interface"] + (** the actual interface to be implemented *) + end + [@@deprecated "[since 2017-08] Use new Ogre-powered loader interface"] - (** Binary Image. *) + (** Binary Image. *) module Image : sig - (** {2 Type definitions} *) + (** {2 Type definitions} *) - type t = image [@@deriving sexp_of] (** image *) + type t = image [@@deriving sexp_of] + (** image *) - (** segment *) type segment [@@deriving bin_io, compare, sexp] - (** symbol *) + (** segment *) + type symbol [@@deriving bin_io, compare, sexp] + (** symbol *) type path = string - (** {2 Constructing} *) + (** {2 Constructing} *) - (** constructing an image can result in actual image and a set - (hopefully empty) of errors occurred in a process of decoding an - image, that do not prevent us from actually creating an image. So, - this information messages can be considered as warnings. *) type result = (t * Error.t list) Or_error.t + (** constructing an image can result in actual image and a set (hopefully + empty) of errors occurred in a process of decoding an image, that do not + prevent us from actually creating an image. So, this information + messages can be considered as warnings. *) - (** [create ?backend filename] creates an image of the file specified - by the [filename]. If [backend] is not specified, then - all availabe backends are used and their information is - merged. If the information provided by all backends agree (i.e., - there's no conflicting information), then image is returned. - If [backend] is an explicit file path, then it is read as an - OGRE file and used for loading. Otherwise, [backend] should be - a name of one of the backends registered either with - [register_backend] or [register_loader]. See - [available_backends] for the list of available backends. + val create : ?backend:string -> path -> result + (** [create ?backend filename] creates an image of the file specified by the + [filename]. If [backend] is not specified, then all availabe backends + are used and their information is merged. If the information provided by + all backends agree (i.e., there's no conflicting information), then + image is returned. If [backend] is an explicit file path, then it is + read as an OGRE file and used for loading. Otherwise, [backend] should + be a name of one of the backends registered either with + [register_backend] or [register_loader]. See [available_backends] for + the list of available backends. @since 2.5.0 accepts backend accepts an explicit file path, + note a file path is explicit if it exists and - [Fn.non Filename.is_implicit]. - *) - val create : ?backend:string -> path -> result + [Fn.non Filename.is_implicit]. *) - (** [of_string ?backend ~data] creates an image from the specified - [data]. See {!create} for [backend] parameter. *) val of_string : ?backend:string -> string -> result + (** [of_string ?backend ~data] creates an image from the specified [data]. + See {!create} for [backend] parameter. *) + val of_bigstring : ?backend:string -> Bigstring.t -> result (** [of_bigstring ?backend ~data] creates an image from the specified [data]. See {!create} for [backend] parameter. *) - val of_bigstring : ?backend:string -> Bigstring.t -> result - (** {2 Attributes} *) + (** {2 Attributes} *) - (** [entry_point addr] is an address from which a kernel should start *) val entry_point : t -> addr + (** [entry_point addr] is an address from which a kernel should start *) - (** [filename image] a name of file from which an image was - loaded (if any) *) val filename : t -> string option + (** [filename image] a name of file from which an image was loaded (if any) + *) - (** [arch image] code architecture *) - val arch: t -> arch + val arch : t -> arch + (** [arch image] code architecture *) - (** [addr_size image] same as [Arch.addr_size (Image.arch image)] *) val addr_size : t -> addr_size + (** [addr_size image] same as [Arch.addr_size (Image.arch image)] *) - (** [endian image] same as [Arch.endian (Image.arch image)] *) val endian : t -> endian + (** [endian image] same as [Arch.endian (Image.arch image)] *) - (** {2 Tables } *) + (** {2 Tables} *) - (** [words image size] returns a mapping from addresses to words - of the specified [size]. For example, [Image.words img `r8] - returns all bytes. *) val words : t -> size -> word table + (** [words image size] returns a mapping from addresses to words of the + specified [size]. For example, [Image.words img `r8] returns all bytes. + *) - (** [segments image] returns a mapping from addresses to segments *) val segments : t -> segment table + (** [segments image] returns a mapping from addresses to segments *) - (** [symbols image] returns a mapping from addresses to symbols *) val symbols : t -> symbol table + (** [symbols image] returns a mapping from addresses to symbols *) - (** {2 Tags} *) + (** {2 Tags} *) - (** tags a segment *) val segment : segment tag + (** tags a segment *) + val symbol : string tag (** tags a symbol *) - val symbol : string tag - (** tags a section *) val section : string tag + (** tags a section *) - (** tags a code region *) - val code_region : unit tag + val code_region : unit tag + (** tags a code region *) - (** an image specification in OGRE *) val specification : Ogre.doc tag + (** an image specification in OGRE *) - (** returns memory, annotated with tags *) val memory : t -> value memmap + (** returns memory, annotated with tags *) - (** {2 Mappings } *) + (** {2 Mappings} *) - (** [memory_of_segment img seg] returns a memory region occupied - by the segment [seg]. *) - val memory_of_segment : t -> segment -> mem + val memory_of_segment : t -> segment -> mem + (** [memory_of_segment img seg] returns a memory region occupied by the + segment [seg]. *) - (** [memory_of_symbol sym] returns a sequence of memory regions - that belong to the [sym] symbol. The sequence is represented - as a pair, where the first element is the starting memory - region, and the second elemnt is (a possible empty) sequence - of the rest memory regions (in case if a symbol occupies a - non-contigious region of memory).*) - val memory_of_symbol : t -> symbol -> mem * mem seq + val memory_of_symbol : t -> symbol -> mem * mem seq + (** [memory_of_symbol sym] returns a sequence of memory regions that belong + to the [sym] symbol. The sequence is represented as a pair, where the + first element is the starting memory region, and the second elemnt is (a + possible empty) sequence of the rest memory regions (in case if a symbol + occupies a non-contigious region of memory).*) - (** [symbols_of_segment img seg] all symbols that belong to the - [seg] segment. *) val symbols_of_segment : t -> segment -> symbol seq + (** [symbols_of_segment img seg] all symbols that belong to the [seg] + segment. *) + val segment_of_symbol : t -> symbol -> segment (** [segment_of_symbol image sym] a segment to which [sym] belongs.*) - val segment_of_symbol : t -> symbol -> segment (** Interface to the image specification. - @since 2.2.0 - *) + @since 2.2.0 *) module Spec : sig - - (** [from_arch x] constructs a minimal specification - for the given architecture [x]. *) val from_arch : arch -> Ogre.doc + (** [from_arch x] constructs a minimal specification for the given + architecture [x]. *) - (** the slot to access the specification of the uni *) val slot : (Theory.Unit.cls, Ogre.doc) KB.slot + (** the slot to access the specification of the uni *) end - (** Image Segments. - Segment is a contiguous region of memory that has - permissions. The same as segment in ELF. *) + (** Image Segments. Segment is a contiguous region of memory that has + permissions. The same as segment in ELF. *) module Segment : sig type t = segment + include Regular.S with type t := t - (** [name segment] a name associated with the segment (usually - meaningless). Guaranteed to be unique across other segments of - the same image. *) val name : t -> string + (** [name segment] a name associated with the segment (usually + meaningless). Guaranteed to be unique across other segments of the + same image. *) - (** [is_writable segment] *) - val is_writable : t -> bool + val is_writable : t -> bool + (** [is_writable segment] *) - (** [is_readable segment] *) - val is_readable : t -> bool + val is_readable : t -> bool + (** [is_readable segment] *) - (** [is_executable segment] *) val is_executable : t -> bool + (** [is_executable segment] *) end - (** Symbol. *) + (** Symbol. *) module Symbol : sig type t = symbol + include Regular.S with type t := t - (** [name sym] symbol's name *) val name : t -> string + (** [name sym] symbol's name *) - (** [is_function sym] is true if [sym] is a function. *) val is_function : t -> bool + (** [is_function sym] is true if [sym] is a function. *) - (** [is_debug sym] is true if [sym] is a debug symbol. *) val is_debug : t -> bool + (** [is_debug sym] is true if [sym] is a debug symbol. *) end - (** {2 Backend Interface} *) + (** {2 Backend Interface} *) (** An interface that a backend shall implement. - The functions provided by a loader return an OGRE document, - wrapped into option and error monads, thus the three outcomes - are possible with the following interpretation: + The functions provided by a loader return an OGRE document, wrapped into + option and error monads, thus the three outcomes are possible with the + following interpretation: - - [Ok None] - a loader doesn't know how handle files of this - type. - - [Ok (Some doc)] - a loader was able to obtain some - information from the input. + - [Ok None] - a loader doesn't know how handle files of this type. + - [Ok (Some doc)] - a loader was able to obtain some information from + the input. - - [Error err] - a file was corrupted, according to the loader. - *) + - [Error err] - a file was corrupted, according to the loader. *) module type Loader = sig - (** [from_file name] loads a file with the given [name]. *) val from_file : string -> Ogre.doc option Or_error.t + (** [from_file name] loads a file with the given [name]. *) - (** [from_data data] loads image from the specified array of bytes. *) val from_data : Bigstring.t -> Ogre.doc option Or_error.t + (** [from_data data] loads image from the specified array of bytes. *) end - (** [register_loader ~name backend] registers new loader. *) val register_loader : name:string -> (module Loader) -> unit + (** [register_loader ~name backend] registers new loader. *) (** Interfaces for working with the Knowledge Base. - @since 2.6.0 - *) + @since 2.6.0 *) module KB : sig - (** Same as [Loader], but parameterized with the Knowledge monad. *) + (** Same as [Loader], but parameterized with the Knowledge monad. *) module type Loader = sig - (** [from_file name] loads a file with the given [name]. *) val from_file : string -> Ogre.doc option knowledge + (** [from_file name] loads a file with the given [name]. *) - (** [from_data data] loads image from the specified array of bytes. *) val from_data : Bigstring.t -> Ogre.doc option knowledge + (** [from_data data] loads image from the specified array of bytes. *) end + val register_loader : name:string -> (module Loader) -> unit (** [register_loader ~name backend] registers a new loader that is parameterized with the Knowledge monad. *) - val register_loader : name:string -> (module Loader) -> unit end - (** [find_loader name] lookups the loader registered under the - given [name]. - - @since 2.2.0 - *) val find_loader : string -> (module Loader) option + (** [find_loader name] lookups the loader registered under the given [name]. + @since 2.2.0 *) - (** lists all registered backends *) val available_backends : unit -> string list + (** lists all registered backends *) - (** [register_backend ~name backend] tries to register [backend] under - the specified [name]. - @deprecated use register_loader instead - *) val register_backend : name:string -> Backend.t -> [ `Ok | `Duplicate ] [@@deprecated "[since 2017-07] use register_loader instead"] + (** [register_backend ~name backend] tries to register [backend] under the + specified [name]. + @deprecated use {!register_loader} instead *) (** {2 Internals} - Access to the low-level internals. - *) + Access to the low-level internals. *) - (** [data image] returns image data. Usually it is a memory mapped - input file, or it is whatever was passed to [of_[big]string]. *) val data : t -> Bigstring.t + (** [data image] returns image data. Usually it is a memory mapped input + file, or it is whatever was passed to [of_[big]string]. *) - + val spec : t -> Ogre.doc (** [spec image] returns the image specification. - @since 1.3 - *) - val spec : t -> Ogre.doc + @since 1.3 *) (** A scheme of image specification. - An attribute is some statement about a program that is true, - thus each attribute is a proposition in a logical database of - inferred facts. + An attribute is some statement about a program that is true, thus each + attribute is a proposition in a logical database of inferred facts. - Note, in comments we use actual field names in the synopsis - section of a function, e.g., [section addr size] means that - the [section] statement has two fields [Scheme.addr] and - [Scheme.size]. + Note, in comments we use actual field names in the synopsis section of a + function, e.g., [section addr size] means that the [section] statement + has two fields [Scheme.addr] and [Scheme.size]. See the OGRE library for more information. - @since 1.3 - *) + @since 1.3 *) module Scheme : sig open Ogre.Type type addr = int64 type size = int64 - type off = int64 + type off = int64 type value = int64 - (** a contiguous piece of memory. *) type 'a region = { - addr : addr; (** a starting address *) - size : size; (** a size of the segment *) - info : 'a (** the attached information *) + addr : addr; (** a starting address *) + size : size; (** a size of the segment *) + info : 'a; (** the attached information *) } + (** a contiguous piece of memory. *) - val off : off Ogre.field (** offset *) - val size : size Ogre.field (** size *) - val addr : addr Ogre.field (** address *) - val name : string Ogre.field (** name *) - val root : addr Ogre.field (** code root *) - val readable : bool Ogre.field (** is readable *) - val writable : bool Ogre.field (** is_writable *) - val executable : bool Ogre.field (** is_executable *) - val fixup : addr Ogre.field (** an address of a fixup *) + val off : off Ogre.field + (** offset *) - (** [arch name] a file contains code for the [name] architecture. + val size : size Ogre.field + (** size *) - E.g., arm, x86, x86_64 - *) - val arch : (string, (string -> 'a) -> 'a) Ogre.attribute + val addr : addr Ogre.field + (** address *) + val name : string Ogre.field + (** name *) - (** [subarch name] the subarchitecture, when applicable, - e.g., v7, v8, r2, etc. Should be appended to the arch - name to get the full description, e.g., armv7. + val root : addr Ogre.field + (** code root *) - @since 2.2.0 - *) - val subarch : (string, (string -> 'a) -> 'a) Ogre.attribute + val readable : bool Ogre.field + (** is_readable *) + val writable : bool Ogre.field + (** is_writable *) - (** [vendor name] the second part of the build triplet, - e.g., apple, pc, ibm, unknown. Could be just an empty - string. + val executable : bool Ogre.field + (** is_executable *) - @since 2.2.0 - *) - val vendor : (string, (string -> 'a) -> 'a) Ogre.attribute + val fixup : addr Ogre.field + (** an address of a fixup *) + + val arch : (string, (string -> 'a) -> 'a) Ogre.attribute + (** [arch name] a file contains code for the [name] architecture. + E.g., arm, x86, x86_64 *) - (** [system name] the operating system name, for which the - binary is specifically built, e.g., ananas, ios, linux. + val subarch : (string, (string -> 'a) -> 'a) Ogre.attribute + (** [subarch name] the subarchitecture, when applicable, e.g., v7, v8, r2, + etc. Should be appended to the arch name to get the full description, + e.g., armv7. - @since 2.2.0 - *) - val system : (string, (string -> 'a) -> 'a) Ogre.attribute + @since 2.2.0 *) + + val vendor : (string, (string -> 'a) -> 'a) Ogre.attribute + (** [vendor name] the second part of the build triplet, e.g., apple, pc, + ibm, unknown. Could be just an empty string. + @since 2.2.0 *) - (** [abi name] the environment/toolchain/abi under which the - binary is expected to be run, e.g., gnu, android, msvc + val system : (string, (string -> 'a) -> 'a) Ogre.attribute + (** [system name] the operating system name, for which the binary is + specifically built, e.g., ananas, ios, linux. - @since 2.2.0 - *) - val abi : (string, (string -> 'a) -> 'a) Ogre.attribute + @since 2.2.0 *) + val abi : (string, (string -> 'a) -> 'a) Ogre.attribute + (** [abi name] the environment/toolchain/abi under which the binary is + expected to be run, e.g., gnu, android, msvc - (** [bits m] is the bitness of the target architecture, e.g., - 16, 32, 64. + @since 2.2.0 *) - @since 2.2.0 - *) val bits : (size, (size -> 'a) -> 'a) Ogre.attribute + (** [bits m] is the bitness of the target architecture, e.g., 16, 32, 64. + @since 2.2.0 *) + val format : (string, (string -> 'a) -> 'a) Ogre.attribute (** [(format X)] defines the file format to be X. Currently supported formats: - ["elf"]; - ["macho"]; - - ["coff"] - *) - val format : (string, (string -> 'a) -> 'a) Ogre.attribute + - ["coff"] *) - - (** [(require library)] defines that the unit requires - [library]. - - @since 2.3.0 *) val require : (string, (string -> 'a) -> 'a) Ogre.attribute + (** [(require library)] defines that the unit requires [library]. + @since 2.3.0 *) + val is_little_endian : (bool, (bool -> 'a) -> 'a) Ogre.attribute (** [(is-little-endian FLAG)] is set for files with words encoded in the little-endian order. - @since 2.2.0 *) - val is_little_endian : (bool, (bool -> 'a) -> 'a) Ogre.attribute + @since 2.2.0 *) + val is_executable : (bool, (bool -> 'a) -> 'a) Ogre.attribute (** [(is-executable FLAG)] is set for binaries that executable. @since 2.2.0 *) - val is_executable : (bool, (bool -> 'a) -> 'a) Ogre.attribute - (** [bias offset] the value by which all addresses are biased - wrt to the real addresses in the binary. + val bias : (off, (off -> 'a) -> 'a) Ogre.attribute + (** [bias offset] the value by which all addresses are biased wrt to the + real addresses in the binary. @since 2.2.0 *) - val bias : (off, (off -> 'a) -> 'a) Ogre.attribute - (** [segment addr size readable writable executable] a memory - region (addr,size) has the specified permissions. *) - val segment : ((bool * bool * bool) region, - (addr -> size -> bool -> bool -> bool -> 'a) -> 'a) Ogre.attribute + val segment : + ( (bool * bool * bool) region, + (addr -> size -> bool -> bool -> bool -> 'a) -> 'a ) + Ogre.attribute + (** [segment addr size readable writable executable] a memory region + (addr,size) has the specified permissions. *) - (** [section addr size] a memory region is a section *) val section : (unit region, (addr -> size -> 'a) -> 'a) Ogre.attribute + (** [section addr size] a memory region is a section *) - (** [code_start addr] an address starts a code sequence *) val code_start : (addr, (addr -> 'a) -> 'a) Ogre.attribute + (** [code_start addr] an address starts a code sequence *) - (** [entry_point addr] an address is the program entry point *) val entry_point : (addr, (addr -> 'a) -> 'a) Ogre.attribute + (** [entry_point addr] an address is the program entry point *) - (** [symbol_chunk addr size root] a contiguous piece of a program - symbol, that can be a function or some data. *) val symbol_chunk : (addr region, (addr -> size -> addr -> 'a) -> 'a) Ogre.attribute + (** [symbol_chunk addr size root] a contiguous piece of a program symbol, + that can be a function or some data. *) - (** [named_region addr size name] a region of memory has a [name] *) val named_region : (string region, (addr -> size -> string -> 'a) -> 'a) Ogre.attribute + (** [named_region addr size name] a region of memory has a [name] *) - (** [named_symbol addr name] a symbol that starts at this [addr] - has this [name]. *) val named_symbol : (addr * string, (addr -> string -> 'a) -> 'a) Ogre.attribute + (** [named_symbol addr name] a symbol that starts at this [addr] has this + [name]. *) - (** [mapped addr size off] sequence of bytes in a file starting at - offset [off] and has the given [size] is mapped into memory at the - given address [addr] *) - val mapped : (off region, (addr -> size -> off -> 'a) -> 'a) Ogre.attribute - - (** [relocation fixup addr] a value referenced at the code that - has the [fixup] address is relocated to the specified address - [addr]. *) - val relocation : - (int64 * addr, (addr -> addr -> 'a) -> 'a) Ogre.attribute + val mapped : + (off region, (addr -> size -> off -> 'a) -> 'a) Ogre.attribute + (** [mapped addr size off] sequence of bytes in a file starting at offset + [off] and has the given [size] is mapped into memory at the given + address [addr] *) - (** [relative_relocation fixup] a value referenced at the code has - address [fixup] and refers to a pointer [p], which is relocated - to [p + base_address]. *) - val relative_relocation : - (addr, (addr -> 'a) -> 'a) Ogre.attribute + val relocation : (int64 * addr, (addr -> addr -> 'a) -> 'a) Ogre.attribute + (** [relocation fixup addr] a value referenced at the code that has the + [fixup] address is relocated to the specified address [addr]. *) + val relative_relocation : (addr, (addr -> 'a) -> 'a) Ogre.attribute + (** [relative_relocation fixup] a value referenced at the code has address + [fixup] and refers to a pointer [p], which is relocated to + [p + base_address]. *) - (** [external_reference addr name] a piece of code at the - specified address [addr] references an external symbol with - the given [name]. *) val external_reference : (addr * string, (addr -> string -> 'a) -> 'a) Ogre.attribute + (** [external_reference addr name] a piece of code at the specified + address [addr] references an external symbol with the given [name]. *) - (** [base_address addr] this is the base address of an image, - i.e., an address of a first byte of the image. *) val base_address : (addr, (addr -> 'a) -> 'a) Ogre.attribute + (** [base_address addr] this is the base address of an image, i.e., an + address of a first byte of the image. *) - - (** [code_region addr size off] the memory region in the file - with the given offset [off] and [size] is code that should be loaded - at the specified virtual address [addr]. *) val code_region : (addr * size * off, (addr -> size -> off -> 'a) -> 'a) Ogre.attribute + (** [code_region addr size off] the memory region in the file with the + given offset [off] and [size] is code that should be loaded at the + specified virtual address [addr]. *) - (** [symbol_value addr value] the symbol at address the - specified [value]. *) val symbol_value : (addr * value, (addr -> value -> 'a) -> 'a) Ogre.attribute + (** [symbol_value addr value] the symbol at address the specified [value]. + *) end end - (** Memory maps. - Memory map is an assosiative data structure that maps memory - regions to values. Unlike in the Table, memory - regions in the Memmap can intersect in an arbitrary ways. This - data structure is also known as an Interval Tree. + (** Memory maps. Memory map is an assosiative data structure that maps memory + regions to values. Unlike in the Table, memory regions in the Memmap can + intersect in an arbitrary ways. This data structure is also known as an + Interval Tree. - [Memmap] is an instance of the [Interval_tree] with the - [Memory] serving as an interval. - *) + [Memmap] is an instance of the [Interval_tree] with the [Memory] serving + as an interval. *) module Memmap : sig - - (** memory map, aka interval trees *) type 'a t = 'a memmap [@@deriving sexp_of] + (** memory map, aka interval trees *) - (** [empty] map *) val empty : 'a t + (** [empty] map *) - (** [singleton] a memory map containing only one memory region *) val singleton : mem -> 'a -> 'a t + (** [singleton] a memory map containing only one memory region *) - (** [min_addr map] is a minimum addr mapped in [map] *) val min_addr : 'a t -> addr option + (** [min_addr map] is a minimum addr mapped in [map] *) - (** [max_addr map] is a maximum addr mapped in [map] *) val max_addr : 'a t -> addr option + (** [max_addr map] is a maximum addr mapped in [map] *) - (** [min_binding map] is a minimum binding mapped in [map] *) val min_binding : 'a t -> (mem * 'a) option + (** [min_binding map] is a minimum binding mapped in [map] *) - (** [max_binding map] is a maximum binding mapped in [map] *) val max_binding : 'a t -> (mem * 'a) option + (** [max_binding map] is a maximum binding mapped in [map] *) - (** [add map mem tag] adds a new memory region [mem] tagged with - [tag]. If the same region was already in the [map] it will be - tagged with the [tag] again, even if it has had the same tag. *) val add : 'a t -> mem -> 'a -> 'a t + (** [add map mem tag] adds a new memory region [mem] tagged with [tag]. If + the same region was already in the [map] it will be tagged with the + [tag] again, even if it has had the same tag. *) - (** [dominators map mem] an ordered sequence of all memory regions, - containing [mem]. A memory region [(x,y)] contains region [(p,q)], - iff [p >= x && q <= y], where memory regions are depicted using - closed intervals. *) val dominators : 'a t -> mem -> (mem * 'a) seq + (** [dominators map mem] an ordered sequence of all memory regions, + containing [mem]. A memory region [(x,y)] contains region [(p,q)], iff + [p >= x && q <= y], where memory regions are depicted using closed + intervals. *) - (** [intersections map mem] an ordered sequence of all memory regions, - that intersects with [mem]. Memory region [(x,y)] intersects with - region [(p,q)] iff there exists such [z] that + val intersections : 'a t -> mem -> (mem * 'a) seq + (** [intersections map mem] an ordered sequence of all memory regions, that + intersects with [mem]. Memory region [(x,y)] intersects with region + [(p,q)] iff there exists such [z] that [z >= p || z <= q && z >= x && z <= y]. In other words if there exists such byte that belongs to both memory regions. *) - val intersections : 'a t -> mem -> (mem * 'a) seq - (** [intersects map mem] is true if [intersections map mem] is not empty *) val intersects : 'a t -> mem -> bool + (** [intersects map mem] is true if [intersections map mem] is not empty *) - (** [dominates map mem] if there is a non empty set of dominators *) val dominates : 'a t -> mem -> bool + (** [dominates map mem] if there is a non empty set of dominators *) - (** [contains map addr] true if there exists such memory region [mem], - that [Memory.contains mem addr] *) val contains : 'a t -> addr -> bool + (** [contains map addr] true if there exists such memory region [mem], that + [Memory.contains mem addr] *) - (** [lookup map addr] returns an ordered sequence of all memory - containing the [addr] *) val lookup : 'a t -> addr -> (mem * 'a) seq + (** [lookup map addr] returns an ordered sequence of all memory containing + the [addr] *) - (** [map m f] returns a new map with each tag mapped - with function [f] *) val map : 'a t -> f:('a -> 'b) -> 'b t + (** [map m f] returns a new map with each tag mapped with function [f] *) - (** [mapi m f] the same as [map], but [f] is called with two - arguments: [mem] and [tag], where [mem] is a memory region, - and [tag] is a [tag] associated with that region. *) val mapi : 'a t -> f:(mem -> 'a -> 'b) -> 'b t + (** [mapi m f] the same as [map], but [f] is called with two arguments: + [mem] and [tag], where [mem] is a memory region, and [tag] is a [tag] + associated with that region. *) - (** [filter map f] returns a map that contains only those elements - for which [f] evaluated to [true] *) val filter : 'a t -> f:('a -> bool) -> 'a t + (** [filter map f] returns a map that contains only those elements for which + [f] evaluated to [true] *) - (** [filter_map m f] creates a new map by applying a function [f] to - each tag. If [f] returns [Some x] then this region will be mapped - to [x] in a new map, otherwise it will be dropped. *) val filter_map : 'a t -> f:('a -> 'b option) -> 'b t + (** [filter_map m f] creates a new map by applying a function [f] to each + tag. If [f] returns [Some x] then this region will be mapped to [x] in a + new map, otherwise it will be dropped. *) - (** [filter_mapi] is like [filter_map] but use function also accepts - would associated memory region *) val filter_mapi : 'a t -> f:(mem -> 'a -> 'b option) -> 'b t + (** [filter_mapi] is like [filter_map] but use function also accepts would + associated memory region *) - (** [remove map mem] removes all bindings to [mem] *) val remove : 'a t -> mem -> 'a t + (** [remove map mem] removes all bindings to [mem] *) - (** [remove_intersections map mem] removes all bindings that - that intersects with [mem] *) val remove_intersections : 'a t -> mem -> 'a t + (** [remove_intersections map mem] removes all bindings that that intersects + with [mem] *) - (** [remove_dominators map mem] removes all bindings that are - dominators to [mem] *) val remove_dominators : 'a t -> mem -> 'a t + (** [remove_dominators map mem] removes all bindings that are dominators to + [mem] *) - (** [to_sequence map] converts the memmap ['a t] to a sequence of - key-value pairs *) val to_sequence : 'a t -> (mem * 'a) seq - - + (** [to_sequence map] converts the memmap ['a t] to a sequence of key-value + pairs *) include Container.S1 with type 'a t := 'a t + val pp : + (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit (** [pp pp_elem] constracts a printer for a memmap to the given element. *) - val pp : (Format.formatter -> 'a -> unit) -> (Format.formatter -> 'a t -> unit) end - (** Symbolizer defines a method for assigning symbolic names to addresses *) type symbolizer + (** Symbolizer defines a method for assigning symbolic names to addresses *) - (** Rooter defines a method for finding function starts in a program *) type rooter + (** Rooter defines a method for finding function starts in a program *) - (** Brancher defines a method for resolving branch instruction *) type brancher + (** Brancher defines a method for resolving branch instruction *) - (** Reconstructor defines a method for reconstructing symbol tables *) type reconstructor + (** Reconstructor defines a method for reconstructing symbol tables *) - (** value of type [disasm] is a result of the disassembling of a - memory region. *) type disasm + (** value of type [disasm] is a result of the disassembling of a memory + region. *) - (** values of type [insn] represents machine instructions decoded - from a given piece of memory *) type insn = Theory.Program.Semantics.t [@@deriving bin_io, compare, sexp] + (** values of type [insn] represents machine instructions decoded from a given + piece of memory *) - (** [block] is a region of memory that is believed to be a basic block - of control flow graph to the best of our knowledge. *) type block [@@deriving compare, sexp_of] + (** [block] is a region of memory that is believed to be a basic block of + control flow graph to the best of our knowledge. *) type cfg [@@deriving compare] - (** a jump kind. - A jump to another block can be conditional or unconditional. + type jump = + [ `Jump (** unconditional jump *) | `Cond (** conditional jump *) ] + [@@deriving compare, sexp] + (** a jump kind. A jump to another block can be conditional or unconditional. *) - type jump = [ - | `Jump (** unconditional jump *) - | `Cond (** conditional jump *) - ] [@@deriving compare, sexp] - (** This type defines a relation between two basic blocks. *) - type edge = [jump | `Fall] [@@deriving compare, sexp] - - (** Kinds of instructions *) + + type edge = [ jump | `Fall ] [@@deriving compare, sexp] + (** This type defines a relation between two basic blocks. *) + + (** Kinds of instructions *) module Kind : sig - type branch = [ - | `Conditional_branch - | `Unconditional_branch - | `Indirect_branch - ] [@@deriving bin_io, compare, enumerate, sexp] - - type affecting_control = [ - | branch + type branch = + [ `Conditional_branch | `Unconditional_branch | `Indirect_branch ] + [@@deriving bin_io, compare, enumerate, sexp] + + type affecting_control = + [ branch | `Return | `Call | `Barrier | `Terminator - | `May_affect_control_flow - ] [@@deriving bin_io, compare, enumerate, sexp] - - type having_side_effect = [ - | `May_load - | `May_store - ] [@@deriving bin_io, compare, enumerate, sexp] - - type t = [ - | affecting_control - | having_side_effect - ] [@@deriving bin_io, compare, enumerate, sexp] + | `May_affect_control_flow ] + [@@deriving bin_io, compare, enumerate, sexp] + + type having_side_effect = [ `May_load | `May_store ] + [@@deriving bin_io, compare, enumerate, sexp] + + type t = [ affecting_control | having_side_effect ] + [@@deriving bin_io, compare, enumerate, sexp] end - (** abstract and opaque register *) type reg [@@deriving bin_io, compare, sexp] + (** abstract and opaque register *) - (** opaque immediate value *) type imm [@@deriving bin_io, compare, sexp] + (** opaque immediate value *) - (** floating point value *) type fmm [@@deriving bin_io, compare, sexp] + (** floating point value *) - (** kind of instruction *) type kind = Kind.t [@@deriving bin_io, compare, sexp] + (** kind of instruction *) - (** Register. *) + (** Register. *) module Reg : sig type t = reg - (** unique number representing a register *) val code : t -> int + (** unique number representing a register *) - (** name of a register *) val name : t -> string + (** name of a register *) include Regular.S with type t := t end - (** Integer immediate operand *) + (** Integer immediate operand *) module Imm : sig type t = imm - (** [to_word ~width x] projects [x] to a word. Returns [None] only - if [width] is non-positive. *) - val to_word : t -> width:int -> word option + val to_word : t -> width:int -> word option + (** [to_word ~width x] projects [x] to a word. Returns [None] only if + [width] is non-positive. *) - (** [to_int64 x] maps immediates to the OCaml [int64] type *) val to_int64 : t -> int64 + (** [to_int64 x] maps immediates to the OCaml [int64] type *) + + val to_int : t -> int option + (** [to_int x] projects immediates to the OCaml [int] type. Returns [None] + if it doesn't fit. *) - (** [to_int x] projects immediates to the OCaml [int] type. Returns - [None] if it doesn't fit. *) - val to_int : t -> int option include Regular.S with type t := t end - (** Floating point immediate operand *) + (** Floating point immediate operand *) module Fmm : sig type t = fmm - (** [to_float x] maps floating point operans to the OCaml [float] type *) val to_float : t -> float + (** [to_float x] maps floating point operans to the OCaml [float] type *) + include Regular.S with type t := t end (** Operand *) module Op : sig (** operand *) - type t = - | Reg of reg - | Imm of imm - | Fmm of fmm + type t = Reg of reg | Imm of imm | Fmm of fmm [@@deriving bin_io, compare, sexp] - (** Normalized comparison. *) + + (** Normalized comparison. *) module Normalized : sig val compare : t -> t -> int val hash : t -> int @@ -6243,6 +5926,7 @@ module Std : sig end val pp_adt : Format.formatter -> t -> unit + include Regular.S with type t := t end @@ -6251,50 +5935,45 @@ module Std : sig (** Expert interface to disassembler. This interface is rather complicated, and is built around two - implementations of the disassembler [Basic] and [Recursive]. - [Basic] provides an efficient (and very lazy) linear sweep, - driven in a continuation passing style. On top of the [Basic] - the [Recursive] disassembler is built, that reconstructs the - control flow graph, and represents the latter as a table of - blocks. *) + implementations of the disassembler [Basic] and [Recursive]. [Basic] + provides an efficient (and very lazy) linear sweep, driven in a + continuation passing style. On top of the [Basic] the [Recursive] + disassembler is built, that reconstructs the control flow graph, and + represents the latter as a table of blocks. *) module Disasm_expert : sig - - (** The interface for custom backends. - This is an OCaml interface for defining custom disassembling - backends in pure OCaml. An alternative interface in C++ can - be found at disasm.hpp and disasm.h. + This is an OCaml interface for defining custom disassembling backends in + pure OCaml. An alternative interface in C++ can be found at disasm.hpp + and disasm.h. - The interface is pretty low-level and mimics one-to-one the - existing C interface between OCaml and the C/C++ disassemblers - backends, which, in turn, are optimized for performance. + The interface is pretty low-level and mimics one-to-one the existing C + interface between OCaml and the C/C++ disassemblers backends, which, in + turn, are optimized for performance. - The [Basic.custom] function wraps the backend interface and - enables seamless integration with the existing [Basic.t] - interface. To make the custom [backend] available for your - [encoding], use [Basic.register encoding] function to register - a constructor that uses [Basic.custom], e.g., + The [Basic.custom] function wraps the backend interface and enables + seamless integration with the existing [Basic.t] interface. To make the + custom [backend] available for your [encoding], use + [Basic.register encoding] function to register a constructor that uses + [Basic.custom], e.g., {[ - let () = Basic.register encoding @@ fun target -> + let () = + Basic.register encoding @@ fun target -> let dis = create_custom target in Ok (Basic.custom ?target encoding backend) ]} - where [create_custom] is a user function that creates - the custom backend and [target] contains the detailed - information about the target system. + where [create_custom] is a user function that creates the custom backend + and [target] contains the detailed information about the target system. - The [Basic.lookup] function could be used then to lazily - create the disassembler for the given [encoding], [target] - pair. The constructor will be called only once for each pair. + The [Basic.lookup] function could be used then to lazily create the + disassembler for the given [encoding], [target] pair. The constructor + will be called only once for each pair. - @since 2.2.0 - *) + @since 2.2.0 *) module Backend : sig - - (** possible semantic predicates for instructions *) + (** possible semantic predicates for instructions *) type predicate = | Is_true | Is_invalid @@ -6311,858 +5990,800 @@ module Std : sig | May_load [@@deriving compare, sexp] - - (** operand types *) + (** operand types *) type op = - | Reg (** a register *) - | Imm (** an integer immediate *) - | Fmm (** a floating-point immediate *) - | Insn (** a sub-instruction *) + | Reg (** a register *) + | Imm (** an integer immediate *) + | Fmm (** a floating-point immediate *) + | Insn (** a sub-instruction *) [@@deriving compare, sexp] - (** The backend interface. - The backend is a simple automaton that disassembles - instructions and pushes them into the queue. It runs until it - either hits an instruction that matches with one of the - previously set predicates or if it either hits an invalid - instruction or runs out of the bounds of the specified - memory region. On the high level the algorithm of the [run] - function can be described with the following pseudocode. - - 1. disassemble instruction - 2. push result into the queue - 3. update the offset - 4. if exists predicate p such that p(insn) - or off >= length(data) - then stop - else goto 1. - - If it is impossible to decode the given sequence of bytes, - then an invalid instruction is pushed into the queue and - disassembling continues on the next offset. - - Predicates enables fine control over the behavior of the - disassembler. For example, the [Is_true] predicate is always - [true] disassembler will stop after each instruction. The - backend is not required to support all predicates, only - [Is_true] and [Is_invalid] are required. - - The state of the disassembler includes the queue of - disassembled instructions, the last disassembled - instruction, the set of predicates, and the current - offset. At the beginning, the queue and the set of predicates - are empty, the offset is zero, and the last disassembled - instruction is invalid. - - To minimize allocations, opcodes and register names are - represented as offsets in the corresponding string - tables. - - *) + The backend is a simple automaton that disassembles instructions and + pushes them into the queue. It runs until it either hits an + instruction that matches with one of the previously set predicates or + if it either hits an invalid instruction or runs out of the bounds of + the specified memory region. On the high level the algorithm of the + [run] function can be described with the following pseudocode. + + 1. disassemble instruction 2. push result into the queue 3. update the + offset 4. if exists predicate p such that p(insn) or off >= + length(data) then stop else goto 1. + + If it is impossible to decode the given sequence of bytes, then an + invalid instruction is pushed into the queue and disassembling + continues on the next offset. + + Predicates enables fine control over the behavior of the disassembler. + For example, the [Is_true] predicate is always [true] disassembler + will stop after each instruction. The backend is not required to + support all predicates, only [Is_true] and [Is_invalid] are required. + + The state of the disassembler includes the queue of disassembled + instructions, the last disassembled instruction, the set of + predicates, and the current offset. At the beginning, the queue and + the set of predicates are empty, the offset is zero, and the last + disassembled instruction is invalid. + + To minimize allocations, opcodes and register names are represented as + offsets in the corresponding string tables. *) module type S = sig - - (** an abs *) type t + (** an abs *) - (** [delete d] is called when the disassembler is no longer needed. - - It is safe now to free any data related with the disassembler. - *) val delete : t -> unit + (** [delete d] is called when the disassembler is no longer needed. + It is safe now to free any data related with the disassembler. *) + val set_memory : t -> int64 -> Bigstring.t -> off:int -> len:int -> unit (** [set_memory dis addr data ~off ~len] sets the current memory region. - Sets the memory region accessible by disassembler to a - substring of [data] starting at the offset [off] and - having the length [len] with the first byte at [off] - having the address [addr]. + Sets the memory region accessible by disassembler to a substring of + [data] starting at the offset [off] and having the length [len] with + the first byte at [off] having the address [addr]. - Parameters [off] and [len] must be non-negative - numbers. The [offset dis] shall be equal to [0] after this - function is executed. + Parameters [off] and [len] must be non-negative numbers. The + [offset dis] shall be equal to [0] after this function is executed. *) - val set_memory : t -> int64 -> Bigstring.t -> off:int -> len:int -> unit + val store_predicates : t -> bool -> unit (** [store_predicates dis on_off] turns predicate storage on or off. - When it is [off] it is not required to store semantic - predicates for each instruction in the queue, only for the - last disassembled one. - It is [off] by default. - *) - val store_predicates : t -> bool -> unit + When it is [off] it is not required to store semantic predicates for + each instruction in the queue, only for the last disassembled one. + It is [off] by default. *) - (** [store_asm_string dis on_off] turns assembly string - storage on or off. + val store_asm_string : t -> bool -> unit + (** [store_asm_string dis on_off] turns assembly string storage on or + off. - When it is [off] it is not required to store assembly - strings for each instruction in the queue, only for the - last disassembled one. + When it is [off] it is not required to store assembly strings for + each instruction in the queue, only for the last disassembled one. - It is [off] by default. *) - val store_asm_string : t -> bool -> unit + It is [off] by default. *) + val insn_table : t -> Bigstring.t (** [insn_table dis] returns a string table for opcodes. - The table contains a sequence of null-terminated strings. - *) - val insn_table : t -> Bigstring.t + The table contains a sequence of null-terminated strings. *) + val reg_table : t -> Bigstring.t (** [reg_table dis] returns a string table for register names. - The table contains a sequence of null-terminated strings. - *) - val reg_table : t -> Bigstring.t + The table contains a sequence of null-terminated strings. *) - (** [predicates_clear dis] clears the set of predicates. *) val predicates_clear : t -> unit + (** [predicates_clear dis] clears the set of predicates. *) + val predicates_push : t -> predicate -> unit (** [predicates_push dis p] adds [p] to the set of predicates. - precondition: [is_supported dis p]. - *) - val predicates_push : t -> predicate -> unit + precondition: [is_supported dis p]. *) - (** [is_supported dis p] is [true] if [dis] supports [p].*) val is_supported : t -> predicate -> bool + (** [is_supported dis p] is [true] if [dis] supports [p].*) - (** [set_offset dis off] sets the current offset to [off]. *) val set_offset : t -> int -> unit + (** [set_offset dis off] sets the current offset to [off]. *) - (** [offset dis] is the current offset. *) val offset : t -> int + (** [offset dis] is the current offset. *) + val run : t -> unit (** [run dis] runs the disassembler. - The disassembler runs until it hits an instruction that - matches one of the predicates in the disassemblers current - set of predicates or it runs out of the boundaries of the - currently specified memory region. - - See the module description for the more detailed - description of the backend algorithm. - *) - val run : t -> unit + The disassembler runs until it hits an instruction that matches one + of the predicates in the disassemblers current set of predicates or + it runs out of the boundaries of the currently specified memory + region. + See the module description for the more detailed description of the + backend algorithm. *) - (** [insns_clear dis] clears the disassembler instructions queue. *) val insns_clear : t -> unit + (** [insns_clear dis] clears the disassembler instructions queue. *) - (** [insns_size dis] the length of the instruction queue. *) val insns_size : t -> int + (** [insns_size dis] the length of the instruction queue. *) (** {3 Instructions} - Each operation in this section takes a parameter labeled - with [insn] that designates the position of the - instruction in the queue, with [0] being the first - disassembled instruction and [insn_size dis - 1] being the - last disassembled. - *) - (** [insn_size dis ~insn:n] the [n]th instruction length. *) + Each operation in this section takes a parameter labeled with [insn] + that designates the position of the instruction in the queue, with + [0] being the first disassembled instruction and [insn_size dis - 1] + being the last disassembled. *) + val insn_size : t -> insn:int -> int + (** [insn_size dis ~insn:n] the [n]th instruction length. *) - (** [insn_name dis ~insn:n] the [n]th instruction name. *) val insn_name : t -> insn:int -> int + (** [insn_name dis ~insn:n] the [n]th instruction name. *) + val insn_code : t -> insn:int -> int (** [insn_name dis ~insn:n] the [n]th instruction opcode. - The opcode name is represented as an offset to the - [insn_table dis] string table in which each element is a - null-terminated string. - *) - val insn_code : t -> insn:int -> int + The opcode name is represented as an offset to the [insn_table dis] + string table in which each element is a null-terminated string. *) - (** [insn_offset dis ~insn:n] the offset of [n]th instruction. *) val insn_offset : t -> insn:int -> int + (** [insn_offset dis ~insn:n] the offset of [n]th instruction. *) - (** [insn_offset dis ~insn:n] the [n]th instruction assembly - string length. *) val insn_asm_size : t -> insn:int -> int + (** [insn_offset dis ~insn:n] the [n]th instruction assembly string + length. *) - (** [insn_asm_copy dis ~insn:n data] copies the assembly - string of the [n]th instruction. *) val insn_asm_copy : t -> insn:int -> Bytes.t -> unit + (** [insn_asm_copy dis ~insn:n data] copies the assembly string of the + [n]th instruction. *) - (** [insn_satisfies dis ~insn:n p] is [true] if - the [n]th instruction satisfies the predicate [p]. *) val insn_satisfies : t -> insn:int -> predicate -> bool + (** [insn_satisfies dis ~insn:n p] is [true] if the [n]th instruction + satisfies the predicate [p]. *) - (** [insn_ops_size dis ~insn:n] the number of operands. *) val insn_ops_size : t -> insn:int -> int - + (** [insn_ops_size dis ~insn:n] the number of operands. *) (** {4 Instruction Operands} - The following function accesses operands of [n]'th - instruction. Each operand is referenced by its position - [m] with [0] being the first operand (if such exists) and - [insn_ops_size dis - 1] being the last operand. + The following function accesses operands of [n]'th instruction. Each + operand is referenced by its position [m] with [0] being the first + operand (if such exists) and [insn_ops_size dis - 1] being the last + operand. + + The operand type is denoted with the [op] type. *) - The operand type is denoted with the [op] type. - *) - (** [insn_op_type dis ~insn:n ~oper:m] the [m]th operand type. *) val insn_op_type : t -> insn:int -> oper:int -> op + (** [insn_op_type dis ~insn:n ~oper:m] the [m]th operand type. *) + val insn_op_reg_name : t -> insn:int -> oper:int -> int (** [insn_op_reg_name dis ~insn:n ~oper:m] the register name. Returns the register name of the operand [m]. The name is - represented as an offset to the [reg_table dis], which is - a string table of null-terminated strings. + represented as an offset to the [reg_table dis], which is a string + table of null-terminated strings. - Precondition: [insn_op_type dis ~insn:n ~oper:m = Reg] - *) - val insn_op_reg_name : t -> insn:int -> oper:int -> int + Precondition: [insn_op_type dis ~insn:n ~oper:m = Reg] *) + val insn_op_reg_code : t -> insn:int -> oper:int -> int (** [insn_op_reg_name dis ~insn:n ~oper:m] the register code. - Returns the register code of the operand [m]. The code is - a unique number identifying the register (could be the - same as [insn_op_reg_name]. - - Precondition: [insn_op_type dis ~insn:n ~oper:m = Reg] - *) - val insn_op_reg_code : t -> insn:int -> oper:int -> int + Returns the register code of the operand [m]. The code is a unique + number identifying the register (could be the same as + [insn_op_reg_name]. + Precondition: [insn_op_type dis ~insn:n ~oper:m = Reg] *) + val insn_op_imm_value : t -> insn:int -> oper:int -> int64 (** [insn_op_imm_value dis ~insn:n ~oper:m] the immediate value. Returns the value of the operand [m]. - Precondition: [insn_op_type dis ~insn:n ~oper:m = Imm] - *) - val insn_op_imm_value : t -> insn:int -> oper:int -> int64 + Precondition: [insn_op_type dis ~insn:n ~oper:m = Imm] *) + val insn_op_imm_small_value : t -> insn:int -> oper:int -> int (** [insn_op_imm_small_value dis ~insn:n ~oper:m] the immediate value. - If the value [v] of the operand [m] is strictly greater - than [Int.min_val] and is strictly less than [Int.max_val] - then returns [v] otherwise returns [Int.min_val] or [Int.max_val]. - - Precondition: [insn_op_type dis ~insn:n ~oper:m = Imm] - *) - val insn_op_imm_small_value : t -> insn:int -> oper:int -> int + If the value [v] of the operand [m] is strictly greater than + [Int.min_val] and is strictly less than [Int.max_val] then returns + [v] otherwise returns [Int.min_val] or [Int.max_val]. + Precondition: [insn_op_type dis ~insn:n ~oper:m = Imm] *) + val insn_op_fmm_value : t -> insn:int -> oper:int -> float (** [insn_op_fmm_value] the floating-point immediate value. Returns the value of the operand [m]. Precondition: [insn_op_type dis ~insn:n ~oper:m = Fmm] *) - val insn_op_fmm_value : t -> insn:int -> oper:int -> float end - end (** Basic disassembler. This is a target agnostic basic low-level machine code disassembler. *) module Basic : sig + type pred = + [ `Valid (** stop on first valid insn *) + | Kind.t (* stop on first insn of the specified kind *) ] + [@@deriving sexp] (** predicate to drive the disassembler *) - type pred = [ - | `Valid (** stop on first valid insn *) - | Kind.t (** stop on first insn of the specified kind *) - ] [@@deriving sexp] - (** {2 Basic types } *) + (** {2 Basic types} *) + type (+'a, +'k) insn (** [insn] basic instruction. See {!Insn} module for a more detailed description. - @typevar 'a = {{!asm}asm} | {{!empty}empty}, denotes whether assembly + typevar 'a = {{!asm}asm} | {{!empty}empty}, denotes whether assembly representation is available for the given instruction. - @typevar 'k = {{!kind}kind} | {{!empty}empty}, denotes whether semantics - kinds are available for the given instruction.*) - type (+'a,+'k) insn + typevar 'k = {{!kind}kind} | {{!empty}empty}, denotes whether + semantics kinds are available for the given instruction.*) - (** [insns] is a list of pairs, where each pair consists of a - memory region occupied by an instruction, and the instruction - itself. *) - type (+'a,+'k) insns = (mem * ('a,'k) insn option) list + type (+'a, +'k) insns = (mem * ('a, 'k) insn option) list + (** [insns] is a list of pairs, where each pair consists of a memory + region occupied by an instruction, and the instruction itself. *) - (** witnesses the absence of the information *) type empty + (** witnesses the absence of the information *) - (** witnesses a presence of the assembly string *) type asm + (** witnesses a presence of the assembly string *) - (** witnesses a presence of the semantic kinds *) type kinds + (** witnesses a presence of the semantic kinds *) - (** abbreviate an instruction with full information. *) - type full_insn = (asm,kinds) insn [@@deriving compare, sexp_of] + type full_insn = (asm, kinds) insn [@@deriving compare, sexp_of] + (** abbreviate an instruction with full information. *) + type ('a, 'k) t (** Disassembler. - The ['a] and ['k] type variables specify disassembler modes - of operation. In a process of disassembly it can store extra - information that might be useful. Although, since storing it - takes extra time and space, it is disabled by default. + The ['a] and ['k] type variables specify disassembler modes of + operation. In a process of disassembly it can store extra information + that might be useful. Although, since storing it takes extra time and + space, it is disabled by default. - The first type variable specifies whether storing assembly - strings is enabled. It can be switched using [store_asm], - [drop_asm] functions. When it is enabled, then this type - variable will be set to [asm], and it will give an access to - functions that returns this information. Otherwise, this type - variable will be set to [empty], thus stopping you from - accessing assembler information. + The first type variable specifies whether storing assembly strings is + enabled. It can be switched using [store_asm], [drop_asm] functions. + When it is enabled, then this type variable will be set to [asm], and + it will give an access to functions that returns this information. + Otherwise, this type variable will be set to [empty], thus stopping + you from accessing assembler information. - The second type variable stands for [kinds], i.e. to store - or not to store extra information about instruction kind. + The second type variable stands for [kinds], i.e. to store or not to + store extra information about instruction kind. - Note: at some points you can have an access to this - information even if you don't enable it explicitly. *) - type ('a,'k) t + Note: at some points you can have an access to this information even + if you don't enable it explicitly. *) + type (+'a, +'k, 's, 'r) state (** Disassembler state. Words of precaution: this state is valid only inside handlers - functions of the [run] function. It shouldn't be stored - anywhere. - First two type variables are bound correspondingly to two - variables of the disassmbler [('a,'k) t] type. The last pair - of type variables are bounded to input and output types of - user functions. They are made different, so that a function - can be run in an arbitrary monad. For simple cases, the can - be made the same. *) - type (+'a,+'k,'s,'r) state - - - (** [register encoding constructor] registers a disassembler - [constructor] for the given [encoding]. - - The constructor receives the [target] value that - further specifies the details of the target system, e.g., - a cpu model, limitiations on the instruction set, etc. - - The constructor commonly uses {!create} and passes the - backend and target specific options to it. It can also use - the {!custom} function to create its own - backend. Alternatively, the {!lookup} function could be used - to delegate the decoding to another encoder. - *) - val register : Theory.language -> - (Theory.target -> (empty,empty) t Or_error.t) -> + functions of the [run] function. It shouldn't be stored anywhere. + First two type variables are bound correspondingly to two variables of + the disassmbler [('a,'k) t] type. The last pair of type variables are + bounded to input and output types of user functions. They are made + different, so that a function can be run in an arbitrary monad. For + simple cases, the can be made the same. *) + + val register : + Theory.language -> + (Theory.target -> (empty, empty) t Or_error.t) -> unit + (** [register encoding constructor] registers a disassembler [constructor] + for the given [encoding]. - (** [lookup target encoding] returns the disassembler for the - specified [target] and [encoding], creates one if necessary. + The constructor receives the [target] value that further specifies the + details of the target system, e.g., a cpu model, limitiations on the + instruction set, etc. - Returns an error if there is no constructor for the given - encoding registered (via the {!register} function) or if the - constructor itself fails to create a disassembler. *) - val lookup : Theory.target -> Theory.language -> (empty,empty) t Or_error.t + The constructor commonly uses {!create} and passes the backend and + target specific options to it. It can also use the {!custom} function + to create its own backend. Alternatively, the {!lookup} function could + be used to delegate the decoding to another encoder. *) - (** [create ?debug_level ?cpu ~backend target] creates the - disassmbler from one of the C-level backends. + val lookup : + Theory.target -> Theory.language -> (empty, empty) t Or_error.t + (** [lookup target encoding] returns the disassembler for the specified + [target] and [encoding], creates one if necessary. - The parameters are backend-specific and are commonly - set by the target support plugins via the {!register} - function, therefore the [create] function should only be used - to register a new target. Use {!lookup} to get an appropriate - disassembler for your target/encoding. + Returns an error if there is no constructor for the given encoding + registered (via the {!register} function) or if the constructor itself + fails to create a disassembler. *) - @since 2.2.0 has the [attrs] parameter - *) val create : ?debug_level:int -> ?cpu:string -> ?attrs:string -> ?backend:string -> - string -> (empty, empty) t Or_error.t + string -> + (empty, empty) t Or_error.t + (** [create ?debug_level ?cpu ~backend target] creates the disassmbler + from one of the C-level backends. + The parameters are backend-specific and are commonly set by the target + support plugins via the {!register} function, therefore the [create] + function should only be used to register a new target. Use {!lookup} + to get an appropriate disassembler for your target/encoding. - (** [custom target encoding backend disassembler] creates a - custom backend for the given [target] and [encoding]. + @since 2.2.0 has the [attrs] parameter *) - This function is commonly called by the constructor - function registered with the {!register} function. *) - val custom : Theory.target -> Theory.language -> - (module Backend.S with type t = 'a) -> 'a -> (empty,empty) t + val custom : + Theory.target -> + Theory.language -> + (module Backend.S with type t = 'a) -> + 'a -> + (empty, empty) t + (** [custom target encoding backend disassembler] creates a custom backend + for the given [target] and [encoding]. + + This function is commonly called by the constructor function + registered with the {!register} function. *) - (** [with_disasm ?debug_level ?cpu ~backend ~f target] creates a - disassembler passing all options to [create] function and - applies function [f] to it. Once [f] is evaluated the - disassembler is closed with [close] function. *) val with_disasm : - ?debug_level:int -> ?cpu:string -> ?backend:string -> string -> - f:((empty, empty) t -> 'a Or_error.t) -> 'a Or_error.t + ?debug_level:int -> + ?cpu:string -> + ?backend:string -> + string -> + f:((empty, empty) t -> 'a Or_error.t) -> + 'a Or_error.t + (** [with_disasm ?debug_level ?cpu ~backend ~f target] creates a + disassembler passing all options to [create] function and applies + function [f] to it. Once [f] is evaluated the disassembler is closed + with [close] function. *) - (** [close d] closes a disassembler [d]. *) - val close : (_,_) t -> unit + val close : (_, _) t -> unit + (** [close d] closes a disassembler [d]. *) - (** enables storing assembler information *) - val store_asm : (_,'k) t -> (asm,'k) t + val store_asm : (_, 'k) t -> (asm, 'k) t + (** enables storing assembler information *) + val store_kinds : ('a, _) t -> ('a, kinds) t (** enables storing instruction kinds information *) - val store_kinds : ('a,_) t -> ('a,kinds) t - - (** [run ?stop_on ?invalid ?stopped dis mem ~init ~return ~hit] - performs the recursive disassembly of the specified chunk of - memory [mem]. The process of disassembly can be driven using - the [stop], [step], [back] and [jump] functions, described - later. - - @param backlog defines a size of history of states, that can - be used for backtracking. Defaults to some positive natural - number. - - @param stop_on defines a set of predicates that will be - checked on each step to decide whether a disassembler should - stop here and call the user-provided [hit] function, or it should - continue. The decision is made according to the rule: [if - exists stop_on then stop], i.e., it there exists such - predicate in a set of predicates, that evaluates to true, then - stop the disassembly and pass the control to the user function - [hit]. A few notes: only valid instructions can match - predicates, and if the set is empty, then it always evaluates - to false. - - @param init initial value of user data, that can be passed - through handlers (cf., [fold]) - - @param return a function that lifts user data type ['s] to type - ['r]. It is useful when you need to perform disassembly in some - monad, like [Or_error], or [Lwt]. Otherwise, just use [Fn.id] - function and assume that ['s == 'r]. - - The disassembler will invoke user provided callbacks. To each - callback at least two parameters are passed: [state] and - [user_data]. [user_data] is arbitrary data of type ['s] with - which the folding over the memory is actually - performed. [state] incapsulates the current state of the - disassembler, and provides continuation functions, namely - [stop], [next] and [back], that drives the process of - disassembly. This functions are used to pass control back to - the disassembler. + + val run : + ?backlog:int -> + ?stop_on:pred list -> + ?invalid:(('a, 'k, 's, 'r) state -> mem -> 's -> 'r) -> + ?stopped:(('a, 'k, 's, 'r) state -> 's -> 'r) -> + ?hit:(('a, 'k, 's, 'r) state -> mem -> (asm, kinds) insn -> 's -> 'r) -> + ('a, 'k) t -> + return:('s -> 'r) -> + init:'s -> + mem -> + 'r + (** [run ?stop_on ?invalid ?stopped dis mem ~init ~return ~hit] performs + the recursive disassembly of the specified chunk of memory [mem]. The + process of disassembly can be driven using the [stop], [step], [back] + and [jump] functions, described later. + + @param backlog + defines a size of history of states, that can be used for + backtracking. Defaults to some positive natural number. + + @param stop_on + defines a set of predicates that will be checked on each step to + decide whether a disassembler should stop here and call the + user-provided [hit] function, or it should continue. The decision is + made according to the rule: [if exists stop_on then stop], i.e., it + there exists such predicate in a set of predicates, that evaluates + to true, then stop the disassembly and pass the control to the user + function [hit]. A few notes: only valid instructions can match + predicates, and if the set is empty, then it always evaluates to + false. + + @param init + initial value of user data, that can be passed through handlers + (cf., [fold]) + + @param return + a function that lifts user data type ['s] to type ['r]. It is useful + when you need to perform disassembly in some monad, like [Or_error], + or [Lwt]. Otherwise, just use [Fn.id] function and assume that + ['s == 'r]. + + The disassembler will invoke user provided callbacks. To each callback + at least two parameters are passed: [state] and [user_data]. + [user_data] is arbitrary data of type ['s] with which the folding over + the memory is actually performed. [state] incapsulates the current + state of the disassembler, and provides continuation functions, namely + [stop], [next] and [back], that drives the process of disassembly. + This functions are used to pass control back to the disassembler. [stopped state user_data] is called when there is no more data to disassemble. This handler is optional and defaults to [stop]. [invalid state user_data] is an optional handler that is called on - each invalid instruction (i.e., a portion of data that is not a - valid instruction), it defaults to [step], i.e., to skipping. + each invalid instruction (i.e., a portion of data that is not a valid + instruction), it defaults to [step], i.e., to skipping. [hit state mem insn data] is called when one of the predicates - specified by a user was hit. [insn] is actually the instruction - that satisfies the predicate. [mem] is a memory region spanned by - the instruction. [data] is a user data. [insn] can be queried for - assembly string and kinds even if the corresponding modes are - disabled. *) - val run : - ?backlog:int -> - ?stop_on:pred list -> - ?invalid:(('a,'k,'s,'r) state -> mem -> 's -> 'r) -> - ?stopped:(('a,'k,'s,'r) state -> 's -> 'r) -> - ?hit:(('a,'k,'s,'r) state -> mem -> (asm,kinds) insn -> 's -> 'r) -> - ('a,'k) t -> - return:('s -> 'r) -> - init:'s -> mem -> 'r - - (** [insn_of_mem dis mem] performs a disassembly of one instruction - from the a given memory region [mem]. Returns a tuple + specified by a user was hit. [insn] is actually the instruction that + satisfies the predicate. [mem] is a memory region spanned by the + instruction. [data] is a user data. [insn] can be queried for assembly + string and kinds even if the corresponding modes are disabled. *) + + val insn_of_mem : + (_, _) t -> + mem -> + (mem * (asm, kinds) insn option * [ `left of mem | `finished ]) + Or_error.t + (** [insn_of_mem dis mem] performs a disassembly of one instruction from + the a given memory region [mem]. Returns a tuple [imem,insn,`left over] where [imem] stands for a piece of memory consumed in a process of disassembly, [insn] can be [Some ins] if disassembly was successful, and [None] otherwise. [`left over] complements [imem] to original [mem]. *) - val insn_of_mem : (_,_) t -> mem -> - (mem * (asm,kinds) insn option * [`left of mem | `finished]) Or_error.t - (** current position of the disassembler *) - val addr : (_,_,_,_) state -> addr + val addr : (_, _, _, _) state -> addr + (** current position of the disassembler *) + val preds : (_, _, _, _) state -> pred list (** current set of predicates *) - val preds : (_,_,_,_) state -> pred list - (** updates the set of predicates, that rules the stop condition. *) - val with_preds : ('a,'k,'s,'r) state -> pred list -> ('a,'k,'s,'r) state + val with_preds : + ('a, 'k, 's, 'r) state -> pred list -> ('a, 'k, 's, 'r) state + (** updates the set of predicates, that rules the stop condition. *) - (** a queue of instructions disassembled in this step *) - val insns : ('a,'k,_,_) state -> ('a,'k) insns + val insns : ('a, 'k, _, _) state -> ('a, 'k) insns + (** a queue of instructions disassembled in this step *) - (** [last s n] returns last [n] instructions disassembled in this - step. If there are less then [n] instructions, then returns a - smaller list *) - val last : ('a,'k,'s,'r) state -> int -> ('a,'k) insns + val last : ('a, 'k, 's, 'r) state -> int -> ('a, 'k) insns + (** [last s n] returns last [n] instructions disassembled in this step. If + there are less then [n] instructions, then returns a smaller list *) + val memory : (_, _, _, _) state -> mem (** the memory region we're currently working on *) - val memory : (_,_,_,_) state -> mem - (** stop the disassembly and return the provided value. *) - val stop : (_,_,'s,'r) state -> 's -> 'r + val stop : (_, _, 's, 'r) state -> 's -> 'r + (** stop the disassembly and return the provided value. *) - (** continue disassembling from the current point. You can change a - a set of predicates, before stepping next. If you want to continue - from a different address, use [jump] *) - val step : (_,_,'s,'r) state -> 's -> 'r + val step : (_, _, 's, 'r) state -> 's -> 'r + (** continue disassembling from the current point. You can change a a set + of predicates, before stepping next. If you want to continue from a + different address, use [jump] *) + val jump : (_, _, 's, 'r) state -> mem -> 's -> 'r (** jump to the specified memory and continue disassembly in it. - For example, if you want to jump to a specified address, and - you're working in a [Or_error] monad, then you can: + For example, if you want to jump to a specified address, and you're + working in a [Or_error] monad, then you can: - [view ~from:addr (mem state) >>= fun mem -> jump mem data] - *) - val jump : (_,_,'s,'r) state -> mem -> 's -> 'r + [view ~from:addr (mem state) >>= fun mem -> jump mem data] *) - (** restarts last step. *) - val back : (_,_,'s,'r) state -> 's -> 'r + val back : (_, _, 's, 'r) state -> 's -> 'r + (** restarts last step. *) (** Basic instruction aka machine-specific instruction. - The machine-specific instruction is composed of a name, - operands, and kinds (or flags) that denote additional - information about the instruction. - - The meaning of the name and operands is specific to a - particular machine and encoding (see {!Insn.encoding}). The - meaning of the instruction kinds is more or less universal. - + The machine-specific instruction is composed of a name, operands, and + kinds (or flags) that denote additional information about the + instruction. - *) + The meaning of the name and operands is specific to a particular + machine and encoding (see {!Insn.encoding}). The meaning of the + instruction kinds is more or less universal. *) module Insn : sig + type ('a, 'k) t = ('a, 'k) insn - type ('a,'k) t = ('a,'k) insn - - (** a decoded representa *) val slot : (Theory.program, full_insn option) Knowledge.slot + (** a decoded representa *) - (** [sexp_of_t insn] returns a sexp representation of [insn] *) - val sexp_of_t : ('a,'k) t -> Sexp.t + val sexp_of_t : ('a, 'k) t -> Sexp.t + (** [sexp_of_t insn] returns a sexp representation of [insn] *) - (** [compare i1 i2] compares instruction [i1] and [i2] *) - val compare : ('a,'k) t -> ('a,'k) t -> int + val compare : ('a, 'k) t -> ('a, 'k) t -> int + (** [compare i1 i2] compares instruction [i1] and [i2] *) - (** [code insn] returns an integer code, that is bijective - with instruction opcode. It might not be the actual opcode, it - may also change between different backends, and different - versions of the same backend. *) - val code : ('a,'k) t -> int + val code : ('a, 'k) t -> int + (** [code insn] returns an integer code, that is bijective with + instruction opcode. It might not be the actual opcode, it may also + change between different backends, and different versions of the + same backend. *) - (** [name insn] returns a textual representation of the - instruction name. It might be the mnemonics, or a name, - specific to a backend. The name is guaranteed to biject with - the opcode (and thus to [code]). *) - val name : ('a,'k) t -> string + val name : ('a, 'k) t -> string + (** [name insn] returns a textual representation of the instruction + name. It might be the mnemonics, or a name, specific to a backend. + The name is guaranteed to biject with the opcode (and thus to + [code]). *) + val encoding : ('a, 'k) t -> string (** returns the name of the backend that encoded this instruction. - @since 2.1.0 - *) - val encoding : ('a,'k) t -> string - + @since 2.1.0 *) - (** [kinds insn] returns a high-level semantic information - about the instruction. See {!Kind} for the description of - semantic codes. *) - val kinds : ('a,kinds) t -> Kind.t list + val kinds : ('a, kinds) t -> Kind.t list + (** [kinds insn] returns a high-level semantic information about the + instruction. See {!Kind} for the description of semantic codes. *) - (** [is insn kind] checks whether instruction [insn] belongs - to the semantic [kind] *) - val is : ('a,kinds) t -> Kind.t -> bool + val is : ('a, kinds) t -> Kind.t -> bool + (** [is insn kind] checks whether instruction [insn] belongs to the + semantic [kind] *) - (** [asm insn] returns assembly representation of the instruction *) - val asm : (asm,'k) t -> string + val asm : (asm, 'k) t -> string + (** [asm insn] returns assembly representation of the instruction *) - (** [ops insn] gives an access to [insn]'s operands. *) - val ops : ('a,'k) t -> op array + val ops : ('a, 'k) t -> op array + (** [ops insn] gives an access to [insn]'s operands. *) + val subs : ('a, 'k) t -> ('a, 'k) t array (** [subs insn] an array of subinstructions. - An instruction can contain subinstructions, which could be - accessed with this function. Returns an empty array if - there are no subinstructions. + An instruction can contain subinstructions, which could be accessed + with this function. Returns an empty array if there are no + subinstructions. @since 2.5.0 *) - val subs : ('a,'k) t -> ('a,'k) t array end - (** Trie maps over instructions *) + (** Trie maps over instructions *) module Trie : sig type key + val key_of_first_insns : (_, _, _, _) state -> len:int -> key option (** [key_of_first_insns state ~len:n] creates a key from first [n] - instructions stored in the state if state contains such - amount of instructions *) - val key_of_first_insns : (_,_,_,_) state -> len:int -> key option + instructions stored in the state if state contains such amount of + instructions *) module Normalized : Trie.S with type key = key include Trie.S with type key := key end - (** enumerates names of available disassembler backends. *) val available_backends : unit -> string list + (** enumerates names of available disassembler backends. *) end - (** A simple linear sweep disassembler. *) + (** A simple linear sweep disassembler. *) module Linear : sig - - (** output type of a disassembler. *) type t = (mem * insn option) list + (** output type of a disassembler. *) - (** [Linear.sweep arch mem] will perform a linear sweep - disassembly on the specified memory [mem] *) val sweep : ?backend:string -> arch -> mem -> t Or_error.t + (** [Linear.sweep arch mem] will perform a linear sweep disassembly on the + specified memory [mem] *) module With_exn : sig - (** [Linear.With_exn.sweep] same as - [Linear_sweep.memory], but raises an exception, instead of - returning [Or_error] monad *) val sweep : ?backend:string -> arch -> mem -> t + (** [Linear.With_exn.sweep] same as [Linear_sweep.memory], but raises an + exception, instead of returning [Or_error] monad *) end end - (** Recursive Descent Disassembler. This disassembler is built on - top of [Basic] disassembler. It uses the work list algorithm - to implement recursive descent disassembly and reconstructs - the whole program CFG. + (** Recursive Descent Disassembler. This disassembler is built on top of + [Basic] disassembler. It uses the work list algorithm to implement + recursive descent disassembly and reconstructs the whole program CFG. - This is an expert-level module, and it is suggested to use - high-level [Disasm] interface, that is built ontop of this - module. *) + This is an expert-level module, and it is suggested to use high-level + [Disasm] interface, that is built ontop of this module. *) module Recursive : sig - type t - (** [error] domain of errors. *) - type error = [ - | `Failed_to_disasm of mem - | `Failed_to_lift of mem * Basic.full_insn * Error.t - ] [@@deriving sexp_of] - - (** [run ?backend ?brancher ?rooter arch mem] disassemble and - reconstruct a CFG of the code in [mem], assuming - architecture [arch]. + type error = + [ `Failed_to_disasm of mem + | `Failed_to_lift of mem * Basic.full_insn * Error.t ] + [@@deriving sexp_of] + (** [error] domain of errors. *) - @param backend a backend name (default is implementation defined). - @param brancher what {{!Brancher}brancher} to use - (defaults to {!Brancher.of_bil}. - @param rooter what {{!Rooter}rooter} to use (defaults to - {!Rooter.empty}). *) val run : ?backend:string -> ?brancher:brancher -> - ?rooter:rooter -> arch -> mem -> t Or_error.t + ?rooter:rooter -> + arch -> + mem -> + t Or_error.t + (** [run ?backend ?brancher ?rooter arch mem] disassemble and reconstruct + a CFG of the code in [mem], assuming architecture [arch]. + + @param backend a backend name (default is implementation defined). + @param brancher + what {{!Brancher}brancher} to use (defaults to {!Brancher.of_bil}. + @param rooter + what {{!Rooter}rooter} to use (defaults to {!Rooter.empty}). *) - (** [cfg t] returns a control flow graph, representing the code - in the input region of memory. Note, this is not a subroutine CFG, - is is a whole segment graph. *) val cfg : t -> cfg + (** [cfg t] returns a control flow graph, representing the code in the + input region of memory. Note, this is not a subroutine CFG, is is a + whole segment graph. *) - (** [errors disasm] returns a list of non-critical errors, that - happened during the disassembly (e.g., unknown opcodes and - unlifted instructions. *) val errors : t -> error list + (** [errors disasm] returns a list of non-critical errors, that happened + during the disassembly (e.g., unknown opcodes and unlifted + instructions. *) end end (** Assembly instruction. - On a high level, the instruction is a pair of the opcode and - operands. A BIL code, that describes semantics of the - instruction may be attached to it. Also, semantic tags (or flags) - may add further information about the instruction. + On a high level, the instruction is a pair of the opcode and operands. A + BIL code, that describes semantics of the instruction may be attached to + it. Also, semantic tags (or flags) may add further information about the + instruction. - The instruction are usually created by a low level machinery, - and analyzed on the later stages. So, usually, there is no need - to create one manually. + The instruction are usually created by a low level machinery, and analyzed + on the later stages. So, usually, there is no need to create one manually. For example, each block is a sequence of instructions (see - {!Block.insns}), also with each non-synthetic term there is an - an {!Disasm.insn} field, that stores an instruction from which - the term was born. - *) + {!Block.insns}), also with each non-synthetic term there is an an + {!Disasm.insn} field, that stores an instruction from which the term was + born. *) module Insn : sig - type t = Theory.Program.Semantics.t [@@deriving bin_io, compare, sexp] - - (** Instruction properties. *) + (** Instruction properties. *) module Slot : sig type 'a t = (Theory.Program.Semantics.cls, 'a) KB.slot + val name : string t (** An opcode name, - Also accessible with [Insn.name]. - *) - val name : string t + Also accessible with [Insn.name]. *) - (** An assembly representation. *) - val asm : string t + val asm : string t + (** An assembly representation. *) - (** a list of operands *) - val ops : op array option t + val ops : op array option t + (** a list of operands *) - (** the length of the delay slot. *) val delay : int option t + (** the length of the delay slot. *) - (** the set of destinations (not including the fall-through edge). *) val dests : Set.M(Theory.Label).t option t + (** the set of destinations (not including the fall-through edge). *) end - (** [of_basic ?bil insn] derives semantics from the machine code instruction.*) val of_basic : ?bil:bil -> Disasm_expert.Basic.full_insn -> t + (** [of_basic ?bil insn] derives semantics from the machine code + instruction.*) + val with_basic : t -> Disasm_expert.Basic.full_insn -> t (** [with_basic mc] stores properties of the machine code instruction. - @since 2.2.0 - *) - val with_basic : t -> Disasm_expert.Basic.full_insn -> t + @since 2.2.0 *) - (** [empty] is an instruction with no known semantics *) val empty : t + (** [empty] is an instruction with no known semantics *) - (** returns backend specific name of instruction *) val name : t -> string + (** returns backend specific name of instruction *) - (** target-specific assembler string representing the instruction *) - val asm : t -> string + val asm : t -> string + (** target-specific assembler string representing the instruction *) - (** returns BIL program specifying instruction semantics *) - val bil : t -> bil + val bil : t -> bil + (** returns BIL program specifying instruction semantics *) - (** instruction operands *) - val ops : t -> op array + val ops : t -> op array + (** instruction operands *) (** {3 Instruction properties} - A property or a semantic tag is some kind of attribute - associated with an instruction. Usually a property is a - boolean, it either holds or not. In our case we employ modular - logic, and a property can have an intermediate state between - true and false. That means, that we have two kinds of - relations, strong "must" and weaker "may". The [must] - property is known to be a property associated with the - instruction. It is a strong knowledge. For example, if an - instruction has [jump] property, then it is guaranteed to be a - jump instruction. On the other hand, the [may] property - represent some uncertain knowledge. For example, the [load] - property is [may] as it designates that an instruction may - access the main memory, or may not access, as it depends on some - information, that cannot be deduced statically. *) + A property or a semantic tag is some kind of attribute associated with + an instruction. Usually a property is a boolean, it either holds or not. + In our case we employ modular logic, and a property can have an + intermediate state between true and false. That means, that we have two + kinds of relations, strong "must" and weaker "may". The [must] property + is known to be a property associated with the instruction. It is a + strong knowledge. For example, if an instruction has [jump] property, + then it is guaranteed to be a jump instruction. On the other hand, the + [may] property represent some uncertain knowledge. For example, the + [load] property is [may] as it designates that an instruction may access + the main memory, or may not access, as it depends on some information, + that cannot be deduced statically. *) type must = Must type may = May type 'a property - - (** [new_property must_or_may name] creates a new instruction - property with the specified name. *) val new_property : 'a -> string -> 'a property + (** [new_property must_or_may name] creates a new instruction property with + the specified name. *) + val jump : must property (** the instruction performs a non-regular control flow *) - val jump : must property - (** under some dynamic condition the instruction may perform a - non-regular control flow *) - val conditional : must property + val conditional : must property + (** under some dynamic condition the instruction may perform a non-regular + control flow *) - (** the instruction is jump with a target that is not a constant *) - val indirect : must property + val indirect : must property + (** the instruction is jump with a target that is not a constant *) - (** the instruction is a call to subroutine. *) - val call : must property + val call : must property + (** the instruction is a call to subroutine. *) - (** instruction is a return from a call *) - val return : must property + val return : must property + (** instruction is a return from a call *) - (** the instruction has no fall-through *) - val barrier : must property + val barrier : must property + (** the instruction has no fall-through *) - (** the instruction may perform a non-regular control flow *) - val affect_control_flow : may property + val affect_control_flow : may property + (** the instruction may perform a non-regular control flow *) - (** the instruction may load from memory *) - val load : may property + val load : may property + (** the instruction may load from memory *) - (** the instruction may store to memory *) - val store : may property + val store : may property + (** the instruction may store to memory *) - (** [is property insn] is [true] if [insn] has [property] *) - val is : must property -> t -> bool + val is : must property -> t -> bool + (** [is property insn] is [true] if [insn] has [property] *) - (** [may property insn] is [true] if [insn] has [property] *) - val may : may property -> t -> bool + val may : may property -> t -> bool + (** [may property insn] is [true] if [insn] has [property] *) - (** [must property insn] postulate that [insn] must have the [property] *) - val must : must property -> t -> t + val must : must property -> t -> t + (** [must property insn] postulate that [insn] must have the [property] *) - (** [must property insn] postulate that [insn] must not have the [property] *) val mustn't : must property -> t -> t + (** [must property insn] postulate that [insn] must not have the [property] + *) - (** [must property insn] postulate that [insn] may have the [property] *) - val should : may property -> t -> t + val should : may property -> t -> t + (** [must property insn] postulate that [insn] may have the [property] *) - (** [must property insn] postulate that [insn] shouldn't have the [property] *) - val shouldn't : may property -> t -> t + val shouldn't : may property -> t -> t + (** [must property insn] postulate that [insn] shouldn't have the [property] + *) - (** [pp_adt] prints instruction in ADT format, suitable for reading - by evaluating in many languages, e.g. Python, Js, etc *) val pp_adt : Format.formatter -> t -> unit - + (** [pp_adt] prints instruction in ADT format, suitable for reading by + evaluating in many languages, e.g. Python, Js, etc *) (** Subinstruction Sequence Number. - A subinstruction sequence number plays the role of an address - for sub-instruction (which otherwise share the same physical - address). + A subinstruction sequence number plays the role of an address for + sub-instruction (which otherwise share the same physical address). - Each subinstruction is having a unique address across the - whole program (not only unique across to other subinstructions - of the same instruction) and much like [Theory.Label.for_addr] - it is possible to get a label that corresponds to an - instruction with the given sequence number using - [Seqnum.label]. + Each subinstruction is having a unique address across the whole program + (not only unique across to other subinstructions of the same + instruction) and much like [Theory.Label.for_addr] it is possible to get + a label that corresponds to an instruction with the given sequence + number using [Seqnum.label]. - The sequence number is represented with an integer to enable - address arithemetics. A subinstruction that follows a - subinstruction with the sequence number [N] has the sequence - number [N+1]. + The sequence number is represented with an integer to enable address + arithemetics. A subinstruction that follows a subinstruction with the + sequence number [N] has the sequence number [N+1]. - @since 2.4.0 - *) + @since 2.4.0 *) module Seqnum : sig type t = int - - (** [label seqnum] returns the program label that corresponds - to [seqnum]. *) val label : ?package:string -> t -> Theory.Label.t KB.t + (** [label seqnum] returns the program label that corresponds to [seqnum]. + *) - (** [slot] for accessing the sequence number of a subinstruction. *) val slot : (Theory.program, t option) KB.slot + (** [slot] for accessing the sequence number of a subinstruction. *) - + val fresh : tid knowledge (** [fresh] evaluates to a freshly generated sequence number. - @since 2.5.0 *) - val fresh : tid knowledge + @since 2.5.0 *) end - (** {3 Prefix Tree} This module provides a trie data structure where a sequence of - instructions is used as a key (and an individual instruction - as a token). Two implementations are provided, a regular, where - insns are compared as-is, and normalized, where instructions are - compared using normalized comparison. - - In normalized comparison concerete immediate values are ignored, - and if instructions have different number of operands, then only - then excess operands are excluded from the comparison. - *) + instructions is used as a key (and an individual instruction as a + token). Two implementations are provided, a regular, where insns are + compared as-is, and normalized, where instructions are compared using + normalized comparison. + + In normalized comparison concerete immediate values are ignored, and if + instructions have different number of operands, then only then excess + operands are excluded from the comparison. *) module Trie : sig - (** Trie requires 0(1) get operation *) type key + (** Trie requires 0(1) get operation *) - (** [key_of_insns insns] takes a list of instructions and transforms - it to [key] *) val key_of_insns : t list -> key + (** [key_of_insns insns] takes a list of instructions and transforms it to + [key] *) module Normalized : Trie.S with type key = key include Trie.S with type key := key @@ -7173,511 +6794,444 @@ module Std : sig (** Basic block. - Basic block is piece of code, that has single entry and single - exit. It can be seen as a container for instructions. Also, - basic blocks are nodes of control flow graphs. + Basic block is piece of code, that has single entry and single exit. It + can be seen as a container for instructions. Also, basic blocks are nodes + of control flow graphs. The following invariants must be preserved: - - there is no known jump in the program, that points to an - instruction that is not a leader of a basic block; + - there is no known jump in the program, that points to an instruction + that is not a leader of a basic block; - any jump instruction is a terminator of some basic block; - - each basic block consists of at least one instruction. - *) + - each basic block consists of at least one instruction. *) module Block : sig type t = block [@@deriving compare, sexp_of] - (** [create mem insn] creates a block - Preconditions: - - [insns <> []] *) val create : mem -> (mem * insn) list -> t + (** [create mem insn] creates a block Preconditions: + - [insns <> []] *) - (** [addr block = Memory.min_addr (memory block)] address of the first instruction *) val addr : t -> addr + (** [addr block = Memory.min_addr (memory block)] address of the first + instruction *) - (** [memory blk] a memory region, occupied by a block*) val memory : t -> mem + (** [memory blk] a memory region, occupied by a block*) - (** [leader blk] the first instruction *) val leader : t -> insn + (** [leader blk] the first instruction *) - (** [terminator blk] last instruction of the block *) val terminator : t -> insn + (** [terminator blk] last instruction of the block *) - (** [insns blk] returns a list of block instructions.*) val insns : t -> (mem * insn) list + (** [insns blk] returns a list of block instructions.*) - (** Since [block] contains a region of memory, that is not regular, - the block itself is also non-regular. But it is, at least, - printable. *) - include Opaque.S with type t := t + include Opaque.S with type t := t + (** Since [block] contains a region of memory, that is not regular, the + block itself is also non-regular. But it is, at least, printable. *) + include Printable.S with type t := t (** all the printing stuff, including [to_string] function *) - include Printable.S with type t := t end (** BAP Common Graphs. - This module contains several graph structures, that are used - across BAP. + This module contains several graph structures, that are used across BAP. - The idiomatic use case is to bind the chosen graph to a shorter - name and use it as a first class module with different functions - in {{!Graphlib.Std}graphlib} library, e.g., + The idiomatic use case is to bind the chosen graph to a shorter name and + use it as a first class module with different functions in + {{!Graphlib.Std}graphlib} library, e.g., {[ module G = Graphs.Cfg let insns cfg = - Graphlib.reverse_postorder_traverse (module G) cfg |> - Seq.map ~f:Block.insns |> - Seq.concat_map ~f:Seq.of_list - ]} - *) + Graphlib.reverse_postorder_traverse (module G) cfg + |> Seq.map ~f:Block.insns + |> Seq.concat_map ~f:Seq.of_list + ]} *) module Graphs : sig - - (** Control Flow Graph with a machine basic block as a node. *) - module Cfg : Graph with type t = cfg - and type node = block - and type Edge.label = edge - - (** A call graph representation. - In this representations, nodes are identifiers of subroutine - terms, and edges, representing calls, are marked with a list of - callsites, where callsite is denoted by a jump term. *) - module Callgraph : Graph with type node = tid - and type Node.label = tid - and type Edge.label = jmp term list + (** Control Flow Graph with a machine basic block as a node. *) + module Cfg : + Graph with type t = cfg and type node = block and type Edge.label = edge + + (** A call graph representation. In this representations, nodes are + identifiers of subroutine terms, and edges, representing calls, are + marked with a list of callsites, where callsite is denoted by a jump + term. *) + module Callgraph : + Graph + with type node = tid + and type Node.label = tid + and type Edge.label = jmp term list (** Graph view over IR. - This module implements a graph view on an intermediate - representation of a subroutine. To create an instance of a - graph, using existing subroutine use {!Sub.to_cfg}. At any - moment current sub term can be obtained using {!Sub.of_cfg} - function. This is a just a projection operation, so it doesn't - take any computing time. - - All [Graph] modification operations, like [insert], [remove] - and [update] in [Node] and [Edge] modules are mapped to - corresponding [Term] operations. Also, for performance - reasons, graph is augmented with auxiliary data structures, - that performs most of the operations in O(log(N)) time. - - Although this implements all operations of {!Graph} interface - it is recommended to use {!Term} or [Builder] interfaces to - build and modify underlying terms. The next few sections will - clarify the behavior of a graph when it is modified using - {!Graph} interface. If you do not want to read the following - sections, then better do not use this module to build your - terms. + This module implements a graph view on an intermediate representation of + a subroutine. To create an instance of a graph, using existing + subroutine use {!Sub.to_cfg}. At any moment current sub term can be + obtained using {!Sub.of_cfg} function. This is a just a projection + operation, so it doesn't take any computing time. + + All [Graph] modification operations, like [insert], [remove] and + [update] in [Node] and [Edge] modules are mapped to corresponding [Term] + operations. Also, for performance reasons, graph is augmented with + auxiliary data structures, that performs most of the operations in + O(log(N)) time. + + Although this implements all operations of {!Graph} interface it is + recommended to use {!Term} or [Builder] interfaces to build and modify + underlying terms. The next few sections will clarify the behavior of a + graph when it is modified using {!Graph} interface. If you do not want + to read the following sections, then better do not use this module to + build your terms. {2 Inserting nodes} - When node is inserted into a graph [g] all jumps of a node, - that lead to blocks that are already in a graph will be - represented as edges. Also, all jumps from other nodes to the - inserted node, will be added as edges (assuming that this - other nodes are also in the graph g). Thus inserting node can - create an arbitrary number of edges, from zero to N. If jump - target is not yet in the graph, then jump is not removed from a - sequence of jumps of the inserted node, but just ignored. + When node is inserted into a graph [g] all jumps of a node, that lead to + blocks that are already in a graph will be represented as edges. Also, + all jumps from other nodes to the inserted node, will be added as edges + (assuming that this other nodes are also in the graph g). Thus inserting + node can create an arbitrary number of edges, from zero to N. If jump + target is not yet in the graph, then jump is not removed from a sequence + of jumps of the inserted node, but just ignored. {2 Updating nodes} - When node is updated with the same node (but possibly with - different set of terms, see {{!sema}description of sameness}) - then all changes that affects control flow will be - applied. For example, if jump is absent in a new version of a - block, and this jump corresponds to an edge in the graph, then - this edge will be removed. + When node is updated with the same node (but possibly with different set + of terms, see {{!sema}description of sameness}) then all changes that + affects control flow will be applied. For example, if jump is absent in + a new version of a block, and this jump corresponds to an edge in the + graph, then this edge will be removed. {2 Removing nodes} - The node will be removed from the underlying [sub term], and - all edges incident to the removed node will be also removed. - This will not affect jmp terms of blk terms. + The node will be removed from the underlying [sub term], and all edges + incident to the removed node will be also removed. This will not affect + jmp terms of blk terms. {2 Inserting edges} - Edges in IR graph represents a transfer of a control flow - between basic blocks. The basic block in IR is more reach, - rather then a node in a graph. For example, in blk term the - order of jumps matters. Jump [n] is taken, only if guard - conditions of jumps [0] to [n-1] evaluated to [false] (like - switch statement in C language). The order of edges in a graph - is unspecified. So, some precaution should be taken, to handle - edge removing and inserting correctly. Each edge is labeled - with abstract label, that represents the jump position in a - graph. - - When an edge is created it will look for corresponding jumps - in source node. If there exists such jump, and it points to - the destination, then it will be left untouched. If it points - to a different node, then it will be fixed to point at the - a given destination. If there is no position in a slot, - represented by the a given label, then it will be - inserted. Dummy jumps will be prepended before the inserted - jump, if needed. - - When an edge is inserted into the graph, then source and - destination nodes are inserted or updated (depending on whether - they were already present in the graph). As a result, the - graph must contain at least nodes, incident to the edge, and - the edge itself. + Edges in IR graph represents a transfer of a control flow between basic + blocks. The basic block in IR is more reach, rather then a node in a + graph. For example, in blk term the order of jumps matters. Jump [n] is + taken, only if guard conditions of jumps [0] to [n-1] evaluated to + [false] (like switch statement in C language). The order of edges in a + graph is unspecified. So, some precaution should be taken, to handle + edge removing and inserting correctly. Each edge is labeled with + abstract label, that represents the jump position in a graph. + + When an edge is created it will look for corresponding jumps in source + node. If there exists such jump, and it points to the destination, then + it will be left untouched. If it points to a different node, then it + will be fixed to point at the a given destination. If there is no + position in a slot, represented by the a given label, then it will be + inserted. Dummy jumps will be prepended before the inserted jump, if + needed. + + When an edge is inserted into the graph, then source and destination + nodes are inserted or updated (depending on whether they were already + present in the graph). As a result, the graph must contain at least + nodes, incident to the edge, and the edge itself. {2 Updating edge} - Updating an edge is basically the same, as updating incident - nodes, a given that the edge exists in the graph. + Updating an edge is basically the same, as updating incident nodes, a + given that the edge exists in the graph. {2 Removing edge} - Removing an edge is not symmetric with edge insertion. It - doesn't remove the incident nodes, but instead removes jumps - from the source node to destination. The jumps are removed - accurately, so that the order (and semantics) is preserved. If - the removed jump was in the middle of the sequence then it is - substituted by a dummy jump with [false] guard. - *) + Removing an edge is not symmetric with edge insertion. It doesn't remove + the incident nodes, but instead removes jumps from the source node to + destination. The jumps are removed accurately, so that the order (and + semantics) is preserved. If the removed jump was in the middle of the + sequence then it is substituted by a dummy jump with [false] guard. *) module Ir : sig type t type edge type node - (** since in IR the order of edges defines semantics, we provide - extra functions *) + (** since in IR the order of edges defines semantics, we provide extra + functions *) module Edge : sig - include Edge with type graph = t - and type node = node - and type t = edge + (**/**) + + include Edge with type graph = t and type node = node and type t = edge - (** [jmps dir e g] enumerates all jumps (including calls, - interrupts, indirects, etc), that occurs before if - [dir = `before] or after if [dir = `after] an edge [e] *) - val jmps : [`after | `before] -> t -> graph -> jmp term seq + (**/**) - (** [edges dir e g] enumerates all edges occurring before of - after an edge [e] in graph [g] *) - val edges : [`after | `before] -> t -> graph -> t seq + val jmps : [ `after | `before ] -> t -> graph -> jmp term seq + (** [jmps dir e g] enumerates all jumps (including calls, interrupts, + indirects, etc), that occurs before if [dir = `before] or after if + [dir = `after] an edge [e] *) + + val edges : [ `after | `before ] -> t -> graph -> t seq + (** [edges dir e g] enumerates all edges occurring before of after an + edge [e] in graph [g] *) - (** [jmp e] returns a jmp term associated with edge [e] *) val jmp : t -> jmp term + (** [jmp e] returns a jmp term associated with edge [e] *) - (** [tid e] returns a tid of a jmp term that is associated - with an edge [e] *) val tid : t -> tid + (** [tid e] returns a tid of a jmp term that is associated with an edge + [e] *) - (** [cond e g] computes a condition expression that is - asserted to be [true] if this branch is taken. - - Note: this is not the same as a condition associated with - the jmp term itself, it takes into account all conditions - preceding the edge. - *) val cond : t -> graph -> exp + (** [cond e g] computes a condition expression that is asserted to be + [true] if this branch is taken. + + Note: this is not the same as a condition associated with the jmp + term itself, it takes into account all conditions preceding the + edge. *) + + (**/**) include Printable.S with type t := t + + (**/**) end (** IR Graph node. Node is labeled by the [blk term].*) module Node : sig - include Node with type graph = t - and type t = node - and type edge = edge - and type label = blk term + include + Node + with type graph = t + and type t = node + and type edge = edge + and type label = blk term + include Printable.S with type t := t end - include Graph with type t := t - and type node := node - and type edge := edge - and type Node.label = blk term - and module Node := Node - and module Edge := Edge + include + Graph + with type t := t + and type node := node + and type edge := edge + and type Node.label = blk term + and module Node := Node + and module Edge := Edge end (** Graph of Term identifiers. - This is a graph where all information is distilled to term - identifiers and relations between them, that are also labeled with - term identifiers. *) + This is a graph where all information is distilled to term identifiers + and relations between them, that are also labeled with term identifiers. + *) module Tid : sig type node = tid - (** [start] is a pseudo node that is used as the entry node of a graph. *) val start : node + (** [start] is a pseudo node that is used as the entry node of a graph. *) - (** [exit] is a pseudo node that is used as the exit node of a graph. *) val exit : node - include Graph with type node := tid - and type Node.label = tid - and type Edge.label = tid - end + (** [exit] is a pseudo node that is used as the exit node of a graph. *) + include + Graph + with type node := tid + and type Node.label = tid + and type Edge.label = tid + end end (** The interface to the disassembler level. - The following definitions are used in documentation of modules - and functions in this interface. + The following definitions are used in documentation of modules and + functions in this interface. - An {i instruction} is a sequence of consecutive bytes that has - known decoding in the given instruction set architecture - (ISA). The following semantic properties of an instruction, as - provided by ISA specification. In the definitions below the - following properties play an important role + An {i instruction} is a sequence of consecutive bytes that has known + decoding in the given instruction set architecture (ISA). The following + semantic properties of an instruction, as provided by ISA specification. + In the definitions below the following properties play an important role (see {!Insn.property} for more details about the properties): - [call]; - [jump]. - An {i instruction address} is the address of the first byte of - the instruction. + An {i instruction address} is the address of the first byte of the + instruction. - A [jump] instruction {i destination} is an address defined by - ISA specification to which the control flow should transfer if - the jump is taken. Potentially, it is possible that the - destination of a jump instruction follows the instruction, but - otherwise, the instruction that follows the instruction is not - the destination, only destinations of the {i taken} jump are - considered to be in the set of destinations of an instruction. + A [jump] instruction {i destination} is an address defined by ISA + specification to which the control flow should transfer if the jump is + taken. Potentially, it is possible that the destination of a jump + instruction follows the instruction, but otherwise, the instruction that + follows the instruction is not the destination, only destinations of the + {i taken} jump are considered to be in the set of destinations of an + instruction. - An instruction is a {i conditional} [jump] if it is a [jump] - instruction that is not always taken, as defined by the ISA - specification. + An instruction is a {i conditional} [jump] if it is a [jump] instruction + that is not always taken, as defined by the ISA specification. - An instruction is a {i barrier} if it a [jump] that is not a - [call] and is not conditional. + An instruction is a {i barrier} if it a [jump] that is not a [call] and is + not conditional. - An {i execution order}, is an order in which CPU executes - instructions. + An {i execution order}, is an order in which CPU executes instructions. - The {i linear order} of a sequence of instructions is the - ascending order of their addresses. + The {i linear order} of a sequence of instructions is the ascending order + of their addresses. - An instruction is {i delayed} by [m > 0] instructions if it takes - effect not immediately but after [m] other instructions are - executed. + An instruction is {i delayed} by [m > 0] instructions if it takes effect + not immediately but after [m] other instructions are executed. - An instruction [i(k)] {i follows} the instruction [i(j)] if - [i(j)] is not a barrier and either the address of [i(k)] is - the successor of the address of the last byte of [i(j)] or if - either [i(k+m)] or [i(k)] is an instruction that is delayed - by [m > 0] instructions. + An instruction [i(k)] {i follows} the instruction [i(j)] if [i(j)] is not + a barrier and either the address of [i(k)] is the successor of the address + of the last byte of [i(j)] or if either [i(k+m)] or [i(k)] is an + instruction that is delayed by [m > 0] instructions. A {i chain of instructions} is a sequence of instruction - [{i(0); ...; i(k),i(k+1),i(n)}] so that i(k+1) is either - a resolved destination of [i(k)] or follows it. An instruction - can belong to more than one chain. + [{i(0); ...; i(k),i(k+1),i(n)}] so that i(k+1) is either a resolved + destination of [i(k)] or follows it. An instruction can belong to more + than one chain. - A {i valid chain of instructions} is a chain where the last - instruction is a [jump] instruction that is either indirect or - its destinations belong to some previous jump in the same - chain. + A {i valid chain of instructions} is a chain where the last instruction is + a [jump] instruction that is either indirect or its destinations belong to + some previous jump in the same chain. An instruction is {i valid} if it belongs to a valid chain of instructions. - A byte is {i data} if one the following is true: - 1) its address is an address of an instruction that is not valid; - 2) it was classified in the knowledge base as data; - 3) it is not an instruction. + A byte is {i data} if one the following is true: 1) its address is an + address of an instruction that is not valid; 2) it was classified in the + knowledge base as data; 3) it is not an instruction. - A {i basic block} is an non-empty instruction chain - [{i(1); ... i(n)}] such that for each [1 < i <= n], + A {i basic block} is an non-empty instruction chain [{i(1); ... i(n)}] + such that for each [1 < i <= n], - [i(i)] follows [i(i-1)]; - - there is no valid instruction in the knowledge base that has - [i(i)] as a known destination; + - there is no valid instruction in the knowledge base that has [i(i)] as a + known destination; - [i(i)] is not a jump when [i < n]. A {i subroutine} is a non-empty finite set of basic blocks [{b(1); ..; b(n)}] such that [b(1)] dominates each block in - [{b(2); ..; b(n)}] (which also implies that they are - reachable) and [b(1)] is called the {i entry} block (or point). - *) + [{b(2); ..; b(n)}] (which also implies that they are reachable) and [b(1)] + is called the {i entry} block (or point). *) module Disasm : sig - (** Disassembler Driver. This is a low-level interface to the CFG reconstruction and - disassembling engine. It is used by BAP's high-level - components, such as the recursive-descent disassembler, so - there is in general no need to use it directly, unless you're - devising a custom disassembly pipe-line. - + disassembling engine. It is used by BAP's high-level components, such as + the recursive-descent disassembler, so there is in general no need to + use it directly, unless you're devising a custom disassembly pipe-line. - The disassembler is driven and controlled by the knowledge - base, so it is possible to modify the behavior of the BAP - disassembler layer through the knowledge base and turn the - default recursive-descent mode into something more - conservative, e.g., speculative, superset, shingled, or even - probabilistic disassembler. + The disassembler is driven and controlled by the knowledge base, so it + is possible to modify the behavior of the BAP disassembler layer through + the knowledge base and turn the default recursive-descent mode into + something more conservative, e.g., speculative, superset, shingled, or + even probabilistic disassembler. {3 Algorithm} {4 Memory Classification} When the driver is fed with a new memory region (using the - {!Driver.scan} function), it uses the knowledge base to - initially classify addresses that belong to this region. + {!Driver.scan} function), it uses the knowledge base to initially + classify addresses that belong to this region. For each byte in the region, it creates a temporary - [core-theory:program] object and sets is [label-address] - property to the address of that byte. It then queries if it is - a function start ([core-theory:is-function]) and if it is - known to be code or data (if the [core-theory:is-valid] - property is [(true)] then it is classified as code and if it - [(false)] then it is data, otherwise, it is undetermined). All - objects created during classification are deleted immediately - after the query and never committed to the knowledge base - (they are scoped objects). Therefore it is fine to speculate - and assume that all bytes are code by providing [(true)] to - the [is-valid] property of each byte. - + [core-theory:program] object and sets is [label-address] property to the + address of that byte. It then queries if it is a function start + ([core-theory:is-function]) and if it is known to be code or data (if + the [core-theory:is-valid] property is [(true)] then it is classified as + code and if it [(false)] then it is data, otherwise, it is + undetermined). All objects created during classification are deleted + immediately after the query and never committed to the knowledge base + (they are scoped objects). Therefore it is fine to speculate and assume + that all bytes are code by providing [(true)] to the [is-valid] property + of each byte. {4 Disassembling} - The disassembling starts from each function start (as - identified by the previous step) and continues until there is - no more unprocessed function starts and all addresses, which - were classified as code, are either successfully disassembled - or proved to be data. - - During disassembling the driver will discover more jump - destinations and add them to the worklist. The default mode is - speculative, i.e., when we meet a barrier, we continue - disassembling beyond it. If the worklist is empty, but the set - of addresses that were classified as code (in the first step) - is still not empty, which means that these addresses are not - reachable from the initially provided starting points - (function starts) then the minimal address is extracted from - the set and is assumed to be a start of a basic block and - added to the worklist. - - The process continues until both the worklist and the set of - code addresses are empty. When the process converges the - knowledge base will contain all disassembled instructions - (though some of them might be invalid). The result of the - disassembly is the value that contains information sufficient - to reconstruct the control-flow graph of the program. It could - be queried directly, using various accessors or folded over - with the [explore] function, which is a generalized - control-flow graph building function. + The disassembling starts from each function start (as identified by the + previous step) and continues until there is no more unprocessed function + starts and all addresses, which were classified as code, are either + successfully disassembled or proved to be data. + + During disassembling the driver will discover more jump destinations and + add them to the worklist. The default mode is speculative, i.e., when we + meet a barrier, we continue disassembling beyond it. If the worklist is + empty, but the set of addresses that were classified as code (in the + first step) is still not empty, which means that these addresses are not + reachable from the initially provided starting points (function starts) + then the minimal address is extracted from the set and is assumed to be + a start of a basic block and added to the worklist. + + The process continues until both the worklist and the set of code + addresses are empty. When the process converges the knowledge base will + contain all disassembled instructions (though some of them might be + invalid). The result of the disassembly is the value that contains + information sufficient to reconstruct the control-flow graph of the + program. It could be queried directly, using various accessors or folded + over with the [explore] function, which is a generalized control-flow + graph building function. {4 Backtracking} - The disassembler has a backtracking mechanism that enables it - to track each disassembled byte back to the memory byte that - was initially marked as code or a function start. When we - identify an instruction chain that is invalid, i.e., when data - follow a machine instruction or when its destination is some - data, we retract the whole chain of instructions. This ensures - that all valid instructions belong to at least one valid + The disassembler has a backtracking mechanism that enables it to track + each disassembled byte back to the memory byte that was initially marked + as code or a function start. When we identify an instruction chain that + is invalid, i.e., when data follow a machine instruction or when its + destination is some data, we retract the whole chain of instructions. + This ensures that all valid instructions belong to at least one valid instruction chain. The justification for not including invalid - instructions chains into the disassembly is that such chains - will unconditionally switch the CPU into the invalid - instruction state which will terminate the program. Since such - a chain can't include system calls or CPU exceptions (both are - not barriers) it can't have any side-effects visible outside - of the process so it could be safely ignored. + instructions chains into the disassembly is that such chains will + unconditionally switch the CPU into the invalid instruction state which + will terminate the program. Since such a chain can't include system + calls or CPU exceptions (both are not barriers) it can't have any + side-effects visible outside of the process so it could be safely + ignored. {4 Delay slots} - Any instruction could have a delay ([core-theory:delay]) that - is greater than zero. In that case the execution order of the - instructions will not be equal to the linear order of the - instructions addresses and [m] instructions that follow the - delayed instruction will be executed before that instruction - (put in the basic block before it), where [m] is the size of - the delay slot, expressed in instructions. + Any instruction could have a delay ([core-theory:delay]) that is greater + than zero. In that case the execution order of the instructions will not + be equal to the linear order of the instructions addresses and [m] + instructions that follow the delayed instruction will be executed before + that instruction (put in the basic block before it), where [m] is the + size of the delay slot, expressed in instructions. - @since 2.0.0 - *) + @since 2.0.0 *) module Driver : sig - + type state [@@deriving bin_io] (** information necessary to build the control-flow graph. - @since 2.2.0 implements [bin_io] - *) - type state [@@deriving bin_io] + @since 2.2.0 implements [bin_io] *) - (** abstract type for a sequence of instructions. *) type insns + (** abstract type for a sequence of instructions. *) - - (** abstract representation of a jump instruction. *) type jump + (** abstract representation of a jump instruction. *) - (** [init] the initial disassembler state. *) val init : state + (** [init] the initial disassembler state. *) - (** [merge x y] is a sum of information in states [x] and [y]. - - @since 2.2.0 - *) val merge : state -> state -> state + (** [merge x y] is a sum of information in states [x] and [y]. + @since 2.2.0 *) - (** [equal x y] is [true] if [x] and [y] denote equal graphs. - - @since 2.2.0 - *) val equal : state -> state -> bool + (** [equal x y] is [true] if [x] and [y] denote equal graphs. + @since 2.2.0 *) - (** [scan mem state] updates the state. - - If bytes in [mem] were already scanned then returns [state] - without changes. Nothing is stored in the knowledge base. - - If it is a new memory region then classifies and - disassembles the whole region. For each disassembled address - [p] an object [Theory.Label.for_addr p] is stored in the - knowledge base. It will contain a fully disassembled and - lifted instruction. All instructions that are subroutine - entry points will have [is-subroutine] property set to [(true)]. - - See {! sec:Algorithm} for the detailed description of the - algorithm. - *) val scan : mem -> state -> state knowledge + (** [scan mem state] updates the state. + If bytes in [mem] were already scanned then returns [state] without + changes. Nothing is stored in the knowledge base. - (** [explore ~block ~node ~edge ~init state] builds a - control-flow graph from the [state]. - - This function is a generalized fold function that calls the - user provided functions to construct the graph, which has - abstract type ['g]. - - - [block mem insns] creates a basic block of type ['n] which - covers memory [mem]; [insns] is the sequence of - instructions that constitute that memory, use [list_insns insns] - to get instructions in the linear order of their - addresses, or [execution_order] to get the execution - order (which might be different from linear in the - presence of delayed and speculative instructions). - - - [node x g] inserts the node [x] into graph [g]; - - [edge x y g] inserts an edge between nodes [x] and [y]; - - [init] is the initial graph; - - [follow x] if it returns [true] then the function will - follow this destination. Defaults to a function that - always evaluates to [true]. - - - [entry] is the entry point from which to build the graph, - if absent, then all basic blocks will be consecuitively, - in the order of ascending addresses, used as the entry - points. - - - [entries] is the sequence of entry points, if both [entry] - and [entries] are specified then [entry] is consed with - [entries]. + If it is a new memory region then classifies and disassembles the + whole region. For each disassembled address [p] an object + [Theory.Label.for_addr p] is stored in the knowledge base. It will + contain a fully disassembled and lifted instruction. All instructions + that are subroutine entry points will have [is-subroutine] property + set to [(true)]. - @since 2.2.0 the optional [entries] parameter was added. + See {!Algorithm} for the detailed description of the algorithm. *) - *) val explore : ?entries:addr Sequence.t -> ?entry:addr -> @@ -7686,499 +7240,500 @@ module Std : sig node:('n -> 'c -> 'c knowledge) -> edge:('n -> 'n -> 'c -> 'c knowledge) -> init:'c -> - state -> 'c knowledge + state -> + 'c knowledge + (** [explore ~block ~node ~edge ~init state] builds a control-flow graph + from the [state]. + + This function is a generalized fold function that calls the user + provided functions to construct the graph, which has abstract type + ['g]. + + - [block mem insns] creates a basic block of type ['n] which covers + memory [mem]; [insns] is the sequence of instructions that + constitute that memory, use [list_insns insns] to get instructions + in the linear order of their addresses, or [execution_order] to get + the execution order (which might be different from linear in the + presence of delayed and speculative instructions). + - [node x g] inserts the node [x] into graph [g]; + - [edge x y g] inserts an edge between nodes [x] and [y]; + - [init] is the initial graph; + - [follow x] if it returns [true] then the function will follow this + destination. Defaults to a function that always evaluates to [true]. - (** [list_insns xs] returns the list of instructions in the - ascending order of their addresses (or descending if [rev] - is [true] (defaults to [false]. *) - val list_insns : ?rev:bool -> insns -> Theory.Label.t list + - [entry] is the entry point from which to build the graph, if absent, + then all basic blocks will be consecuitively, in the order of + ascending addresses, used as the entry points. + - [entries] is the sequence of entry points, if both [entry] and + [entries] are specified then [entry] is consed with [entries]. - (** [execution_order xs] reruns a list of instructions in the - order in which they will be executed by the target CPU, - which could be different when instructions are delayed or - speculatively executed. *) - val execution_order : insns -> Theory.Label.t list knowledge + @since 2.2.0 the optional [entries] parameter was added. *) - (** {3 Low-level interface} + val list_insns : ?rev:bool -> insns -> Theory.Label.t list + (** [list_insns xs] returns the list of instructions in the ascending + order of their addresses (or descending if [rev] is [true] (defaults + to [false]. *) - All functions in this interface were made availabe in BAP - 2.2.0 unless stated otherwise. *) + val execution_order : insns -> Theory.Label.t list knowledge + (** [execution_order xs] reruns a list of instructions in the order in + which they will be executed by the target CPU, which could be + different when instructions are delayed or speculatively executed. *) + (** {3 Low-level interface} - (** [subroutines state] is a set of subroutine entry points that - either were provided through the knowledge base or later - discovered as destinations of call instructions. + All functions in this interface were made availabe in BAP 2.2.0 unless + stated otherwise. *) - @since 2.2.0 - *) val subroutines : state -> Set.M(Addr).t + (** [subroutines state] is a set of subroutine entry points that either + were provided through the knowledge base or later discovered as + destinations of call instructions. - (** [blocks state] is the set of addresses of instructions that - start basic blocks. - @since 2.2.0 + @since 2.2.0 *) - *) val blocks : state -> Set.M(Addr).t + (** [blocks state] is the set of addresses of instructions that start + basic blocks. + @since 2.2.0 *) - + val is_data : state -> addr -> bool (** [is_data state x] is [true] if [x] was classified as data. @since 2.2.0 *) - val is_data : state -> addr -> bool + val is_subroutine : state -> addr -> bool (** [is_subroutine s x] is [Set.mem (subroutines s) x]. @since 2.2.0 *) - val is_subroutine : state -> addr -> bool + val is_block : state -> addr -> bool (** [is_block s x] is [Set.mem (blocks s) x]. @since 2.2.0 *) - val is_block : state -> addr -> bool - + val is_jump : state -> addr -> bool (** [is_jump s x] is [true] if [x] is the maximal address of an instruction in the basic block that terminates with a jump. - Note, that when jumps are delayed, the linear order of - instructions differs from the execution order, so the - address of the last instruction is not the maximal address - in the basic block. + Note, that when jumps are delayed, the linear order of instructions + differs from the execution order, so the address of the last + instruction is not the maximal address in the basic block. @since 2.2.0 *) - val is_jump : state -> addr -> bool - - (** [jump state src] if [src] is the last in the linear order - instruction of a basic block that terminates with a jump - instruction then returns the descrption of that jump - instruction. - @since 2.2.0 - *) val jump : state -> addr -> jump option + (** [jump state src] if [src] is the last in the linear order instruction + of a basic block that terminates with a jump instruction then returns + the descrption of that jump instruction. + @since 2.2.0 *) - (** [destinations jump] returns the set of resolved - destinations. - @since 2.2.0 - *) val destinations : jump -> Set.M(Addr).t + (** [destinations jump] returns the set of resolved destinations. + @since 2.2.0 *) - + val is_call : jump -> bool (** [is_call jump] is [true] if jump is [call]. @since 2.2.0 *) - val is_call : jump -> bool + val is_barrier : jump -> bool (** [is_barrier jump] is [true] if jump is [barrier]. @since 2.2.0 *) - val is_barrier : jump -> bool end (** A set of subroutines. - A partition of a whole program control-flow graph into a - quotient set of subroutines. + A partition of a whole program control-flow graph into a quotient set of + subroutines. - The algorithm builds the minimal partion, i.e., it guarantees - that no other partition exists that has fewer - elements. + The algorithm builds the minimal partion, i.e., it guarantees that no + other partition exists that has fewer elements. - The number of elements heavily depends on the number of - function starts that were provided as the input (e.g., if the - function start identification procedure has a lot of false - positives, expect a lot of subroutines). + The number of elements heavily depends on the number of function starts + that were provided as the input (e.g., if the function start + identification procedure has a lot of false positives, expect a lot of + subroutines). - @since made public since 2.2.0 - *) + @since made public since 2.2.0 *) module Subroutines : sig type t [@@deriving bin_io] - (** [empty] is the empty partition. *) val empty : t + (** [empty] is the empty partition. *) - (** [equal p1 p2] is [true] if [p1] is structurally equal [p2]. *) val equal : t -> t -> bool + (** [equal p1 p2] is [true] if [p1] is structurally equal [p2]. *) - (** [update p s] updates the partitioning with the newly - disassembled blocks. *) val update : t -> Driver.state -> t knowledge + (** [update p s] updates the partitioning with the newly disassembled + blocks. *) - (** [belongs part ~entry addr] is true if [addr] belongs to a - basic block of subroutine with the given [entry]. *) val belongs : t -> entry:addr -> addr -> bool + (** [belongs part ~entry addr] is true if [addr] belongs to a basic block + of subroutine with the given [entry]. *) - (** [entries part] is the set of entry points of all - subroutines in the partition. - - [Set.length (entries part)] is the cardinality of the - partition. *) val entries : t -> Set.M(Addr).t + (** [entries part] is the set of entry points of all subroutines in the + partition. + + [Set.length (entries part)] is the cardinality of the partition. *) - (** [siblings part x y] is [true] if [x] and [y] belong to the - same subroutine. *) val siblings : t -> addr -> addr -> bool + (** [siblings part x y] is [true] if [x] and [y] belong to the same + subroutine. *) end type t = disasm - (** [create cfg] *) val create : cfg -> t + (** [create cfg] *) - (** [disassemble ?roots arch mem] disassemble provided memory region - [mem] using best available algorithm and backend for the specified - [arch]. Roots, if provided, should point to memory regions, that - are believed to contain code. At best, this should be a list of - function starts. If no roots are provided, then the starting - address of the provided memory [mem] will be used as a root. - - The returned value will contain all memory reachable from the - a given set of roots, at our best knowledge. *) val of_mem : ?backend:string -> ?brancher:brancher -> - ?rooter:rooter -> arch -> mem -> t Or_error.t + ?rooter:rooter -> + arch -> + mem -> + t Or_error.t + (** [disassemble ?roots arch mem] disassemble provided memory region [mem] + using best available algorithm and backend for the specified [arch]. + Roots, if provided, should point to memory regions, that are believed to + contain code. At best, this should be a list of function starts. If no + roots are provided, then the starting address of the provided memory + [mem] will be used as a root. + + The returned value will contain all memory reachable from the a given + set of roots, at our best knowledge. *) - (** [disassemble_image image] disassemble a given image. - Will take executable segments of the image and disassemble it, - applying [disassemble] function. If no roots are specified, then - symbol table will be used as a source of roots. If file doesn't - contain one, then entry point will be used. - *) val of_image : ?backend:string -> - ?brancher:brancher -> ?rooter:rooter -> image -> t Or_error.t + ?brancher:brancher -> + ?rooter:rooter -> + image -> + t Or_error.t + (** [disassemble_image image] disassemble a given image. Will take + executable segments of the image and disassemble it, applying + [disassemble] function. If no roots are specified, then symbol table + will be used as a source of roots. If file doesn't contain one, then + entry point will be used. *) - (** [disassemble_file ?roots path] takes a path to a binary and - disassembles it *) val of_file : ?backend:string -> - ?brancher:brancher -> ?rooter:rooter -> - ?loader:string -> string -> t Or_error.t - - (** [With_exn.f] is the same as [f] except that it throws an - exception instead of returning [Error]. *) + ?brancher:brancher -> + ?rooter:rooter -> + ?loader:string -> + string -> + t Or_error.t + (** [disassemble_file ?roots path] takes a path to a binary and disassembles + it *) + + (** [With_exn.f] is the same as [f] except that it throws an exception + instead of returning [Error]. *) module With_exn : sig + val of_mem : + ?backend:string -> + ?brancher:brancher -> + ?rooter:rooter -> + arch -> + mem -> + t + (** see {!Disasm.of_mem} *) - (** see {!Disasm.of_mem} *) - val of_mem : ?backend:string -> ?brancher:brancher -> - ?rooter:rooter -> arch -> mem -> t - (** see {!Disasm.of_image} *) - val of_image : ?backend:string -> ?brancher:brancher -> - ?rooter:rooter -> image -> t + val of_image : + ?backend:string -> ?brancher:brancher -> ?rooter:rooter -> image -> t + (** see {!Disasm.of_image} *) + val of_file : + ?backend:string -> + ?brancher:brancher -> + ?rooter:rooter -> + ?loader:string -> + string -> + t (** see {!Disasm.of_file} *) - val of_file : ?backend:string -> ?brancher:brancher -> - ?rooter:rooter -> ?loader:string -> string -> t end - (** [merge d1 d2] is a union of control flow graphs and erros of - the two disassemblers. *) val merge : t -> t -> t + (** [merge d1 d2] is a union of control flow graphs and erros of the two + disassemblers. *) - (** returns all instructions that was successfully decoded in an - ascending order of their addresses. Each instruction is - accompanied with its block of memory. *) val insns : t -> (mem * insn) seq + (** returns all instructions that was successfully decoded in an ascending + order of their addresses. Each instruction is accompanied with its block + of memory. *) - (** A whole program CFG. *) val cfg : t -> cfg + (** A whole program CFG. *) - (** {2 Tags} *) + (** {2 Tags} *) - (** machine instruction. *) val insn : insn tag + (** machine instruction. *) end type symtab - (** Reconstructed symbol table. *) + (** Reconstructed symbol table. *) module Symtab : sig - (** This data structure holds information about functions that - were found in the executable.*) + (** This data structure holds information about functions that were found in + the executable.*) - (** symbol table *) type t = symtab [@@deriving sexp_of] + (** symbol table *) - (** [(name,entry,graph)] a simple representation of a function *) type fn = string * block * cfg [@@deriving sexp_of] + (** [(name,entry,graph)] a simple representation of a function *) - (** empty symbol table *) val empty : t + (** empty symbol table *) - (** [create disasm calls] creates the symbol table given the - disassembly state [disasm] and callgraph [calls]. - - @since 2.6.0 - *) val create : Disasm.Driver.state -> Disasm.Subroutines.t -> t KB.t + (** [create disasm calls] creates the symbol table given the disassembly + state [disasm] and callgraph [calls]. + + @since 2.6.0 *) - (** [add_symbol table name entry blocks] extends [table] with a - new symbol with a given [name], [entry] block and body - [blocks]. *) val add_symbol : t -> fn -> t + (** [add_symbol table name entry blocks] extends [table] with a new symbol + with a given [name], [entry] block and body [blocks]. *) - (** [remove table fn] removes symbol [fn] from [table] *) val remove : t -> fn -> t + (** [remove table fn] removes symbol [fn] from [table] *) - (** [find_by_name symbols name] finds a symbol with a given name. *) - val find_by_name : t -> string -> fn option + val find_by_name : t -> string -> fn option + (** [find_by_name symbols name] finds a symbol with a given name. *) - (** [find_by_start symbols addr] finds a symbol that starts from - a given address *) val find_by_start : t -> addr -> fn option + (** [find_by_start symbols addr] finds a symbol that starts from a given + address *) - (** [owners addr] return a list of functions that owns [addr] *) val owners : t -> addr -> fn list + (** [owners addr] return a list of functions that owns [addr] *) - (** [dominators syms mem] returns a list of functions that - dominates over provided memory region [mem] *) val dominators : t -> mem -> fn list + (** [dominators syms mem] returns a list of functions that dominates over + provided memory region [mem] *) - (** [intersecting_mem syms mem] returns a list of functions, that - resides in memory region [mem] *) val intersecting : t -> mem -> fn list + (** [intersecting_mem syms mem] returns a list of functions, that resides in + memory region [mem] *) - (** [to_sequence symtab] returns a sequence of functions *) val to_sequence : t -> fn seq + (** [to_sequence symtab] returns a sequence of functions *) - (** [span fn] returns a memory map of a region occupied by a - function [fn] *) val span : fn -> unit memmap - - (** [callee symtab address] returns a callee which is - called from a block with the given [address]. - - @since 2.5.0 + (** [span fn] returns a memory map of a region occupied by a function [fn] *) + val callee : t -> addr -> string option + (** [callee symtab address] returns a callee which is called from a block + with the given [address]. + + @since 2.5.0 *) end type lifter = mem -> Disasm_expert.Basic.full_insn -> bil Or_error.t (** A BIL model of CPU - In general this is a model of a processor architecture, involving - ALU, processing unit, registers and memory. + In general this is a model of a processor architecture, involving ALU, + processing unit, registers and memory. - The definitions in this module are so generic, that they - present on all processors. - *) + The definitions in this module are so generic, that they present on all + processors. *) module type CPU = sig - - (** A set of general purpose registers *) val gpr : Var.Set.t + (** A set of general purpose registers *) - (** Memory *) val mem : var + (** Memory *) - (** Stack pointer *) - val sp : var + val sp : var + (** Stack pointer *) - (** {4 Flag registers} *) + (** {4 Flag registers} *) - (** zero flag *) - val zf : var + val zf : var + (** zero flag *) - (** carry flag *) - val cf : var + val cf : var + (** carry flag *) - (** overflow flag *) - val vf : var + val vf : var + (** overflow flag *) - (** negative flag *) - val nf : var + val nf : var + (** negative flag *) - (** {3 Predicates} *) + (** {3 Predicates} *) - (** [is_reg var] true if [var] is a processor register *) val is_reg : var -> bool + (** [is_reg var] true if [var] is a processor register *) - (** [is_flag reg] is true if [reg] is a flag register *) val is_flag : var -> bool + (** [is_flag reg] is true if [reg] is a flag register *) - (** [is_sp x = Var.same x sp] *) val is_sp : var -> bool + (** [is_sp x = Var.same x sp] *) - (** [is_bp x] is true if [x] can be possibly used as a base - pointer register. *) val is_bp : var -> bool + (** [is_bp x] is true if [x] can be possibly used as a base pointer + register. *) - (** [is_zf x = Var.same x zf] *) val is_zf : var -> bool + (** [is_zf x = Var.same x zf] *) - (** [is_cf x = Var.same x cf] *) val is_cf : var -> bool + (** [is_cf x = Var.same x cf] *) - (** [is_vf x = Var.same x vf] *) val is_vf : var -> bool + (** [is_vf x = Var.same x vf] *) - (** [is_nf x = Var.same x nf] *) val is_nf : var -> bool + (** [is_nf x = Var.same x nf] *) - (** [is_mem x = Var.same x mem] *) val is_mem : var -> bool + (** [is_mem x = Var.same x mem] *) end (** Abstract interface for all targets. - Each target supported by BAP implements this interface. To get - access to the implementation use - {{!target_of_arch}target_of_arch} function. Code written using - this interface is cross-platform, i.e., target agnostic. If you - want to write target-specific code, then use directly - corresponding libraries: {{!ARM}ARM}, {{!IA32}IA32}, - {{!AMD64}AMD64}. *) + Each target supported by BAP implements this interface. To get access to + the implementation use {{!target_of_arch}target_of_arch} function. Code + written using this interface is cross-platform, i.e., target agnostic. If + you want to write target-specific code, then use directly corresponding + libraries: {{!ARM}ARM}, {{!IA32}IA32}, {{!AMD64}AMD64}. *) module type Target = sig - - (** access to the CPU variables. *) module CPU : CPU + (** access to the CPU variables. *) - (** [lift mem insn] lifts provided instruction to BIL. - Usually you do not need to call this function directly, as - [disassemble] function will do the lifting. - - postcondition: the returned BIL code is well-typed. - *) val lift : lifter + (** [lift mem insn] lifts provided instruction to BIL. Usually you do not + need to call this function directly, as [disassemble] function will do + the lifting. + + postcondition: the returned BIL code is well-typed. *) end - (** [target_of_arch arch] returns a module packed into value, that - abstracts target architecture. The returned module has type - {!Target} and can be unpacked locally with: + val target_of_arch : arch -> (module Target) + (** [target_of_arch arch] returns a module packed into value, that abstracts + target architecture. The returned module has type {!Target} and can be + unpacked locally with: {[ let module Target = (val target_of_arch arch) in ]} *) - val target_of_arch : arch -> (module Target) - (** Register new target architecture. If target for the given arch - already exists, then it will be superseded by the new - target. *) val register_target : arch -> (module Target) -> unit + (** Register new target architecture. If target for the given arch already + exists, then it will be superseded by the new target. *) (** Term identifier. - A term identifier is a knowledge object of class - [core-theory:program] that represents a program. Objects of this - class have many properties that describe program syntax - (representation) and semantics. The set of properties is - extensible and each plugin/library can add its own properties, - for the current set of all properties of all classes, see - [bap list classes], or [bap list class -f core-theory:program] - to list the properties of the class to which Tid.t belongs. - - - *) + A term identifier is a knowledge object of class [core-theory:program] + that represents a program. Objects of this class have many properties that + describe program syntax (representation) and semantics. The set of + properties is extensible and each plugin/library can add its own + properties, for the current set of all properties of all classes, see + [bap list classes], or [bap list class -f core-theory:program] to list the + properties of the class to which Tid.t belongs. *) module Tid : sig type t = tid + val create : unit -> t (** [create ()] creates a fresh newly term identifier. - This function has a side-effect of changing the Toplevel - knowledge base. + This function has a side-effect of changing the Toplevel knowledge base. *) - val create : unit -> t - + val for_name : ?package:string -> string -> t (** [for_name name] creates a Term identifier for the given [name]. - Creates a new program object that denotes a program with the - given name. See [Theory.Label.for_name] from the - [Bap_core_theory] interface for more information. + Creates a new program object that denotes a program with the given name. + See [Theory.Label.for_name] from the [Bap_core_theory] interface for + more information. @since 2.0.0 - @since 2.2.0 has the optional [package] parameter. - *) - val for_name : ?package:string -> string -> t - + @since 2.2.0 has the optional [package] parameter. *) + val for_addr : ?package:string -> addr -> t (** [for_addr addr] creates a Term identifier for the given [addr]. - Creates a new program object that denotes a program with the - given addr. See [Theory.Label.for_addr] from the - [Bap_core_theory] interface for more information. + Creates a new program object that denotes a program with the given addr. + See [Theory.Label.for_addr] from the [Bap_core_theory] interface for + more information. @since 2.0.0 - @since 2.2.0 has the optional [package] parameter. - *) - val for_addr : ?package:string -> addr -> t + @since 2.2.0 has the optional [package] parameter. *) + val for_ivec : ?package:string -> int -> t (** [for_ivec ivec] creates a Term identifier for the given [ivec]. - Creates a new program object that denotes a program with the - given ivec. See [Theory.Label.for_ivec] from the - [Bap_core_theory] interface for more information. + Creates a new program object that denotes a program with the given ivec. + See [Theory.Label.for_ivec] from the [Bap_core_theory] interface for + more information. @since 2.0.0 - @since 2.2.0 has the optional [package] parameter. - *) - val for_ivec : ?package:string -> int -> t - + @since 2.2.0 has the optional [package] parameter. *) - (** [set_name tid name] associates a [name] with a given - term identifier [tid]. Any previous associations are - overridden.*) val set_name : tid -> string -> unit + (** [set_name tid name] associates a [name] with a given term identifier + [tid]. Any previous associations are overridden.*) - (** [name tid] returns a term name: either a string name - with at-prefix, or number identifier. *) val name : tid -> string + (** [name tid] returns a term name: either a string name with at-prefix, or + number identifier. *) - (** [from_string s] parses tid from string. The expected - format is: + val from_string : string -> tid Or_error.t + (** [from_string s] parses tid from string. The expected format is: {v tid = symbol | number. symbol = "@", string. number = "%", hex. string = ?sequence of characters?. number = ?ocaml hexadecimal representation?. - v} - *) - val from_string : string -> tid Or_error.t + v} *) - (** [from_string_exn s] same as [from_string_exn] but throws - exception on error. *) val from_string_exn : string -> tid + (** [from_string_exn s] same as [from_string_exn] but throws exception on + error. *) - (** infix notation for [from_string_exn] *) - val (!!) : string -> tid + val ( !! ) : string -> tid + (** infix notation for [from_string_exn] *) include Regular.S with type t := t end (** Live Variables. - Computes the set of live variables for each block in a - subroutine, taking into account subroutine arguments. + Computes the set of live variables for each block in a subroutine, taking + into account subroutine arguments. - A variable [v] is {i live} at a program point [x] if there exists a - path from [x] to a use of [v] that doesn't go through a - definition of [v]. + A variable [v] is {i live} at a program point [x] if there exists a path + from [x] to a use of [v] that doesn't go through a definition of [v]. - This module computes liveness on the block granularity, which - gives rise to the following notions. + This module computes liveness on the block granularity, which gives rise + to the following notions. A variable is {i live-in} at a basic block [x] if it is live at the begining of the block [x]. - A variable is {i live-out} at a basic block [x] if it is live on - any of the outcoming edges of [x]. + A variable is {i live-out} at a basic block [x] if it is live on any of + the outcoming edges of [x]. - A variable is {i live-through} at a basic block [x] if it is - both live-in and live-out at it. + A variable is {i live-through} at a basic block [x] if it is both live-in + and live-out at it. {3 Liveness in the SSA form} - The algorithm works with the SSA form. From the perspective of - liveness, the phi-nodes define their right-hand sides on the - edges incoming from the corresponding blocks. More formally, for - a phi-node at block [b0], [x0 := phi([b1,x1; b2,x2;...;bN,xN])], - the defined variable [x0] is considered to be defined at blocks - [bi] and the variable [xi] live-out of basic block [bi], for - [0 < i <= N]. + The algorithm works with the SSA form. From the perspective of liveness, + the phi-nodes define their right-hand sides on the edges incoming from the + corresponding blocks. More formally, for a phi-node at block [b0], + [x0 := phi([b1,x1; b2,x2;...;bN,xN])], the defined variable [x0] is + considered to be defined at blocks [bi] and the variable [xi] live-out of + basic block [bi], for [0 < i <= N]. Intuitively, this can be visualized as if the following program, {v - b1: x.1 := 1 goto b3 @@ -8195,7 +7750,6 @@ module Std : sig is rewritten to an equivalent (but not in SSA) program, {v - b1: x.1 := 1 x.3 := x1 @@ -8210,422 +7764,416 @@ module Std : sig ... v} - That means that a variable defined by a phi-node in a block [x] - could be live-in at the block [x]. + That means that a variable defined by a phi-node in a block [x] could be + live-in at the block [x]. - The {!Live.defs} and {!Live.uses} includes the variables from - the corresponding phi-nodes, e.g., [Live.defs b1] will is - [{x1,x3}] and [Live.uses b1] is [{x1}]. + The {!Live.defs} and {!Live.uses} includes the variables from the + corresponding phi-nodes, e.g., [Live.defs b1] will is [{x1,x3}] and + [Live.uses b1] is [{x1}]. @since 2.5.0 - @before 2.5.0 see {!Sub.compute_liveness} *) + @before 2.5.0 see {!Sub.compute_liveness} *) module Live : sig type t + val compute : ?keep:Var.Set.t -> sub term -> t (** [compute sub] computes the subroutine liveness information. - Variables specified by [keep] will be kept live at the exit - from the function. In addition to the variables in [keep], - all free variables used by in and in/out arguments of the - subroutine will be kept alive at the exit.*) - val compute : ?keep:Var.Set.t -> sub term -> t + Variables specified by [keep] will be kept live at the exit from the + function. In addition to the variables in [keep], all free variables + used by in and in/out arguments of the subroutine will be kept alive at + the exit.*) + val vars : t -> Var.Set.t (** [vars live] the variables that are live in the subroutine. - The set of variables that are live-in on the entry block of - the subroutine that was used to compute [live]. The live - variables of the subroutine also called free variables or - upper-exposed variables. They may be used in the subroutine - before they are assigned. *) - val vars : t -> Var.Set.t + The set of variables that are live-in on the entry block of the + subroutine that was used to compute [live]. The live variables of the + subroutine also called free variables or upper-exposed variables. They + may be used in the subroutine before they are assigned. *) + val ins : t -> tid -> Var.Set.t (** [ins live blk] the set of live-in variables at [blk]. - The set of variables that are live at the entry to the basic - block [blk]. + The set of variables that are live at the entry to the basic block + [blk]. - Returns an empty set if [blk] doesn't belong to the [sub] graph. - *) - val ins : t -> tid -> Var.Set.t + Returns an empty set if [blk] doesn't belong to the [sub] graph. *) + val outs : t -> tid -> Var.Set.t (** [outs live blk] the set of live-outs variables at [blk]. - The set of variables that are live at the exist from the basic - block [blk]. + The set of variables that are live at the exist from the basic block + [blk]. Returns an empty set if [blk] doesn't belong to the [sub] graph.*) - val outs : t -> tid -> Var.Set.t - (** [blks live var] the set of blks where [var] is live-in. *) val blks : t -> var -> Set.M(Tid).t + (** [blks live var] the set of blks where [var] is live-in. *) + val defs : t -> tid -> Var.Set.t (** [defs live blk] the set of variables defined by [blk]. - Aka the {i KILL} set, i.e., the set of variables whose - liveness is killed in the block [blk]. *) - val defs : t -> tid -> Var.Set.t + Aka the {i KILL} set, i.e., the set of variables whose liveness is + killed in the block [blk]. *) + val uses : t -> tid -> Var.Set.t (** [uses live blk] the set of variables used by [blk]. - Aka the {i GEN} set, i.e., the set of variables generated by - the block [blk]. *) - val uses : t -> tid -> Var.Set.t + Aka the {i GEN} set, i.e., the set of variables generated by the block + [blk]. *) + val fold : t -> init:'a -> f:('a -> tid -> Var.Set.t -> 'a) -> 'a (** [fold live ~init ~f] folds over live-ins of blocks. - Applies [f] to live-in set of variables of each block of the - subroutine. + Applies [f] to live-in set of variables of each block of the subroutine. Note, pseudo start and exit nodes are not folded over. *) - val fold : t -> init:'a -> f:('a -> tid -> Var.Set.t -> 'a) -> 'a + val solution : t -> (tid, Var.Set.t) Solution.t (** [solution live] returns the fixed point solution. The solution is the mapping from blocks to their live-outs.*) - val solution : t -> (tid, Var.Set.t) Solution.t - - (** [pp ppf live] prints the live-in sets of each basic block. *) val pp : Format.formatter -> t -> unit + (** [pp ppf live] prints the live-in sets of each basic block. *) end (** IR language term. - Term is a building block of the - {{!sema}Intermediate Representation} of the binary program. + Term is a building block of the {{!sema}Intermediate Representation} of + the binary program. - This module provides functions that are overloaded for - different term classes. Term class is denoted with an explicit - instance of type [('a,'b)cls], where ['a] stands for the parent - term and ['b] for the child term. + This module provides functions that are overloaded for different term + classes. Term class is denoted with an explicit instance of type + [('a,'b)cls], where ['a] stands for the parent term and ['b] for the child + term. {2 Example} Give a block - {[# let b = Blk.create ();;]} - {v val b : Blk.t = - 00000003: v} + {[ + # let b = Blk.create ();; + ]} + {v + val b : Blk.t = + 00000003: + v} - We can append a definition to it with an overloaded - [Term.append] + We can append a definition to it with an overloaded [Term.append] - {[# let b = Term.append def_t b d_1;;]} - {v val b : blk term = - 00000003: - 00000001: x := y + z - v} + {[ + # let b = Term.append def_t b d_1;; + ]} + {v + val b : blk term = + 00000003: + 00000001: x := y + z + v} Update a value of a definition in the block: - {[# let b = Term.update def_t b d_2;;]} - {v val b : blk term = - 00000003: - 00000001: x := true - v} *) + {[ + # let b = Term.update def_t b d_2;; + ]} + {v + val b : blk term = + 00000003: + 00000001: x := true + v} *) module Term : sig - - (** term type *) type 'a t = 'a term + (** term type *) - (** [clone term] creates an object with a fresh new identifier - that has the same contents as [term], i.e., that is - syntactically the same. The clone operation is shallow, all - subterms of [term] are unchanged. - *) val clone : 'a t -> 'a t + (** [clone term] creates an object with a fresh new identifier that has the + same contents as [term], i.e., that is syntactically the same. The clone + operation is shallow, all subterms of [term] are unchanged. *) - (** [same x y] returns true if [x] and [y] represents the same - entity, i.e., [Tid.(tid x = tid y)] *) val same : 'a t -> 'a t -> bool + (** [same x y] returns true if [x] and [y] represents the same entity, i.e., + [Tid.(tid x = tid y)] *) - (** [name t] returns a string representation of a term [t] identity *) val name : 'a t -> string + (** [name t] returns a string representation of a term [t] identity *) - (** [tid entity] returns a unique identifier of the [entity] *) val tid : 'a t -> tid + (** [tid entity] returns a unique identifier of the [entity] *) - (** [length t p] returns an amount of terms of [t] class in a - parent term [p] *) - val length : ('a,'b) cls -> 'a t -> int + val length : ('a, 'b) cls -> 'a t -> int + (** [length t p] returns an amount of terms of [t] class in a parent term + [p] *) - (** [find t p id] is [Some c] if term [p] has a subterm of type [t] - such that [tid c = id]. *) - val find : ('a,'b) cls -> 'a t -> tid -> 'b t option + val find : ('a, 'b) cls -> 'a t -> tid -> 'b t option + (** [find t p id] is [Some c] if term [p] has a subterm of type [t] such + that [tid c = id]. *) - (** [find_exn t p id] like {!find} but raises [Not_found] if nothing - is found. *) - val find_exn : ('a,'b) cls -> 'a t -> tid -> 'b t + val find_exn : ('a, 'b) cls -> 'a t -> tid -> 'b t + (** [find_exn t p id] like {!find} but raises [Not_found] if nothing is + found. *) - (** [update t p c] if term [p] contains a term with id equal to - [tid c] then return [p] with this term substituted with [p] *) - val update : ('a,'b) cls -> 'a t -> 'b t -> 'a t + val update : ('a, 'b) cls -> 'a t -> 'b t -> 'a t + (** [update t p c] if term [p] contains a term with id equal to [tid c] then + return [p] with this term substituted with [p] *) - (** [remove t p id] returns a term that doesn't contain element - with the a given [id] *) - val remove : ('a,_) cls -> 'a t -> tid -> 'a t + val remove : ('a, _) cls -> 'a t -> tid -> 'a t + (** [remove t p id] returns a term that doesn't contain element with the a + given [id] *) - (** [change t p id f] if [p] contains subterm with of a given kind - [t] and identifier [id], then apply [f] to this - subterm. Otherwise, apply [f] to [None]. If [f] return [None], - then remove this subterm (a given it did exist), otherwise, - update parent with a new subterm. *) - val change : ('a,'b) cls -> 'a t -> tid -> ('b t option -> 'b t option) -> 'a t + val change : + ('a, 'b) cls -> 'a t -> tid -> ('b t option -> 'b t option) -> 'a t + (** [change t p id f] if [p] contains subterm with of a given kind [t] and + identifier [id], then apply [f] to this subterm. Otherwise, apply [f] to + [None]. If [f] return [None], then remove this subterm (a given it did + exist), otherwise, update parent with a new subterm. *) - (** [enum ?rev t p] enumerate all subterms of type [t] of the - a given term [p] *) - val enum : ?rev:bool -> ('a,'b) cls -> 'a t -> 'b t seq + val enum : ?rev:bool -> ('a, 'b) cls -> 'a t -> 'b t seq + (** [enum ?rev t p] enumerate all subterms of type [t] of the a given term + [p] *) + val to_sequence : ?rev:bool -> ('a, 'b) cls -> 'a t -> 'b t seq (** [to_sequence ?rev t p] is a synonym for [enum]. *) - val to_sequence : ?rev:bool -> ('a,'b) cls -> 'a t -> 'b t seq - - (** [map t p ~f] returns term [p] with all subterms of type [t] - mapped with function [f] *) - val map : ('a,'b) cls -> 'a t -> f:('b t -> 'b t) -> 'a t - - (** [filter_map t p ~f] returns term [p] with all subterms of type - [t] filter_mapped with function [f], i.e., all terms for which - function [f] returned [Some thing] are substituted by the - [thing], otherwise they're removed from the parent term *) - val filter_map : ('a,'b) cls -> 'a t -> f:('b t -> 'b t option) -> 'a t - - (** [concat_map t p ~f] substitute subterm [c] of type [t] in - parent term [p] with [f c]. If [f c] is an empty list, then - [c] doesn't occur in a new parent term, if [f c] is a - singleton list, then [c] is substituted with the [f c], like - in [map]. If [f c] is a list of [n] elements, then in the - place of [c] this [n] elements are inserted. *) - val concat_map : ('a,'b) cls -> 'a t -> f:('b t -> 'b t list) -> 'a t + val map : ('a, 'b) cls -> 'a t -> f:('b t -> 'b t) -> 'a t + (** [map t p ~f] returns term [p] with all subterms of type [t] mapped with + function [f] *) + + val filter_map : ('a, 'b) cls -> 'a t -> f:('b t -> 'b t option) -> 'a t + (** [filter_map t p ~f] returns term [p] with all subterms of type [t] + filter_mapped with function [f], i.e., all terms for which function [f] + returned [Some thing] are substituted by the [thing], otherwise they're + removed from the parent term *) + + val concat_map : ('a, 'b) cls -> 'a t -> f:('b t -> 'b t list) -> 'a t + (** [concat_map t p ~f] substitute subterm [c] of type [t] in parent term + [p] with [f c]. If [f c] is an empty list, then [c] doesn't occur in a + new parent term, if [f c] is a singleton list, then [c] is substituted + with the [f c], like in [map]. If [f c] is a list of [n] elements, then + in the place of [c] this [n] elements are inserted. *) + + val filter : ('a, 'b) cls -> 'a t -> f:('b t -> bool) -> 'a t (** [filter t p ~f] returns a term [p] with subterms [c] for which [f c = false] removed. *) - val filter : ('a,'b) cls -> 'a t -> f:('b t -> bool) -> 'a t - - (** [first t p] returns the first subterm of type [t] of a given - parent [p] *) - val first : ('a,'b) cls -> 'a t -> 'b t option - - (** [last t p] returns a last subterm of type [t] of a given - parent [p] *) - val last : ('a,'b) cls -> 'a t -> 'b t option - - (** [next t p id] returns a term that is after a term with a given - [id], if such exists. *) - val next : ('a,'b) cls -> 'a t -> tid -> 'b t option - - (** [prev t p id] returns a term that precedes a term with a given - [id], if such exists. *) - val prev : ('a,'b) cls -> 'a t -> tid -> 'b t option - - (** [after t ?rev p tid] returns all subterms in term [p] that - occur after a term with a given [tid]. if [rev] is [true] or - omitted then terms are returned in the evaluation - order. Otherwise they're reversed. If there is no term with - a given [tid], then an empty sequence is returned. *) - val after : ('a,'b) cls -> ?rev:bool -> 'a t -> tid -> 'b t seq - - (** [before t ?rev p tid] returns all term that occurs before - definition with a given [tid] in blk. If there is no such - definition, then the sequence will be empty. @param rev has - the same meaning as in {!after}. *) - val before : ('a,'b) cls -> ?rev:bool -> 'a t -> tid -> 'b t seq - - (** [append t ~after:this p c] returns the [p] term with [c] - appended after [this] term. If [after] is not specified, then - append [def] to the end of the parent term (if it makes sense, - otherwise it is just added). If [this] doesn't occur in the - [p] term then do nothing. The term tid is preserved. *) - val append : ('a,'b) cls -> ?after:tid -> 'a t -> 'b t -> 'a t - - (** [prepend t ~before:this p c] returns the [p] with [c] inserted - before [this] term. If [before] is left unspecified, then - insert the [c] at the beginning of the [p] if it is a - sequence, otherwise just insert. If [this] is specified but - doesn't occur in the [p] then [p] is returned as is. In all - cases, the returned term has the same [tid] as [p]. *) - val prepend : ('a,'b) cls -> ?before:tid -> 'a t -> 'b t -> 'a t - - (** [nth t p n] returns [n]'th [t]-term of parent [p]. *) - val nth : ('a,'b) cls -> 'a t -> int -> 'b t option - - (** [nth_exn t p n] same as [nth], but raises exception if [n] is - not a valid position number. *) - val nth_exn : ('a,'b) cls -> 'a t -> int -> 'b t - (** @since 2.6.0 *) - module KB : sig - (** [map t p ~f] returns term [p] with all subterms of type [t] - mapped with function [f]. + val first : ('a, 'b) cls -> 'a t -> 'b t option + (** [first t p] returns the first subterm of type [t] of a given parent [p] + *) - @since 2.6.0 - *) - val map : ('a,'b) cls -> 'a t -> f:('b t -> 'b t knowledge) -> 'a t knowledge + val last : ('a, 'b) cls -> 'a t -> 'b t option + (** [last t p] returns a last subterm of type [t] of a given parent [p] *) - (** [filter_map t p ~f] returns term [p] with all subterms of type - [t] filter_mapped with function [f], i.e., all terms for which - function [f] returned [Some thing] are substituted by the - [thing], otherwise they're removed from the parent term. + val next : ('a, 'b) cls -> 'a t -> tid -> 'b t option + (** [next t p id] returns a term that is after a term with a given [id], if + such exists. *) - @since 2.6.0 - *) - val filter_map : ('a,'b) cls -> 'a t -> f:('b t -> 'b t option knowledge) -> 'a t knowledge + val prev : ('a, 'b) cls -> 'a t -> tid -> 'b t option + (** [prev t p id] returns a term that precedes a term with a given [id], if + such exists. *) + + val after : ('a, 'b) cls -> ?rev:bool -> 'a t -> tid -> 'b t seq + (** [after t ?rev p tid] returns all subterms in term [p] that occur after a + term with a given [tid]. if [rev] is [true] or omitted then terms are + returned in the evaluation order. Otherwise they're reversed. If there + is no term with a given [tid], then an empty sequence is returned. *) + + val before : ('a, 'b) cls -> ?rev:bool -> 'a t -> tid -> 'b t seq + (** [before t ?rev p tid] returns all term that occurs before definition + with a given [tid] in blk. If there is no such definition, then the + sequence will be empty. + @param rev has the same meaning as in {!after}. *) + + val append : ('a, 'b) cls -> ?after:tid -> 'a t -> 'b t -> 'a t + (** [append t ~after:this p c] returns the [p] term with [c] appended after + [this] term. If [after] is not specified, then append [def] to the end + of the parent term (if it makes sense, otherwise it is just added). If + [this] doesn't occur in the [p] term then do nothing. The term tid is + preserved. *) + val prepend : ('a, 'b) cls -> ?before:tid -> 'a t -> 'b t -> 'a t + (** [prepend t ~before:this p c] returns the [p] with [c] inserted before + [this] term. If [before] is left unspecified, then insert the [c] at the + beginning of the [p] if it is a sequence, otherwise just insert. If + [this] is specified but doesn't occur in the [p] then [p] is returned as + is. In all cases, the returned term has the same [tid] as [p]. *) + + val nth : ('a, 'b) cls -> 'a t -> int -> 'b t option + (** [nth t p n] returns [n]'th [t]-term of parent [p]. *) + + val nth_exn : ('a, 'b) cls -> 'a t -> int -> 'b t + (** [nth_exn t p n] same as [nth], but raises exception if [n] is not a + valid position number. *) + + (** @since 2.6.0 *) + module KB : sig + val map : + ('a, 'b) cls -> 'a t -> f:('b t -> 'b t knowledge) -> 'a t knowledge + (** [map t p ~f] returns term [p] with all subterms of type [t] mapped + with function [f]. + + @since 2.6.0 *) + + val filter_map : + ('a, 'b) cls -> + 'a t -> + f:('b t -> 'b t option knowledge) -> + 'a t knowledge + (** [filter_map t p ~f] returns term [p] with all subterms of type [t] + filter_mapped with function [f], i.e., all terms for which function + [f] returned [Some thing] are substituted by the [thing], otherwise + they're removed from the parent term. + + @since 2.6.0 *) + + val filter : + ('a, 'b) cls -> 'a t -> f:('b t -> bool knowledge) -> 'a t knowledge (** [filter t p ~f] returns a term [p] with subterms [c] for which [f c = false] removed. - @since 2.6.0 - *) - val filter : ('a,'b) cls -> 'a t -> f:('b t -> bool knowledge) -> 'a t knowledge + @since 2.6.0 *) end (** {2 Attributes} - Terms attribute set can be extended, using {{!Value}universal - values}. A value of type ['a tag] is used to denote an - attribute of type ['a] with the name [Value.Tag.name tag]. + Terms attribute set can be extended, using {{!Value}universal values}. A + value of type ['a tag] is used to denote an attribute of type ['a] with + the name [Value.Tag.name tag]. - With the provided interface Term can be treated as an - extensible record. + With the provided interface Term can be treated as an extensible record. *) - (** [set_attr term attr value] attaches an [value] to attribute - [attr] in [term] *) val set_attr : 'a t -> 'b tag -> 'b -> 'a t + (** [set_attr term attr value] attaches an [value] to attribute [attr] in + [term] *) - (** [attrs term attrs] returns the set of [attributes] associated - with a [term]*) val attrs : 'a t -> Dict.t + (** [attrs term attrs] returns the set of [attributes] associated with a + [term]*) - (** [with_attrs term attributes] returns a term with a new set of [attributes] *) val with_attrs : 'a t -> Dict.t -> 'a t + (** [with_attrs term attributes] returns a term with a new set of + [attributes] *) - (** [get_attr term attr] returns a value of the a given [attr] in - [term] *) val get_attr : 'a t -> 'b tag -> 'b option + (** [get_attr term attr] returns a value of the a given [attr] in [term] *) - (** [has_attr term attr] is [true] if [term] has attribute [attr] *) val has_attr : 'a t -> 'b tag -> bool + (** [has_attr term attr] is [true] if [term] has attribute [attr] *) - (** [del_attr term attr] deletes attribute [attr] from [term] *) val del_attr : 'a t -> 'b tag -> 'a t + (** [del_attr term attr] deletes attribute [attr] from [term] *) + (** Predefined attributes *) - (** {Predefined attributes} *) - - (** a term was artificially produced from a term with a given tid. *) val origin : tid tag + (** a term was artificially produced from a term with a given tid. *) - (** a term was introduced artificially by an analysis. *) val synthetic : unit tag + (** a term was introduced artificially by an analysis. *) - (** a term is identified as always non dead *) val live : unit tag + (** a term is identified as always non dead *) - (** a term is identified as dead *) val dead : unit tag + (** a term is identified as dead *) - (** to mark a term as visited by some algorithm *) val visited : unit tag + (** to mark a term as visited by some algorithm *) - (** precondition must on the entrance to the subroutine *) val precondition : exp tag + (** precondition must on the entrance to the subroutine *) - (** invariant must be always true while the term is evaluated *) val invariant : exp tag + (** invariant must be always true while the term is evaluated *) - (** must hold just after the term is left *) val postcondition : exp tag + (** must hold just after the term is left *) - (** {2 Higher order mapping} *) + (** {2 Higher order mapping} *) - (** Mapper performs deep identity term mapping. If you override any - method make sure that you didn't forget to invoke parent's - method, as OCaml will not call it for you. *) + (** Mapper performs deep identity term mapping. If you override any method + make sure that you didn't forget to invoke parent's method, as OCaml + will not call it for you. *) class mapper : object inherit Exp.mapper + method map_term : 't 'p. ('p, 't) cls -> 't term -> 't term (** [map_term cls t] dispatches [t] to corresponding method *) - method map_term : 't 'p. ('p,'t) cls -> 't term -> 't term - (** [run p] maps each sub in program [p] *) method run : program term -> program term + (** [run p] maps each sub in program [p] *) - (** [map_sub sub] maps each arg and blk in [sub] *) method map_sub : sub term -> sub term + (** [map_sub sub] maps each arg and blk in [sub] *) - (** [map_arg arg] is [arg] *) method map_arg : arg term -> arg term + (** [map_arg arg] is [arg] *) - (** [map_blk blk] is [blk] *) method map_blk : blk term -> blk term + (** [map_blk blk] is [blk] *) - (** [map_phi phi] is [phi] *) method map_phi : phi term -> phi term + (** [map_phi phi] is [phi] *) - (** [map_def def] is [def] *) method map_def : def term -> def term + (** [map_def def] is [def] *) - (** [map_jmp jmp] is [jmp] *) method map_jmp : jmp term -> jmp term + (** [map_jmp jmp] is [jmp] *) end - (** Visitor performs deep visiting. As always, don't forget to - overrid parent methods. The visitor comes with useful [enter_T] - [leave_T] that are no-ops in this visitor, so if you inherit - directly from it, then you may not call to the parent method. *) + (** Visitor performs deep visiting. As always, don't forget to overrid + parent methods. The visitor comes with useful [enter_T] [leave_T] that + are no-ops in this visitor, so if you inherit directly from it, then you + may not call to the parent method. *) class ['a] visitor : object inherit ['a] Exp.visitor + method enter_term : 't 'p. ('p, 't) cls -> 't term -> 'a -> 'a - method enter_term : 't 'p . ('p,'t) cls -> 't term -> 'a -> 'a - (** [visit_term cls t] dispatch term [t] to corresponding method *) - method visit_term : 't 'p . ('p,'t) cls -> 't term -> 'a -> 'a - method leave_term : 't 'p . ('p,'t) cls -> 't term -> 'a -> 'a + method visit_term : 't 'p. ('p, 't) cls -> 't term -> 'a -> 'a + (** [visit_term cls t] dispatch term [t] to corresponding method *) + method leave_term : 't 'p. ('p, 't) cls -> 't term -> 'a -> 'a method enter_program : program term -> 'a -> 'a - method run : program term -> 'a -> 'a + method run : program term -> 'a -> 'a method leave_program : program term -> 'a -> 'a - method enter_sub : sub term -> 'a -> 'a method visit_sub : sub term -> 'a -> 'a method leave_sub : sub term -> 'a -> 'a - method enter_blk : blk term -> 'a -> 'a method visit_blk : blk term -> 'a -> 'a method leave_blk : blk term -> 'a -> 'a - method enter_arg : arg term -> 'a -> 'a method visit_arg : arg term -> 'a -> 'a method leave_arg : arg term -> 'a -> 'a - method enter_phi : phi term -> 'a -> 'a method visit_phi : phi term -> 'a -> 'a method leave_phi : phi term -> 'a -> 'a - method enter_def : def term -> 'a -> 'a method visit_def : def term -> 'a -> 'a method leave_def : def term -> 'a -> 'a - method enter_jmp : jmp term -> 'a -> 'a method visit_jmp : jmp term -> 'a -> 'a method leave_jmp : jmp term -> 'a -> 'a end - (** [switch cls t ~program ~sub .. ~jmp] performs a pattern - matching over a term [t] based on its type class [cls]. - It is guaranteed that only one function will be called for a - term.*) - val switch : ('p,'t) cls -> + val switch : + ('p, 't) cls -> program:(program term -> 'a) -> sub:(sub term -> 'a) -> arg:(arg term -> 'a) -> blk:(blk term -> 'a) -> phi:(phi term -> 'a) -> def:(def term -> 'a) -> - jmp:(jmp term -> 'a) -> 't term -> 'a - - (** [proj cls t ?case] a special case of pattern matching, - where all cases by default returns [None] *) - val proj : ('p,'t) cls -> + jmp:(jmp term -> 'a) -> + 't term -> + 'a + (** [switch cls t ~program ~sub .. ~jmp] performs a pattern matching over a + term [t] based on its type class [cls]. It is guaranteed that only one + function will be called for a term.*) + + val proj : + ('p, 't) cls -> ?program:(program term -> 'a option) -> ?sub:(sub term -> 'a option) -> ?arg:(arg term -> 'a option) -> @@ -8633,12 +8181,14 @@ module Std : sig ?phi:(phi term -> 'a option) -> ?def:(def term -> 'a option) -> ?jmp:(jmp term -> 'a option) -> - 't term -> 'a option - - (** [cata cls ~init t ?case] performs a pattern matching. All - methods by default returns [init]. - Note: [cata] stands for [catamorphism] *) - val cata : ('p,'t) cls -> init:'a -> + 't term -> + 'a option + (** [proj cls t ?case] a special case of pattern matching, where all cases + by default returns [None] *) + + val cata : + ('p, 't) cls -> + init:'a -> ?program:(program term -> 'a) -> ?sub:(sub term -> 'a) -> ?arg:(arg term -> 'a) -> @@ -8646,605 +8196,579 @@ module Std : sig ?phi:(phi term -> 'a) -> ?def:(def term -> 'a) -> ?jmp:(jmp term -> 'a) -> - 't term -> 'a + 't term -> + 'a + (** [cata cls ~init t ?case] performs a pattern matching. All methods by + default returns [init]. Note: [cata] stands for [catamorphism] *) - (** the graphical representation of the program *) val slot : (Theory.Program.Semantics.cls, blk term list) Knowledge.slot + (** the graphical representation of the program *) end - (** Program in Intermediate representation. *) + (** Program in Intermediate representation. *) module Program : sig (** Program is a collection of function terms. *) type t = program term + val create : ?subs:sub term list -> ?tid:tid -> unit -> t (** [create ?subs ?tid ()] creates a new program. - Creates a program from the given subs. If [tid] is not - specified then a fresh tid is generated. + Creates a program from the given subs. If [tid] is not specified then a + fresh tid is generated. - @since 2.3.0 has the optional [subs] parameter. - *) - val create : ?subs:sub term list -> ?tid:tid -> unit -> t + @since 2.3.0 has the optional [subs] parameter. *) - (** [lift symbols] takes a table of functions and return a whole - program lifted into IR *) val lift : symtab -> program term + (** [lift symbols] takes a table of functions and return a whole program + lifted into IR *) - (** [to_graph program] creates a callgraph of a [program] *) val to_graph : t -> Graphs.Callgraph.t + (** [to_graph program] creates a callgraph of a [program] *) - (** [lookup t program id] is like {{!find}find} but performs deep - lookup in the whole [program] for a term with a given [id]. - This function is memoized, so it has amortized O(1) - complexity, with a wostcase complexity of $O(N)$, where $N$ is - the total amount of terms in program. *) - val lookup : (_,'b) cls -> t -> tid -> 'b term option + val lookup : (_, 'b) cls -> t -> tid -> 'b term option + (** [lookup t program id] is like {{!find}find} but performs deep lookup in + the whole [program] for a term with a given [id]. This function is + memoized, so it has amortized O(1) complexity, with a wostcase + complexity of $O(N)$, where $N$ is the total amount of terms in program. + *) - (** [parent t program id] is [Some p] iff [find t p id <> None] *) - val parent : ('a,'b) cls -> t -> tid -> 'a term option + val parent : ('a, 'b) cls -> t -> tid -> 'a term option + (** [parent t program id] is [Some p] iff [find t p id <> None] *) - (** Program builder. *) + (** Program builder. *) module Builder : sig type t - (** Initializes an empty builder. *) - val create : ?tid:tid -> ?subs:int -> unit -> t - (** [add_sub builder sub] appends a subroutine term to the - program. *) + val create : ?tid:tid -> ?subs:int -> unit -> t + (** Initializes an empty builder. *) + val add_sub : t -> sub term -> unit + (** [add_sub builder sub] appends a subroutine term to the program. *) - (** fixes the result *) val result : t -> program term + (** fixes the result *) end (** @since 2.6.0 *) module KB : sig - (** [lift symbols] takes a table of functions and return a whole - program lifted into IR. - - @since 2.6.0 - *) val lift : symtab -> program term knowledge + (** [lift symbols] takes a table of functions and return a whole program + lifted into IR. + + @since 2.6.0 *) end - (** [pp_slots names] prints slots that are in [names]. *) val pp_slots : string list -> Format.formatter -> t -> unit + (** [pp_slots names] prints slots that are in [names]. *) + include Regular.S with type t := t end - (** Subroutine. *) + (** Subroutine. *) module Sub : sig - (** Subroutine is a set of blocks. The first block of a function is + type t = sub term + (** Subroutine is a set of blocks. The first block of a function is considered an entry block. @since 2.6.0 subroutines with duplicate names in a program are no - longer mangled every time the program is updated. It is done only - once when the program is lifted (see [Program.lift]). - *) - type t = sub term - - (** [create ?name ()] creates a new subroutine. - - Creates a subroutine that includes given arguments and - blocks. The order of the terms is preserved with the first - block being the entry block. No references between blocks are - added, so the blocks shall be correctly linked and be - reachable from the entry block. - If [tid] is not specied then a fresh one is generated. - if [name] is not specified then a fresh name is derived from - the [tid]. + longer mangled every time the program is updated. It is done only once + when the program is lifted (see [Program.lift]). *) - @since 2.3.0 has the [args] optional parameter - @since 2.3.0 has the [blks] optional parameter - *) val create : ?args:arg term list -> ?blks:blk term list -> ?tid:tid -> - ?name:string -> unit -> t + ?name:string -> + unit -> + t + (** [create ?name ()] creates a new subroutine. + + Creates a subroutine that includes given arguments and blocks. The order + of the terms is preserved with the first block being the entry block. No + references between blocks are added, so the blocks shall be correctly + linked and be reachable from the entry block. + + If [tid] is not specied then a fresh one is generated. if [name] is not + specified then a fresh name is derived from the [tid]. + + @since 2.3.0 has the [args] optional parameter + @since 2.3.0 has the [blks] optional parameter *) - (** [lift entry] takes an basic block of assembler instructions, - as an entry and lifts it to the subroutine term. *) val lift : block -> cfg -> sub term + (** [lift entry] takes an basic block of assembler instructions, as an entry + and lifts it to the subroutine term. *) - (** [name sub] returns a subroutine name *) val name : t -> string + (** [name sub] returns a subroutine name *) - (** updates subroutine name *) val with_name : t -> string -> t + (** updates subroutine name *) - (** [ssa sub] returns [sub] in SSA form. If program is already in - SSA, then do nothing (see also {!is_ssa}). The underlying - algorithm produces a semi-pruned SSA form. To represent - different versions of the same variable we use {{!Var}variable - versions}. Any definition of a variable increases its version - number. So, the zero version is reserved for variables that - weren't defined before the first use. *) val ssa : t -> t + (** [ssa sub] returns [sub] in SSA form. If program is already in SSA, then + do nothing (see also {!is_ssa}). The underlying algorithm produces a + semi-pruned SSA form. To represent different versions of the same + variable we use {{!Var}variable versions}. Any definition of a variable + increases its version number. So, the zero version is reserved for + variables that weren't defined before the first use. *) - (** [is_ssa sub] is [true] if [sub] was transformed into an SSA - form. This is O(1) predicate that doesn't really check, that - a subroutine is in an SSA form, so it is a responsibility of - a user to preserve the SSA form on any transformation. *) val is_ssa : t -> bool + (** [is_ssa sub] is [true] if [sub] was transformed into an SSA form. This + is O(1) predicate that doesn't really check, that a subroutine is in an + SSA form, so it is a responsibility of a user to preserve the SSA form + on any transformation. *) - (** [free_vars sub] computes a set of variables that are free in - a given subroutine [sub]. The variable is considered free if it - is used before defined or is not locally bound. If [sub] is in - an SSA form, then the set is computed trivially, thanks to a - naming scheme. If program is not in an SSA form, then a BFS on a - dominators tree is used. *) val free_vars : t -> Var.Set.t + (** [free_vars sub] computes a set of variables that are free in a given + subroutine [sub]. The variable is considered free if it is used before + defined or is not locally bound. If [sub] is in an SSA form, then the + set is computed trivially, thanks to a naming scheme. If program is not + in an SSA form, then a BFS on a dominators tree is used. *) + val to_graph : t -> Graphs.Tid.t (** [to_graph sub] builds a graph of subroutine [sub]. - Graph nodes are block term identifiers and edges are labeled - with term identifiers of the jmp terms that correspond to - the given edge. + Graph nodes are block term identifiers and edges are labeled with term + identifiers of the jmp terms that correspond to the given edge. @since 2.1 the returned graph contains two pseudo-nodes - [Graphs.Tid.start] and [Graphs.Tid.exit] so that all nodes - that has in-degree [0] or that start a strongly connected - component are connected to the [start] node (the same for - [exit] but on the reversed graph. + + [Graphs.Tid.start] and [Graphs.Tid.exit] so that all nodes that has + in-degree [0] or that start a strongly connected component are connected + to the [start] node (the same for [exit] but on the reversed graph. Edges from [start] to other nodes are labeled with the [Graphs.Tid.start] tid. Edges from nodes to the [exit] node are labeled with the - [Graphs.Tid.exit] tid. - *) - val to_graph : t -> Graphs.Tid.t + [Graphs.Tid.exit] tid. *) - (** [to_cfg sub] builds a graph representation of a subroutine - [sub]. All graph operations are mapped to corresponding - [Term] operations. See {!Graphlib.Ir} for more information.*) val to_cfg : t -> Graphs.Ir.t + (** [to_cfg sub] builds a graph representation of a subroutine [sub]. All + graph operations are mapped to corresponding [Term] operations. See + {!Graphlib.Ir} for more information.*) - (** [of_cfg cfg] extracts a [sub term] from a given graph [cfg]. - Since {!Graphlib.Ir} module builds term incrementally this - operation is just a projection, i.e., it has O(0) complexity. *) val of_cfg : Graphs.Ir.t -> t + (** [of_cfg cfg] extracts a [sub term] from a given graph [cfg]. Since + {!Graphlib.Ir} module builds term incrementally this operation is just a + projection, i.e., it has O(0) complexity. *) - + val compute_liveness : t -> (tid, Var.Set.t) Solution.t + [@@deprecated "[since 2022-03] use Live.compute"] (** [compute_liveness sub] computes a set of live variables for each block. For a block [b] and solution [s = compute_liveness sub], - [Solution.get s (Term.tid b)] is a set of variables that are - live at the _exit_ from this block. + [Solution.get s (Term.tid b)] is a set of variables that are live at the + _exit_ from this block. - A set of variables that are live (free) in the - whole subroutine is the set of variables that are live at the - [Graphs.Tid.start] node. + A set of variables that are live (free) in the whole subroutine is the + set of variables that are live at the [Graphs.Tid.start] node. - When the subroutine is in the SSA form then the phi-nodes have - the following semantics. + When the subroutine is in the SSA form then the phi-nodes have the + following semantics. - Informally, a phi-node defines the values on the corresponding - edges of the predecessors. + Informally, a phi-node defines the values on the corresponding edges of + the predecessors. @since 2.1 @since 2.5.0 supports SSA @before 2.5.0 the subroutine must not be in the SSA form - *) - val compute_liveness : t -> (tid, Var.Set.t) Solution.t - [@@deprecated "[since 2022-03] use Live.compute"] + @deprecated since 2022-03: use {!Live.compute} *) - (** [flatten sub] returns [sub] in flattened form in which all - operands are trivial. - @see Blk.flatten for more information about flattening. + val flatten : t -> t + (** [flatten sub] returns [sub] in flattened form in which all operands are + trivial. see {!Blk.flatten} for more information about flattening. @since 2.5.0 *) - val flatten : t -> t - (** other names for the given subroutine.*) val aliases : string list tag + (** other names for the given subroutine.*) - (** A subroutine doesn't examine any values except its arguments, - and have no effects except the return value. Basically this is - just slightly more strict class than the pure attribute below, - since function is not allowed to read global memory. Note that a - function that has pointer arguments and examines the data - pointed to is not const. Likewise, a function that - calls a non-const function usually is not be const. It does not - make sense for a const function to return void *) val const : unit tag + (** A subroutine doesn't examine any values except its arguments, and have + no effects except the return value. Basically this is just slightly more + strict class than the pure attribute below, since function is not + allowed to read global memory. Note that a function that has pointer + arguments and examines the data pointed to is not const. Likewise, a + function that calls a non-const function usually is not be const. It + does not make sense for a const function to return void *) - (** A subroutine have no effects except the return value and their - return value depends only on the parameters and/or global - variables. *) val pure : unit tag + (** A subroutine have no effects except the return value and their return + value depends only on the parameters and/or global variables. *) - (** A subroutine is a stub *) val stub : unit tag + (** A subroutine is a stub *) - (** A subroutine is visible outside of the compilation unit *) val extern : unit tag + (** A subroutine is visible outside of the compilation unit *) - (** a subroutine doesn't contain any calls in any disguise, i.e., - no longjmps, indirect calls, exceptions, etc. *) val leaf : unit tag + (** a subroutine doesn't contain any calls in any disguise, i.e., no + longjmps, indirect calls, exceptions, etc. *) - (** A subroutine is malloc-like, i.e., the pointer P returned - by the subroutine cannot alias any other pointer valid when the - function returns, and moreover no pointers to valid objects occur - in any storage addressed by P. *) val malloc : unit tag + (** A subroutine is malloc-like, i.e., the pointer P returned by the + subroutine cannot alias any other pointer valid when the function + returns, and moreover no pointers to valid objects occur in any storage + addressed by P. *) - (** A subroutine will not return (either loop infinitely or abort - a program) *) val noreturn : unit tag + (** A subroutine will not return (either loop infinitely or abort a program) + *) - (** A subroutine may return more than one time. Examples of such - functions are setjmp and vfork *) val returns_twice : unit tag + (** A subroutine may return more than one time. Examples of such functions + are setjmp and vfork *) - (** A subroutine doesn't throw exceptions *) val nothrow : unit tag + (** A subroutine doesn't throw exceptions *) - (** a subroutine is the binary entry point *) val entry_point : unit tag + (** a subroutine is the binary entry point *) - (** a subroutine is an intrinisic or special instruction - not a real subroutine. - - @since 2.5.0 *) val intrinsic : unit tag + (** a subroutine is an intrinisic or special instruction not a real + subroutine. + + @since 2.5.0 *) (** Subroutine builder *) module Builder : sig type t - (** initializes empty subroutine builder. *) - val create : ?tid:tid -> ?args:int -> ?blks:int -> ?name:string -> - unit -> t + val create : + ?tid:tid -> ?args:int -> ?blks:int -> ?name:string -> unit -> t + (** initializes empty subroutine builder. *) - (** appends a block to a subroutine *) val add_blk : t -> blk term -> unit + (** appends a block to a subroutine *) - (** appends an argument *) val add_arg : t -> arg term -> unit + (** appends an argument *) - (** returns current result *) val result : t -> sub term + (** returns current result *) end (** @since 2.6.0 *) module KB : sig - (** [lift entry] takes an basic block of assembler instructions, - as an entry and lifts it to the subroutine term. - - @since 2.6.0 - *) val lift : block -> cfg -> sub term knowledge + (** [lift entry] takes an basic block of assembler instructions, as an + entry and lifts it to the subroutine term. - (** [ssa sub] returns [sub] in SSA form. If program is already in - SSA, then do nothing (see also {!is_ssa}). The underlying - algorithm produces a semi-pruned SSA form. To represent - different versions of the same variable we use {{!Var}variable - versions}. Any definition of a variable increases its version - number. So, the zero version is reserved for variables that - weren't defined before the first use. + @since 2.6.0 *) - @since 2.6.0 - *) val ssa : t -> t knowledge + (** [ssa sub] returns [sub] in SSA form. If program is already in SSA, + then do nothing (see also {!is_ssa}). The underlying algorithm + produces a semi-pruned SSA form. To represent different versions of + the same variable we use {{!Var}variable versions}. Any definition of + a variable increases its version number. So, the zero version is + reserved for variables that weren't defined before the first use. - (** [flatten sub] returns [sub] in flattened form in which all - operands are trivial. - @see Blk.KB.flatten for more information about flattening. + @since 2.6.0 *) - @since 2.6.0 - *) val flatten : t -> t knowledge + (** [flatten sub] returns [sub] in flattened form in which all operands + are trivial. see {!Blk.KB.flatten} for more information about + flattening. + + @since 2.6.0 *) end - (** [pp_slots names] prints slots that are in [names]. *) val pp_slots : string list -> Format.formatter -> t -> unit + (** [pp_slots names] prints slots that are in [names]. *) include Regular.S with type t := t end - (** Definition. *) + (** Definition. *) module Def : sig - (** The definition is an assignment. The left hand side of an - assignment is a variable, and the right side is an expression. + (** The definition is an assignment. The left hand side of an assignment is + a variable, and the right side is an expression. - The definition is the only way for a block to perform some - side effects. + The definition is the only way for a block to perform some side effects. *) type t = def term - (** [reify v x] reifies Core Theory terms into the IR term. *) val reify : ?tid:tid -> 'a Theory.var -> 'a Theory.value -> t + (** [reify v x] reifies Core Theory terms into the IR term. *) - (** [var def] is the left-hand-side as a Core Theory variable. *) val var : t -> unit Theory.var + (** [var def] is the left-hand-side as a Core Theory variable. *) - (** [value def] is the right-hand-side as a Core Theory value. *) val value : t -> unit Theory.value + (** [value def] is the right-hand-side as a Core Theory value. *) - (** [create ?tid x exp] creates definition [x := exp] *) val create : ?tid:tid -> var -> exp -> t + (** [create ?tid x exp] creates definition [x := exp] *) - (** returns the left hand side of a definition *) val lhs : t -> var + (** returns the left hand side of a definition *) - (** returns the right hand side of a definition *) val rhs : t -> exp + (** returns the right hand side of a definition *) - (** updates the lhs of definition *) val with_lhs : t -> var -> t + (** updates the lhs of definition *) - (** updates the right hand side of a definition *) val with_rhs : t -> exp -> t + (** updates the right hand side of a definition *) - (** [map_exp def f] applies [f] to a [rhs] of [def] and returns - an updated definition. *) val map_exp : t -> f:(exp -> exp) -> t + (** [map_exp def f] applies [f] to a [rhs] of [def] and returns an updated + definition. *) - (** [substitute def x y] substitutes [x] by [y] in the right hand - side of a definition [def] *) val substitute : t -> exp -> exp -> t + (** [substitute def x y] substitutes [x] by [y] in the right hand side of a + definition [def] *) - (** [free_vars def] returns a set of free variables, that occurs - on the right hand side of definition [def]. See {!Exp.free_vars} - for more information. *) val free_vars : t -> Var.Set.t + (** [free_vars def] returns a set of free variables, that occurs on the + right hand side of definition [def]. See {!Exp.free_vars} for more + information. *) - (** [pp_slots names] prints slots that are in [names]. *) val pp_slots : string list -> Format.formatter -> t -> unit + (** [pp_slots names] prints slots that are in [names]. *) include Regular.S with type t := t end - (** A control transfer operation. *) + (** A control transfer operation. *) module Jmp : sig - (** Jmp is the only way to transfer control from block to block. - Jumps are guarded with conditions. The jump should be taken - only if its condition is evaluated to true. - When control flow reaches the end of block it should take the - first jump with true condition. If there is no such jump, then - program stops. + (** Jmp is the only way to transfer control from block to block. Jumps are + guarded with conditions. The jump should be taken only if its condition + is evaluated to true. When control flow reaches the end of block it + should take the first jump with true condition. If there is no such + jump, then program stops. Jumps are further subdivided into categories: - - goto - is a local control transfer instruction. The label - can be only local to subroutine; - - call - transfer a control to another subroutine. A call - contains a continuation, i.e., a label to which we're hoping - to return after subroutine returns the control to us. Of - course, called subroutine can in general return to another - position, or not to return at all. + - goto - is a local control transfer instruction. The label can be only + local to subroutine; + - call - transfer a control to another subroutine. A call contains a + continuation, i.e., a label to which we're hoping to return after + subroutine returns the control to us. Of course, called subroutine can + in general return to another position, or not to return at all. - ret - performs a return from subroutine - - int - calls to interrupt subroutine. If interrupt returns, - then continue with the provided label. - *) + - int - calls to interrupt subroutine. If interrupt returns, then + continue with the provided label. *) type t = jmp term - type dst - - + val reify : + ?tid:tid -> + ?cnd:Theory.Bool.t Theory.value -> + ?alt:dst -> + ?dst:dst -> + unit -> + t (** [reify ()] reifies inputs into a jump term. - Calls and interrupt subroutines invocations are represented - with two edges: the normal edge (denoted [dst]) is the - intra-procedural edge which connects the callsite with the - fall-through destination (if such exists) and an alternative - destination (denoted with [alt]) which represents an - inter-procedural destination between the callsite and the + Calls and interrupt subroutines invocations are represented with two + edges: the normal edge (denoted [dst]) is the intra-procedural edge + which connects the callsite with the fall-through destination (if such + exists) and an alternative destination (denoted with [alt]) which + represents an inter-procedural destination between the callsite and the call destination. - @param cnd is a core theory term that denotes the - guard condition of a conditional jump. + @param cnd + is a core theory term that denotes the guard condition of a + conditional jump. @param alt is the alternative control flow destination. @param dst is the direct control flow destination - @tid is the jump identifier, if not specified a fresh - new identifier is created. + @param tid + is the jump identifier, if not specified a fresh new identifier is + created. *) - *) - val reify : ?tid:tid -> - ?cnd:Theory.Bool.t Theory.value -> - ?alt:dst -> ?dst:dst -> unit -> t - - (** [guard jmp] if [jmp] is conditional, returns its condition. *) val guard : t -> Theory.Bool.t Theory.value option + (** [guard jmp] if [jmp] is conditional, returns its condition. *) - (** [with_guard jmp cnd] updates the jump condition of [jmp]. *) val with_guard : t -> Theory.Bool.t Theory.value option -> t + (** [with_guard jmp cnd] updates the jump condition of [jmp]. *) - (** [dst jmp] returns the intra-procedural destination of [jmp]. *) val dst : t -> dst option + (** [dst jmp] returns the intra-procedural destination of [jmp]. *) - (** [alt jmp] returns the inter-procedural destination of [jmp]. *) val alt : t -> dst option + (** [alt jmp] returns the inter-procedural destination of [jmp]. *) - (** [resolved dst] creates a resolved destination.*) val resolved : tid -> dst + (** [resolved dst] creates a resolved destination.*) - + val indirect : 'a Theory.Bitv.t Theory.value -> dst (** [indirect v] creates an indirect jump destination. - The destination (or a set of destinations) is encoded with - the Core Theory term [v]. *) - val indirect : 'a Theory.Bitv.t Theory.value -> dst + The destination (or a set of destinations) is encoded with the Core + Theory term [v]. *) + val resolve : dst -> (tid, 'a Theory.Bitv.t Theory.value) Either.t (** [resolve dst] resolves destination. *) - val resolve : dst -> (tid,'a Theory.Bitv.t Theory.value) Either.t - (** [create ?cond kind] creates a jump of a given kind *) val create : ?tid:tid -> ?cond:exp -> jmp_kind -> t + (** [create ?cond kind] creates a jump of a given kind *) - (** [create_call ?cond target] transfer control to subroutine - [target] *) - val create_call : ?tid:tid -> ?cond:exp -> call -> t + val create_call : ?tid:tid -> ?cond:exp -> call -> t + (** [create_call ?cond target] transfer control to subroutine [target] *) - (** [create_goto ?cond label] local jump *) val create_goto : ?tid:tid -> ?cond:exp -> label -> t + (** [create_goto ?cond label] local jump *) - (** [create_ret ?cond label] return from a procedure *) - val create_ret : ?tid:tid -> ?cond:exp -> label -> t + val create_ret : ?tid:tid -> ?cond:exp -> label -> t + (** [create_ret ?cond label] return from a procedure *) - (** [create_int ?cond int_number return] call interrupt subroutine *) - val create_int : ?tid:tid -> ?cond:exp -> int -> tid -> t + val create_int : ?tid:tid -> ?cond:exp -> int -> tid -> t + (** [create_int ?cond int_number return] call interrupt subroutine *) - (** [kind jmp] evaluates to a kind of jump *) val kind : t -> jmp_kind + (** [kind jmp] evaluates to a kind of jump *) - (** [cond jmp] returns the jump guard condition *) val cond : t -> exp + (** [cond jmp] returns the jump guard condition *) - (** [exps jmp] returns a sequence of expressions occurring in - different positions of a jump [jmp], e.g., in [cond], - [target], etc. *) val exps : t -> exp seq + (** [exps jmp] returns a sequence of expressions occurring in different + positions of a jump [jmp], e.g., in [cond], [target], etc. *) - (** [free_vars jmp] returns a set of all variables that are free - in some expression in the jump [jmp]. *) val free_vars : t -> Var.Set.t + (** [free_vars jmp] returns a set of all variables that are free in some + expression in the jump [jmp]. *) - (** [map_exp jmp ~f] applies [f] to each expression in a [jmp], - e.g., conditions and indirect labels. *) val map_exp : t -> f:(exp -> exp) -> t + (** [map_exp jmp ~f] applies [f] to each expression in a [jmp], e.g., + conditions and indirect labels. *) - (** [substitute jmp x y] substitutes [x] by [y] in all expressions - that occur in jump [jmp] expressions.*) val substitute : t -> exp -> exp -> t + (** [substitute jmp x y] substitutes [x] by [y] in all expressions that + occur in jump [jmp] expressions.*) + val with_cond : t -> exp -> t (** [with_cond jmp c] updates jump's guard condition @since 2.0.0 *) - val with_cond : t -> exp -> t + val with_kind : t -> jmp_kind -> t (** [with_kind jmp k] updates jump's kind @since 2.0.0 *) - val with_kind : t -> jmp_kind -> t + val with_alt : t -> dst option -> t (** [with_alt jmp d] updates jump's inter-procedural destination @since 2.1.0 *) - val with_alt : t -> dst option -> t + val with_dst : t -> dst option -> t (** [with_dst jmp d] updates jump's intra-procedural destination @since 2.1.0 *) - val with_dst : t -> dst option -> t - (** [pp_slots names] prints slots that are in [names]. *) val pp_slots : string list -> Format.formatter -> t -> unit + (** [pp_slots names] prints slots that are in [names]. *) + include Regular.S with type t := t end - (** PHI-node *) + (** PHI-node *) module Phi : sig - (** Phi nodes are used to represent a set of values that can be - assigned to a given variable depending on a control flow path - taken. Phi nodes should occur only in blocks that has more - than one incoming edge, i.e., in blocks to which there is a - transfer of control flow from more than one block. - - Each element of a phi-node corresponds to a particular - incoming edge. *) type t = phi term + (** Phi nodes are used to represent a set of values that can be assigned to + a given variable depending on a control flow path taken. Phi nodes + should occur only in blocks that has more than one incoming edge, i.e., + in blocks to which there is a transfer of control flow from more than + one block. + Each element of a phi-node corresponds to a particular incoming edge. *) - (** [reify v xs] reifies Core Theory terms into the phi term. *) - val reify : ?tid:tid -> - 'a Theory.var -> - (tid * 'a Theory.value) list -> - t + val reify : ?tid:tid -> 'a Theory.var -> (tid * 'a Theory.value) list -> t + (** [reify v xs] reifies Core Theory terms into the phi term. *) - (** [var phi] is the left-hand-side of the [phi] term. *) val var : t -> unit Theory.var + (** [var phi] is the left-hand-side of the [phi] term. *) - (** [options def] returns a list of possible values the term can take. - - Values are predicated with the term identifiers of the paths (denoted - by the tid of the predecessor) - *) val options : t -> (tid * unit Theory.value) seq + (** [options def] returns a list of possible values the term can take. + Values are predicated with the term identifiers of the paths (denoted by + the tid of the predecessor) *) - (** [create var label exp] creates a phi-node that associates a - variable [var] with an expression [exp]. This expression - should be selected if a control flow enters a block, that owns - this phi-node from a block labeled with [label]. Example, - [create x loop_header y].*) val create : ?tid:tid -> var -> tid -> exp -> t + (** [create var label exp] creates a phi-node that associates a variable + [var] with an expression [exp]. This expression should be selected if a + control flow enters a block, that owns this phi-node from a block + labeled with [label]. Example, [create x loop_header y].*) - (** [of_list var bindings] creates a phi-node, that for each pair - of [label,exp] in the [bindings] list associates variable [var] - with expression [exp] if control flow reaches this point via block - labeled with [label]. *) val of_list : ?tid:tid -> var -> (tid * exp) list -> t + (** [of_list var bindings] creates a phi-node, that for each pair of + [label,exp] in the [bindings] list associates variable [var] with + expression [exp] if control flow reaches this point via block labeled + with [label]. *) - (** [values phi] enumerate all possible values. *) val values : t -> (tid * exp) seq + (** [values phi] enumerate all possible values. *) - (** [free_vars t] returns a set of variables that occur free on - the right hand side of the phi-node. See {Exp.free_vars} for - clarification on what variables are considered free. *) val free_vars : t -> Var.Set.t + (** [free_vars t] returns a set of variables that occur free on the right + hand side of the phi-node. See + {[ + Exp.free_vars + ]} + for clarification on what variables are considered free. *) - (** [lhs phi] returns a variable associated with a phi node *) val lhs : t -> var + (** [lhs phi] returns a variable associated with a phi node *) - (** [with_lhs phi var] updates a left hand side of [phi] with - [var] *) val with_lhs : t -> var -> t + (** [with_lhs phi var] updates a left hand side of [phi] with [var] *) - (** [map_exp t ~f] applies [f] to all expressions on the right - hand side of a phi-node [t] *) val map_exp : t -> f:(exp -> exp) -> t + (** [map_exp t ~f] applies [f] to all expressions on the right hand side of + a phi-node [t] *) - (** [substitute phi x y] substitutes [x] by [y] in all right - hand-side expressions of the [phi] node. *) val substitute : t -> exp -> exp -> t + (** [substitute phi x y] substitutes [x] by [y] in all right hand-side + expressions of the [phi] node. *) - (** [update phi label exp] associates expression [exp] with a - control flow path labeled with [label]. *) val update : t -> tid -> exp -> t + (** [update phi label exp] associates expression [exp] with a control flow + path labeled with [label]. *) - (** [select phi label] takes the value corresponding to a control - flow path marked with [label]. *) val select : t -> tid -> exp option + (** [select phi label] takes the value corresponding to a control flow path + marked with [label]. *) - (** [select_or_unknown phi label] is [exp] if - [select phi label = Some exp], otherwise returns a - [Bil.unknown] expression. *) val select_or_unknown : t -> tid -> exp + (** [select_or_unknown phi label] is [exp] if [select phi label = Some exp], + otherwise returns a [Bil.unknown] expression. *) - (** [remove def id] removes definition with a given [id] *) val remove : t -> tid -> t + (** [remove def id] removes definition with a given [id] *) - (** [pp_slots names] prints slots that are in [names]. *) val pp_slots : string list -> Format.formatter -> t -> unit + (** [pp_slots names] prints slots that are in [names]. *) + include Regular.S with type t := t end (** Basic block. - Logically block consists of a set of {{!Phi}phi nodes}, a - sequence of {{!Def}definitions} and a sequence of out-coming - edges, aka {{!Jmp}jumps}. A colloquial term for this three - entities is a {e block element}. + Logically block consists of a set of {{!Phi}phi nodes}, a sequence of + {{!Def}definitions} and a sequence of out-coming edges, aka {{!Jmp}jumps}. + A colloquial term for this three entities is a {e block element}. - The order of Phi-nodes can be specified in any order, as - they execute simultaneously . Definitions are stored in the - order of execution. Jumps are specified in the order in which - they should be taken, i.e., jmp_n is taken only after - jmp_n-1 and if and only if the latter was not taken. For - example, if block ends with N jumps, where each n-th jump - have destination named t_n and condition c_n then it - would have the semantics as per the following OCaml program: + The order of Phi-nodes can be specified in any order, as they execute + simultaneously . Definitions are stored in the order of execution. Jumps + are specified in the order in which they should be taken, i.e., jmp_n is + taken only after jmp_n-1 and if and only if the latter was not taken. For + example, if block ends with N jumps, where each n-th jump have destination + named t_n and condition c_n then it would have the semantics as per the + following OCaml program: {v if c_1 then jump t_1 else @@ -9253,196 +8777,185 @@ module Std : sig stop v} *) module Blk : sig - type t = blk term - (** Union type for all element types *) - type elt = [ - | `Def of def term - | `Phi of phi term - | `Jmp of jmp term - ] + type elt = [ `Def of def term | `Phi of phi term | `Jmp of jmp term ] + (** Union type for all element types *) + val create : + ?phis:phi term list -> + ?defs:def term list -> + ?jmps:jmp term list -> + ?tid:tid -> + unit -> + t (** [create ?phis ?defs ?jmps ?tid ()] creates a new block. - Creates a new block that contains the passed [phis], [defs], - and [jmps]. If [tid] is not specified then a fresh one is - generated. + Creates a new block that contains the passed [phis], [defs], and [jmps]. + If [tid] is not specified then a fresh one is generated. @since 2.3.0 has the optional [phis] parameter. @since 2.3.0 has the optional [defs] parameter. - @since 2.3.0 has the optional [jmps] parameter. - *) - val create : - ?phis:phi term list -> - ?defs:def term list -> - ?jmps:jmp term list -> - ?tid:tid -> unit -> t + @since 2.3.0 has the optional [jmps] parameter. *) - (** [lift block] takes a basic block of assembly instructions and - lifts it to a list of blk terms. The first term in the list - is the entry. *) val lift : cfg -> block -> blk term list + (** [lift block] takes a basic block of assembly instructions and lifts it + to a list of blk terms. The first term in the list is the entry. *) - (** [from_insn ?addr insn] creates an IR representation of a single - machine instruction [insn]. - - Uses the [Term.slot] to get the IR representation of an - instruction, trying to keep the number of basic blocks minimal - (by coalescing adjacent data operations). + val from_insn : ?addr:addr -> insn -> blk term list + (** [from_insn ?addr insn] creates an IR representation of a single machine + instruction [insn]. - If [addr] is specified then the term identifier of the first - block will be specific to that address and the [address] - attribute will be set to the passed value. + Uses the [Term.slot] to get the IR representation of an instruction, + trying to keep the number of basic blocks minimal (by coalescing + adjacent data operations). - @since 2.3.0 has [addr] parameter. - *) - val from_insn : ?addr:addr -> insn -> blk term list + If [addr] is specified then the term identifier of the first block will + be specific to that address and the [address] attribute will be set to + the passed value. - (** [from_insns block] translates a basic block of instructions - into IR. - - Takes a list of instructions in the execution order and - translates them into a list of IR blks that are properly - connected. The instructions shall belong to a single basic - block. - - The first element of the result is the entry block. If [addr] - is set then it will have the term identifier equal to - [Term.for_addr addr] and the [address] attribute will be set to - [addr]. - - The [fall] parameter designates the fallthrough destination of - the basic block. The destination could be either - interprocedural ([`Inter]) or intraprocedural ([`Intra]). In - the latter case it will be reified into a jump of the call - kind. If the last instruction (the basic block terminator) is a - barrier [Insn.(is barrier) is [true]], then the [fall] - destination is ignored, even if set. + @since 2.3.0 has [addr] parameter. *) - @since 2.3.0 *) val from_insns : - ?fall:[`Inter of Jmp.dst | `Intra of Jmp.dst ] -> + ?fall:[ `Inter of Jmp.dst | `Intra of Jmp.dst ] -> ?addr:addr -> insn list -> blk term list + (** [from_insns block] translates a basic block of instructions into IR. + + Takes a list of instructions in the execution order and translates them + into a list of IR blks that are properly connected. The instructions + shall belong to a single basic block. + The first element of the result is the entry block. If [addr] is set + then it will have the term identifier equal to [Term.for_addr addr] and + the [address] attribute will be set to [addr]. - (** [split_while blk ~f] splits [blk] into two block: the first - block holds all definitions for which [f p] is true and has - the same tid as [blk]. The second block is freshly created and - holds the rest definitions (if any). All successors of the - [blk] become successors of the second block, which becomes the - successor of the first block. + The [fall] parameter designates the fallthrough destination of the basic + block. The destination could be either interprocedural ([`Inter]) or + intraprocedural ([`Intra]). In the latter case it will be reified into a + jump of the call kind. If the last instruction (the basic block + terminator) is a barrier [Insn.(is barrier) is [true]], then the [fall] + destination is ignored, even if set. + + @since 2.3.0 *) - Note: if [f def] is [true] for all blocks, then the second - block will not contain any definitions, i.e., the result would - be the same as of {{!split_bot}split_bot} function. *) val split_while : t -> f:(def term -> bool) -> t * t + (** [split_while blk ~f] splits [blk] into two block: the first block holds + all definitions for which [f p] is true and has the same tid as [blk]. + The second block is freshly created and holds the rest definitions (if + any). All successors of the [blk] become successors of the second block, + which becomes the successor of the first block. - (** [split_after blk def] creates two new blocks, where the first - block contains all defintions up to [def] inclusive, the - second contains the rest. + Note: if [f def] is [true] for all blocks, then the second block will + not contain any definitions, i.e., the result would be the same as of + {{!split_bot}split_bot} function. *) - Note: if def is not in a [blk] then the first block will contain - all the defintions, and the second block will be empty. *) val split_after : t -> def term -> t * t + (** [split_after blk def] creates two new blocks, where the first block + contains all defintions up to [def] inclusive, the second contains the + rest. + + Note: if def is not in a [blk] then the first block will contain all the + defintions, and the second block will be empty. *) - (** [split_before blk def] is like {{!split_after}split_after} but - [def] will fall into the second [blk] *) val split_before : t -> def term -> t * t + (** [split_before blk def] is like {{!split_after}split_after} but [def] + will fall into the second [blk] *) - (** [split_top blk] returns two blocks, where first block shares - the same tid as [blk] and has all $\Phi$-nodes of [blk], but - has only one destination, namely the second block. Second - block has new tidentity, but inherits all definitions and - jumps from the [blk]. *) val split_top : t -> t * t + (** [split_top blk] returns two blocks, where first block shares the same + tid as [blk] and has all $\Phi$-nodes of [blk], but has only one + destination, namely the second block. Second block has new tidentity, + but inherits all definitions and jumps from the [blk]. *) - (** [split_top blk] returns two blocks, where first block shares - the same tid as [blk], has all $\Phi$-nodes and definitions - from [blk], but has only one destination, namely the second - block. Second block has new tidentity, all jumps from the - [blk]. *) val split_bot : t -> t * t + (** [split_top blk] returns two blocks, where first block shares the same + tid as [blk], has all $\Phi$-nodes and definitions from [blk], but has + only one destination, namely the second block. Second block has new + tidentity, all jumps from the [blk]. *) - (** [elts ~rev blk] return all elements of the [blk]. if [rev] is - false or left unspecified, then elements are returned in the - following order: $\Phi$-nodes, defs (in normal order), jmps in - the order in which they will be taken. If [rev] is true, the - order will be the following: all jumps in the opposite order, - then definitions in the opposite order, and finally - $\Phi$-nodes. *) val elts : ?rev:bool -> t -> elt seq + (** [elts ~rev blk] return all elements of the [blk]. if [rev] is false or + left unspecified, then elements are returned in the following order: + $\Phi$-nodes, defs (in normal order), jmps in the order in which they + will be taken. If [rev] is true, the order will be the following: all + jumps in the opposite order, then definitions in the opposite order, and + finally $\Phi$-nodes. *) - (** [map_exp b ~f] applies function [f] for each expression in - block [b]. By default function [f] will be applied to all - values of type [exp], including right hand sides of phi-nodes, - definitions, jump conditions and targets. If [skip] parameter - is specified, then terms of corresponding kind will be - skipped, i.e., function [f] will not be applied to them. *) val map_exp : - ?skip:[`phi | `def | `jmp] list -> (** defaults to [[]] *) - t -> f:(exp -> exp) -> t + ?skip:[ `phi | `def | `jmp ] list -> + (* defaults to [[]] *) + t -> + f:(exp -> exp) -> + t + (** [map_exp b ~f] applies function [f] for each expression in block [b]. By + default function [f] will be applied to all values of type [exp], + including right hand sides of phi-nodes, definitions, jump conditions + and targets. If [skip] parameter is specified, then terms of + corresponding kind will be skipped, i.e., function [f] will not be + applied to them. *) - (** [map_elt ?phi ?def ?jmp blk] applies provided functions to the - terms of corresponding classes. All functions default to the - identity function. *) val map_elts : ?phi:(phi term -> phi term) -> ?def:(def term -> def term) -> - ?jmp:(jmp term -> jmp term) -> blk term -> blk term + ?jmp:(jmp term -> jmp term) -> + blk term -> + blk term + (** [map_elt ?phi ?def ?jmp blk] applies provided functions to the terms of + corresponding classes. All functions default to the identity function. + *) - (** [substitute ?skip blk x y] substitutes each occurrence of - expression [x] with expression [y] in block [blk]. The - substitution is performed deeply. If [skip] parameter is - specified, then terms of corresponding kind will be left - untouched. *) val substitute : - ?skip:[`phi | `def | `jmp] list -> (** defaults to [[]] *) - t -> exp -> exp -> t - - (** [map_lhs blk ~f] applies [f] to every left hand side variable - in def and phi subterms of [blk]. If [skip] parameter is - specified, then terms of corresponding kind will be left - untouched. E.g., [map_lhs ~skip:[`phi] ~f:(substitute vars)] - will perform a substitution only on definitions (and will - ignore phi-nodes) *) + ?skip:[ `phi | `def | `jmp ] list -> + (* defaults to [[]] *) + t -> + exp -> + exp -> + t + (** [substitute ?skip blk x y] substitutes each occurrence of expression [x] + with expression [y] in block [blk]. The substitution is performed + deeply. If [skip] parameter is specified, then terms of corresponding + kind will be left untouched. *) + val map_lhs : - ?skip:[`phi | `def ] list -> (** defaults to [[]] *) - t -> f:(var -> var) -> t - - (** [find_var blk var] finds a last definition of a variable [var] - in a block [blk]. *) - val find_var : t -> var -> [ - | `Phi of phi term - | `Def of def term - ] option - - (** [defines_var blk x] true if there exists such phi term or def - term with left hand side equal to [x] *) + ?skip:[ `phi | `def ] list -> + (* defaults to [[]] *) + t -> + f:(var -> var) -> + t + (** [map_lhs blk ~f] applies [f] to every left hand side variable in def and + phi subterms of [blk]. If [skip] parameter is specified, then terms of + corresponding kind will be left untouched. E.g., + [map_lhs ~skip:[`phi] ~f:(substitute vars)] will perform a substitution + only on definitions (and will ignore phi-nodes) *) + + val find_var : t -> var -> [ `Phi of phi term | `Def of def term ] option + (** [find_var blk var] finds a last definition of a variable [var] in a + block [blk]. *) + val defines_var : t -> var -> bool + (** [defines_var blk x] true if there exists such phi term or def term with + left hand side equal to [x] *) - (** [free_vars blk] returns a set of variables that occurs free - in block [blk]. A variable is free, if it occurs unbound in the - expression and there is no preceding definition of this variable - in a block [blk]. *) val free_vars : t -> Var.Set.t + (** [free_vars blk] returns a set of variables that occurs free in block + [blk]. A variable is free, if it occurs unbound in the expression and + there is no preceding definition of this variable in a block [blk]. *) - (** [uses_var blk x] true if variable [x] is in [free_vars blk]. - If you need to call this function on several variables it is - better to compute [free_vars] explicitly and use [Set.mem] - function. *) val uses_var : t -> var -> bool + (** [uses_var blk x] true if variable [x] is in [free_vars blk]. If you need + to call this function on several variables it is better to compute + [free_vars] explicitly and use [Set.mem] function. *) - (** [occurs blk after:x def] if [def] is occurs after definition - [def] in [blk]. *) val occurs : t -> after:tid -> tid -> bool + (** [occurs blk after:x def] if [def] is occurs after definition [def] in + [blk]. *) - (** [flatten blk] translates [blk] into the flattened form. - In the flattened form, all operations are applied to variables, - constants, or unknowns, i.e., the operands could not be compound - expressions. E.g., + val flatten : t -> t + (** [flatten blk] translates [blk] into the flattened form. In the flattened + form, all operations are applied to variables, constants, or unknowns, + i.e., the operands could not be compound expressions. E.g., {v #10 := 11 * (#9 + 13) - 17 v} @@ -9453,111 +8966,106 @@ module Std : sig #10 := #12 - 17 v} @since 2.5.0 *) - val flatten : t -> t - (** Builder interface. *) + (** Builder interface. *) module Builder : sig - (** This interface provides an efficient way to build new - blocks. It is also useful, when rebuilding existing block. - It is the user responsibility to preserve the uniqueness of - identifiers throughout the program instance. *) type t + (** This interface provides an efficient way to build new blocks. It is + also useful, when rebuilding existing block. It is the user + responsibility to preserve the uniqueness of identifiers throughout + the program instance. *) - (** [create ~tid ~phis ~defs ~jmp ()] creates a block builder. - If [tid] parameter is specified, then the new block will - have this tid. If any of [phis], [defs] or [jmps] parameters - are specified, the provtided number would be used as a hint - of the expected amount of the corresponding entries. Since - it is the hint, it can mismatch with the actual size. The - hint must be a positive number. *) val create : ?tid:tid -> ?phis:int -> ?defs:int -> ?jmps:int -> unit -> t + (** [create ~tid ~phis ~defs ~jmp ()] creates a block builder. If [tid] + parameter is specified, then the new block will have this tid. If any + of [phis], [defs] or [jmps] parameters are specified, the provtided + number would be used as a hint of the expected amount of the + corresponding entries. Since it is the hint, it can mismatch with the + actual size. The hint must be a positive number. *) - (** [init blk] creates a builder based on an existing - block. If [copy_phis], [copy_defs] or [copy_jmps] is [true] - (defaults to [false]), then prepopulate builder with - corresponding terms from block [blk]. If [same_tid] is true - (default), then a resulting block will have the same [tid] - as block [blk]. Otherwise, a fresh new [tid] will be created. *) val init : - ?same_tid :bool -> (** defaults to [true] *) - ?copy_phis:bool -> (** defaults to [false] *) - ?copy_defs:bool -> (** defaults to [false] *) - ?copy_jmps:bool -> (** defaults to [false] *) - blk term -> t + ?same_tid:bool -> + (* defaults to [true] *) + ?copy_phis:bool -> + (* defaults to [false] *) + ?copy_defs:bool -> + (* defaults to [false] *) + ?copy_jmps:bool -> + (* defaults to [false] *) + blk term -> + t + (** [init blk] creates a builder based on an existing block. If + [copy_phis], [copy_defs] or [copy_jmps] is [true] (defaults to + [false]), then prepopulate builder with corresponding terms from block + [blk]. If [same_tid] is true (default), then a resulting block will + have the same [tid] as block [blk]. Otherwise, a fresh new [tid] will + be created. *) - (** appends a definition *) val add_def : t -> def term -> unit + (** appends a definition *) - (** appends a jump *) val add_jmp : t -> jmp term -> unit + (** appends a jump *) - (** appends a phi node *) val add_phi : t -> phi term -> unit + (** appends a phi node *) - (** appends generic element *) val add_elt : t -> elt -> unit + (** appends generic element *) - (** returns current result *) - val result : t -> blk term + val result : t -> blk term + (** returns current result *) end module KB : sig - (** [lift block] takes a basic block of assembly instructions and - lifts it to a list of blk terms. The first term in the list - is the entry. - - @since 2.6.0 - *) val lift : cfg -> block -> blk term list knowledge + (** [lift block] takes a basic block of assembly instructions and lifts it + to a list of blk terms. The first term in the list is the entry. - (** [from_insn ?addr insn] creates an IR representation of a single - machine instruction [insn]. - - Uses the [Term.slot] to get the IR representation of an - instruction, trying to keep the number of basic blocks minimal - (by coalescing adjacent data operations). - - If [addr] is specified then the term identifier of the first - block will be specific to that address and the [address] - attribute will be set to the passed value. + @since 2.6.0 *) - @since 2.6.0 - *) val from_insn : ?addr:addr -> insn -> blk term list knowledge + (** [from_insn ?addr insn] creates an IR representation of a single + machine instruction [insn]. - (** [from_insns block] translates a basic block of instructions - into IR. - - Takes a list of instructions in the execution order and - translates them into a list of IR blks that are properly - connected. The instructions shall belong to a single basic - block. + Uses the [Term.slot] to get the IR representation of an instruction, + trying to keep the number of basic blocks minimal (by coalescing + adjacent data operations). - The first element of the result is the entry block. If [addr] - is set then it will have the term identifier equal to - [Term.for_addr addr] and the [address] attribute will be set to - [addr]. + If [addr] is specified then the term identifier of the first block + will be specific to that address and the [address] attribute will be + set to the passed value. - The [fall] parameter designates the fallthrough destination of - the basic block. The destination could be either - interprocedural ([`Inter]) or intraprocedural ([`Intra]). In - the latter case it will be reified into a jump of the call - kind. If the last instruction (the basic block terminator) is a - barrier [Insn.(is barrier) is [true]], then the [fall] - destination is ignored, even if set. + @since 2.6.0 *) - @since 2.6.0 - *) val from_insns : - ?fall:[`Inter of Jmp.dst | `Intra of Jmp.dst ] -> + ?fall:[ `Inter of Jmp.dst | `Intra of Jmp.dst ] -> ?addr:addr -> insn list -> blk term list knowledge + (** [from_insns block] translates a basic block of instructions into IR. + + Takes a list of instructions in the execution order and translates + them into a list of IR blks that are properly connected. The + instructions shall belong to a single basic block. - (** [flatten blk] translates [blk] into the flattened form. - In the flattened form, all operations are applied to variables, - constants, or unknowns, i.e., the operands could not be compound - expressions. E.g., + The first element of the result is the entry block. If [addr] is set + then it will have the term identifier equal to [Term.for_addr addr] + and the [address] attribute will be set to [addr]. + + The [fall] parameter designates the fallthrough destination of the + basic block. The destination could be either interprocedural + ([`Inter]) or intraprocedural ([`Intra]). In the latter case it will + be reified into a jump of the call kind. If the last instruction (the + basic block terminator) is a barrier [Insn.(is barrier) is [true]], + then the [fall] destination is ignored, even if set. + + @since 2.6.0 *) + + val flatten : t -> t knowledge + (** [flatten blk] translates [blk] into the flattened form. In the + flattened form, all operations are applied to variables, constants, or + unknowns, i.e., the operands could not be compound expressions. E.g., {v #10 := 11 * (#9 + 13) - 17 v} @@ -9568,226 +9076,212 @@ module Std : sig #10 := #12 - 17 v} - @since 2.6.0 - *) - val flatten : t -> t knowledge + @since 2.6.0 *) end - (** [pp_slots names] prints slots that are in [names]. *) val pp_slots : string list -> Format.formatter -> t -> unit + (** [pp_slots names] prints slots that are in [names]. *) + include Regular.S with type t := t end - (** Subroutine argument. *) + (** Subroutine argument. *) module Arg : sig - (** In the IR model subroutines are not functions, that has a return - value, but a more general subroutine that has a set of - arguments, that can be used for input, output or both - purposes. *) + (** In the IR model subroutines are not functions, that has a return value, + but a more general subroutine that has a set of arguments, that can be + used for input, output or both purposes. *) type t = arg term + val reify : + ?tid:tid -> ?intent:intent -> 'a Theory.var -> 'a Theory.value -> t + (** [reify v x] reifies Core Theory terms into an [arg] term. *) - (** [reify v x] reifies Core Theory terms into an [arg] term. *) - val reify : ?tid:tid -> ?intent:intent -> - 'a Theory.var -> - 'a Theory.value -> t - - - (** [var arg] is the left-hand-side of the [arg] term. *) val var : t -> unit Theory.var + (** [var arg] is the left-hand-side of the [arg] term. *) - (** [value arg] is the right-hand-side of the [arg] term. *) val value : t -> unit Theory.value + (** [value arg] is the right-hand-side of the [arg] term. *) - (** [create ?intent var exp] creates an argument. If intent is - not specified it is left unknown. *) val create : ?tid:tid -> ?intent:intent -> var -> exp -> t + (** [create ?intent var exp] creates an argument. If intent is not specified + it is left unknown. *) - (** [lhs arg] returns a variable associated with the argument. *) val lhs : t -> var + (** [lhs arg] returns a variable associated with the argument. *) - (** [rhs arg] returns an expression to which argument is - bound. *) val rhs : t -> exp + (** [rhs arg] returns an expression to which argument is bound. *) - (** [intent arg] returns the argument intent. The [None] value - denontes unknown intent. *) val intent : t -> intent option + (** [intent arg] returns the argument intent. The [None] value denontes + unknown intent. *) - (** [with_intent intent arg] updates argument intent *) val with_intent : t -> intent -> t + (** [with_intent intent arg] updates argument intent *) - (** removes the intent from an argument *) val with_unknown_intent : t -> t + (** removes the intent from an argument *) - (** {2 Attributes} *) + (** {2 Attributes} *) - (** a caller of the subroutine must use an argument tagged with - this attribute. This is useful for subroutines where not - checking the result is either a security problem or always a - bug, such as [realloc] *) val warn_unused : unit tag + (** a caller of the subroutine must use an argument tagged with this + attribute. This is useful for subroutines where not checking the result + is either a security problem or always a bug, such as [realloc] *) - (** the size of allocated memory is the product of arguments - marked with [alloc_size] attribute *) val alloc_size : unit tag + (** the size of allocated memory is the product of arguments marked with + [alloc_size] attribute *) - (** format(DSL) the specified argument of a subroutine is - actually a format string written in a corresponding DSL. *) val format : string tag + (** format(DSL) the specified argument of a subroutine is actually a format + string written in a corresponding DSL. *) - (** a contract requirement that this argument is not NULL. *) val nonnull : unit tag + (** a contract requirement that this argument is not NULL. *) include Regular.S with type t := t end - (** A control transfer to another subroutine. *) + (** A control transfer to another subroutine. *) module Call : sig - (** calls have two-fold representation. From the intra-procedural - point of view call is a transfer of control to the next - address with a side effect of calling to other - subroutine. From the iter-procedural point of view, call is - transfer of control from caller to the callee, that may or may - not result in a return to the caller side. Thus each call is - represented by two labels. The [target] label points to the - procedure that is called, the [return] label denotes a block - to which the control flow should (but may not) continue when - called subroutine returns. *) + (** calls have two-fold representation. From the intra-procedural point of + view call is a transfer of control to the next address with a side + effect of calling to other subroutine. From the iter-procedural point of + view, call is transfer of control from caller to the callee, that may or + may not result in a return to the caller side. Thus each call is + represented by two labels. The [target] label points to the procedure + that is called, the [return] label denotes a block to which the control + flow should (but may not) continue when called subroutine returns. *) type t = call - (** [create ?return ~target ()] creates a call to the [target] - subroutine. If [return] is not provided, that it is assumed that - subroutine doesn't return. *) val create : ?return:label -> target:label -> unit -> t + (** [create ?return ~target ()] creates a call to the [target] subroutine. + If [return] is not provided, that it is assumed that subroutine doesn't + return. *) - (** returns the target of the call *) val target : t -> label + (** returns the target of the call *) - (** returns call continuation *) val return : t -> label option + (** returns call continuation *) - (** updates target *) val with_target : t -> label -> t + (** updates target *) - (** updates return continuation *) val with_return : t -> label -> t + (** updates return continuation *) - (** marks call as a "noreturn" *) val with_noreturn : t -> t + (** marks call as a "noreturn" *) include Regular.S with type t := t end - (** Target of a control flow transfer. *) + (** Target of a control flow transfer. *) module Label : sig - (** Labels can be direct, that are known to us. Or indirect, that - are arbitrary expressions. *) + (** Labels can be direct, that are known to us. Or indirect, that are + arbitrary expressions. *) type t = label - (** [create ()] creates a new label with a freshly generated - identifier. *) val create : unit -> t + (** [create ()] creates a new label with a freshly generated identifier. *) - (** [direct label] creates a direct label with a given identifier. *) val direct : tid -> t + (** [direct label] creates a direct label with a given identifier. *) - (** [indirect exp] creates a label that is resolved to an - expression [exp] *) val indirect : exp -> t + (** [indirect exp] creates a label that is resolved to an expression [exp] + *) - (** updates the label *) val change : ?direct:(tid -> tid) -> ?indirect:(exp -> exp) -> t -> t + (** updates the label *) include Regular.S with type t := t end (** Source of information.*) module Source : sig - type 'a t = 'a Or_error.t stream type 'a source = 'a t - (** Factory of data processors. - Registry of sources of information. *) + (** Factory of data processors. Registry of sources of information. *) module Factory : sig - (** Factory interface *) + (** Factory interface *) module type S = sig type t - (** [list source] is a list of names of source providers *) val list : unit -> string list + (** [list source] is a list of names of source providers *) - (** [create name args] finds a source provider with the - given name and creates it *) val find : string -> t source option + (** [create name args] finds a source provider with the given name and + creates it *) - (** [register name cons] registers a method that creates a given - source of information. If a method with the given name already - exists, then it will be superseded by a new one. *) val register : string -> t source -> unit + (** [register name cons] registers a method that creates a given source + of information. If a method with the given name already exists, then + it will be superseded by a new one. *) end - module Make(T : T) : S with type t = T.t + module Make (T : T) : S with type t = T.t end end (** Abstract taint. - We represent a taint with a term identifier, to designate that a - taint was produced by a term with the given id. A taint set is - usually associated with each variable of a given term. This set - defines a set of taints with which a variable is tainted. + We represent a taint with a term identifier, to designate that a taint was + produced by a term with the given id. A taint set is usually associated + with each variable of a given term. This set defines a set of taints with + which a variable is tainted. - @deprecated use the Bap Taint Framework - *) + @deprecated use the Bap Taint Framework *) module Taint : sig - type t = tid - type set = Tid.Set.t [@@deriving bin_io, compare, sexp] type map = set Var.Map.t [@@deriving bin_io, compare, sexp] - (** value stored in register is source of taint *) val reg : t tag + (** value stored in register is source of taint *) - (** value stored at memory location, that is stored - in the register is tainted.*) val ptr : t tag + (** value stored at memory location, that is stored in the register is + tainted.*) - (** maps each variable that is used in a term to a set of register taints *) val regs : map tag + (** maps each variable that is used in a term to a set of register taints *) - (** maps each variable that is used in a term to a set of pointer taints *) val ptrs : map tag + (** maps each variable that is used in a term to a set of pointer taints *) - (** [merge t1 t2] merge taint maps *) val merge : map -> map -> map + (** [merge t1 t2] merge taint maps *) - class context : object('s) - - (** taint result with the given set of taints *) + class context : object ('s) method taint_reg : Bil.result -> set -> 's + (** taint result with the given set of taints *) - (** taint memory region [addr, addr+size] with the given set of taints *) method taint_ptr : addr -> size -> set -> 's + (** taint memory region [addr, addr+size] with the given set of taints *) - (** returns a set of taints associated with a given result of computation *) method reg_taints : Bil.result -> set + (** returns a set of taints associated with a given result of computation + *) - (** returns a set of taints associated with a given address *) method ptr_taints : addr -> set + (** returns a set of taints associated with a given address *) - (** returns all known taints. *) method all_taints : set + (** returns all known taints. *) end module type S = sig - type ('a,'e) state - module Expi : Expi.S with type ('a,'e) state = ('a,'e) state + type ('a, 'e) state + + module Expi : Expi.S with type ('a, 'e) state = ('a, 'e) state (** Propagate taint through expressions. @@ -9797,12 +9291,11 @@ module Std : sig The following syntactic forms are used in propagation rules: - [*a] - load from address [a], where [a] is immediate value; - [*a <- v] - store value [v] at address [a]; - [exp ~> v] - expression reduces to value [v]; - [v -> t] - value [v] is tainted by a taint [t]; - [] - BIL binary operation or BIL concat expression; - [] - BIL unary, extract or cast expression. + [*a] - load from address [a], where [a] is immediate value; [*a <- v] + \- store value [v] at address [a]; [exp ~> v] - expression reduces to + value [v]; [v -> t] - value [v] is tainted by a taint [t]; [] - + BIL binary operation or BIL concat expression; [] - BIL unary, + extract or cast expression. {3 Rules} @@ -9810,342 +9303,295 @@ module Std : sig exists a deriviation of the following rules, proving this fact. {v - - *a ~> v - a -> t - ---------------- :: p_load - v -> t - - *a <- v - v -> t - ---------------- :: p_store - a -> t - - v1 v2 ~> v3 - v1 -> t - ----------------- :: p_bop_lhs - v3 -> t - - v1 v2 ~> v3 - v2 -> t - ----------------- :: p_bop_rhs - v3 -> t - - v1 ~> v2 - v1 -> t - ----------------- :: p_uop - v2 -> t - - v} + *a ~> v + a -> t + ---------------- :: p_load + v -> t + + *a <- v + v -> t + ---------------- :: p_store + a -> t + + v1 v2 ~> v3 + v1 -> t + ----------------- :: p_bop_lhs + v3 -> t + + v1 v2 ~> v3 + v2 -> t + ----------------- :: p_bop_rhs + v3 -> t + + v1 ~> v2 + v1 -> t + ----------------- :: p_uop + v2 -> t + v} Note 1: this class overrides only methods, that computes non-leaf expressions, leaving a space for extension for derived classes. Note 2: we do not propagate taint from condition to branches in the - if/then/else expression, since we're propagating only data - dependency, not control dependency. - - Although, one can argue, that in expression [if c then x else y] - the result depends on [c], since if we change [c] we will get - different results, there is a good reason for not propagating this - dependency - the consistency with BIR and BIL. Consider, BIL's - [if] statement or BIR's conditional jump. If we will start to - propagate taint from condition in [ite] expression, then we should - also propagate it in BIL's and BIR's conditionals. Unfortunately - the latter is not possible. - - *) - class ['a] propagator : object('s) + if/then/else expression, since we're propagating only data dependency, + not control dependency. + + Although, one can argue, that in expression [if c then x else y] the + result depends on [c], since if we change [c] we will get different + results, there is a good reason for not propagating this dependency - + the consistency with BIR and BIL. Consider, BIL's [if] statement or + BIR's conditional jump. If we will start to propagate taint from + condition in [ite] expression, then we should also propagate it in + BIL's and BIR's conditionals. Unfortunately the latter is not + possible. *) + class ['a] propagator : object ('s) constraint 'a = #context inherit ['a] Expi.t end end - module Make(M : Monad.State.S2) : S with type ('a,'e) state = ('a,'e) M.t - - include S with type ('a,'e) state = ('a,'e) Monad.State.t + module Make (M : Monad.State.S2) : S with type ('a, 'e) state = ('a, 'e) M.t + include S with type ('a, 'e) state = ('a, 'e) Monad.State.t - (** print a set of taints *) val pp_set : Format.formatter -> set -> unit + (** print a set of taints *) - (** print a taint map *) val pp_map : Format.formatter -> map -> unit + (** print a taint map *) module Map : Regular.S with type t = map - end [@@deprecated "[since 2018-03] use the Bap Taint Framework instead"] + end + [@@deprecated "[since 2018-03] use the Bap Taint Framework instead"] type 'a source = 'a Source.t - (** Symbolizer maps addresses to function names *) + (** Symbolizer maps addresses to function names *) module Symbolizer : sig - - (** symbolizer data type *) type t = symbolizer + (** symbolizer data type *) - - (** [provide agent symbolizer] registers [symbolizer] in the - knowledge base. + val provide : Knowledge.agent -> t -> unit + (** [provide agent symbolizer] registers [symbolizer] in the knowledge base. This function enables an easy integration of the old - symbolizers/information sources infrastructure into the - knowledge base representation introduced with BAP 2.0. + symbolizers/information sources infrastructure into the knowledge base + representation introduced with BAP 2.0. - A symbolizer is regiestered in the knowledge base through an - agent which denotes the level of trustwothiness of the - symbolizer. - *) - val provide : Knowledge.agent -> t -> unit + A symbolizer is regiestered in the knowledge base through an agent which + denotes the level of trustwothiness of the symbolizer. *) + val providing : + Knowledge.agent -> t -> (unit -> 'a knowledge) -> 'a knowledge (** [providing t scope] provides the information in the specified [scope], - After the [scope] function is evaluated the information source - is retracted from the knowledge base. - - See {!Bap_knowledge.Knowledge.proposing{proposing}}. + After the [scope] function is evaluated the information source is + retracted from the knowledge base. - @since 2.2.0 - *) - val providing : Knowledge.agent -> t -> (unit -> 'a knowledge) -> 'a knowledge + See {!Bap_knowledge.Knowledge.proposing} . + @since 2.2.0 *) - (** [create fn] creates a symbolizer for a given function *) val create : (addr -> string option) -> t + (** [create fn] creates a symbolizer for a given function *) - - (** [set_path s] limits the symbolizer applicability only to - addresses that belong to a file/compilation unit with the - specified path. - - @since 2.2.0 - *) val set_path : t -> string -> t + (** [set_path s] limits the symbolizer applicability only to addresses that + belong to a file/compilation unit with the specified path. + @since 2.2.0 *) - (** [path s] is the path to the file that this symbolizer serves. - @since 2.2.0 - *) val path : t -> string option + (** [path s] is the path to the file that this symbolizer serves. + @since 2.2.0 *) - (** [of_blocks] produces a symbolizer from a serialized - sequence of blocks. Each element of the sequence is deconstructed - as [(name,ba,ea)], where [name] is a subroutine name, [ba] is a - virtual address of a block start, and [ea] is an address of the - block end. *) val of_blocks : (string * addr * addr) seq -> t + (** [of_blocks] produces a symbolizer from a serialized sequence of blocks. + Each element of the sequence is deconstructed as [(name,ba,ea)], where + [name] is a subroutine name, [ba] is a virtual address of a block start, + and [ea] is an address of the block end. *) - (** [resolve symbolizer addr] returns a name of function, - to which a given address belongs. If the address is not know to - the symbolizer, then the name is constructed from an address *) val resolve : t -> addr -> string + (** [resolve symbolizer addr] returns a name of function, to which a given + address belongs. If the address is not know to the symbolizer, then the + name is constructed from an address *) - (** [chain ss] creates a symbolizer, that will try to resolve - an address using each symbolizer in order. *) val chain : t list -> t + (** [chain ss] creates a symbolizer, that will try to resolve an address + using each symbolizer in order. *) - (** [empty] is a symbolizer that knows nothing. *) val empty : t + (** [empty] is a symbolizer that knows nothing. *) - module Factory : - Source.Factory.S with type t = t [@@deprecated "[since 2019-05] use [provide]"] - + module Factory : Source.Factory.S with type t = t + [@@deprecated "[since 2019-05] use [provide]"] end (** Rooter finds starts of functions in the binary. *) module Rooter : sig type t = rooter - - (** [provide r] reflects the rooter information to the knowledge - base. - - @since 2.0.0 - *) val provide : t -> unit + (** [provide r] reflects the rooter information to the knowledge base. + @since 2.0.0 *) + val providing : t -> (unit -> 'a knowledge) -> 'a knowledge (** [providing t scope] provides the information in the specified [scope], - After the [scope] function is evaluated the information source - is retracted from the knowledge base. - - See {!Bap_knowledge.Knowledge.promising{promising}}. - - @since 2.2.0 - *) - val providing : t -> (unit -> 'a knowledge) -> 'a knowledge + After the [scope] function is evaluated the information source is + retracted from the knowledge base. + See {!Bap_knowledge.Knowledge.promising}. - (** [set_path s] limits the symbolizer applicability only to - addresses that belong to a file/compilation unit with the - specified path. + @since 2.2.0 *) - @since 2.2.0 - *) val set_path : t -> string -> t + (** [set_path s] limits the symbolizer applicability only to addresses that + belong to a file/compilation unit with the specified path. + @since 2.2.0 *) - (** [path s] is the path to the file that this symbolizer serves. - @since 2.2.0 - *) val path : t -> string option + (** [path s] is the path to the file that this symbolizer serves. + @since 2.2.0 *) - (** [create seq] creates a rooter from a given sequence of addresses *) val create : addr seq -> t + (** [create seq] creates a rooter from a given sequence of addresses *) - (** [of_image img] create a rooter that will use existing symbol - information inside the image, to find roots. *) val of_image : image -> t + (** [of_image img] create a rooter that will use existing symbol information + inside the image, to find roots. *) - (** [of_blocks] produces a rooter from a serialized - sequence of blocks. Each element of the sequence is deconstructed - as [(name,ba,ea)], where [name] is a subroutine name, [ba] is a - virtual address of a block start, and [ea] is an address of the - block end. *) val of_blocks : (string * addr * addr) seq -> t + (** [of_blocks] produces a rooter from a serialized sequence of blocks. Each + element of the sequence is deconstructed as [(name,ba,ea)], where [name] + is a subroutine name, [ba] is a virtual address of a block start, and + [ea] is an address of the block end. *) - (** [roots r] enumerates roots found by rooter [r] *) val roots : t -> addr seq + (** [roots r] enumerates roots found by rooter [r] *) - (** [union r1 r2] joins roots from rooters [r1] and [r2] *) val union : t -> t -> t + (** [union r1 r2] joins roots from rooters [r1] and [r2] *) - (** A factory of rooters. Useful to register custom rooters *) module Factory : Source.Factory.S with type t = t - [@@deprecated "[since 2019-05] use [provide]"] - + [@@deprecated "[since 2019-05] use [provide]"] + (** A factory of rooters. Useful to register custom rooters *) end - (** Brancher is responsible for resolving destinations of branch - instructions. *) + (** Brancher is responsible for resolving destinations of branch instructions. + *) module Brancher : sig open Disasm_expert.Basic + type t = brancher - (** destination target (if known) and edge classification (see {!edge}) *) type dest = addr option * edge [@@deriving sexp] + (** destination target (if known) and edge classification (see {!edge}) *) type dests = dest list [@@deriving sexp] - (** [create resolve] creates a brancher from [resolve] function, - that accepts a memory region, occupied by an instruction, the - instruction itself and returns a list of destination. *) val create : (mem -> full_insn -> dests) -> t + (** [create resolve] creates a brancher from [resolve] function, that + accepts a memory region, occupied by an instruction, the instruction + itself and returns a list of destination. *) - (** [set_path s] limits the symbolizer applicability only to - addresses that belong to a file/compilation unit with the - specified path. - - @since 2.2.0 - *) val set_path : t -> string -> t + (** [set_path s] limits the symbolizer applicability only to addresses that + belong to a file/compilation unit with the specified path. - (** [path s] is the path to the file that this symbolizer serves. - @since 2.2.0 - *) - val path : t -> string option + @since 2.2.0 *) + val path : t -> string option + (** [path s] is the path to the file that this symbolizer serves. + @since 2.2.0 *) - (** [of_bil arch] creates a brancher that will use a BIL code to - statically deduce the instruction destinations. *) val of_bil : arch -> t + (** [of_bil arch] creates a brancher that will use a BIL code to statically + deduce the instruction destinations. *) - (** [resolve brancher mem insn] returns a list of destinations of - the instruction [insn], that occupies memory region [mem]. *) val resolve : t -> mem -> full_insn -> dests + (** [resolve brancher mem insn] returns a list of destinations of the + instruction [insn], that occupies memory region [mem]. *) - - (** [provide brancher] provides the brancher information to the - knowledge base. *) val provide : t -> unit + (** [provide brancher] provides the brancher information to the knowledge + base. *) + val providing : t -> (unit -> 'a knowledge) -> 'a knowledge (** [providing t scope] provides the information in the specified [scope], - After the [scope] function is evaluated the information source - is retracted from the knowledge base. - - See {!Bap_knowledge.Knowledge.promising{promising}}. + After the [scope] function is evaluated the information source is + retracted from the knowledge base. - @since 2.2.0 - *) - val providing : t -> (unit -> 'a knowledge) -> 'a knowledge + See {!Bap_knowledge.Knowledge.promising}. + @since 2.2.0 *) module Factory : Source.Factory.S with type t = t - [@@deprecated "[since 2019-05] use [provide]"] - + [@@deprecated "[since 2019-05] use [provide]"] end - (** Reconstructor is responsible for reconstructing symbol table - from a CFG. It should partition a CFG into a set of possibly - intersecting functions. See {!Symtab} module for more - information about symbol table and functions. *) + (** Reconstructor is responsible for reconstructing symbol table from a CFG. + It should partition a CFG into a set of possibly intersecting functions. + See {!Symtab} module for more information about symbol table and + functions. *) module Reconstructor : sig type t = reconstructor - (** [create f] creates a reconstructor from a given function [f] *) val create : (cfg -> symtab) -> t + (** [create f] creates a reconstructor from a given function [f] *) - (** [default name roots] builds a reconstructor from a given - function, that maps addresses to function names (see - {!Symbolizer}) and a list of known function starts. The - reconstructor will extend the list of function starts with - destinations of call instructions found in the CFG. Also, - the reconstructor treats every node without input edges as - a function start. For each function start builds a function - using the following definition of a function: - - Function is built from the entry block and every block that - is reachable from it without using calls. *) val default : (word -> string) -> word list -> t + (** [default name roots] builds a reconstructor from a given function, that + maps addresses to function names (see {!Symbolizer}) and a list of known + function starts. The reconstructor will extend the list of function + starts with destinations of call instructions found in the CFG. Also, + the reconstructor treats every node without input edges as a function + start. For each function start builds a function using the following + definition of a function: + Function is built from the entry block and every block that is reachable + from it without using calls. *) - (** [of_blocks] produces a reconstructor from a serialized - sequence of blocks. Each element of the sequence is deconstructed - as [(name,ba,ea)], where [name] is a subroutine name, [ba] is a - virtual address of a block start, and [ea] is an address of the - block end. *) val of_blocks : (string * addr * addr) seq -> t + (** [of_blocks] produces a reconstructor from a serialized sequence of + blocks. Each element of the sequence is deconstructed as [(name,ba,ea)], + where [name] is a subroutine name, [ba] is a virtual address of a block + start, and [ea] is an address of the block end. *) - (** [run reconstructor cfg] reconstructs a symbol table from a - given cfg *) val run : t -> cfg -> symtab + (** [run reconstructor cfg] reconstructs a symbol table from a given cfg *) - (** a factory of reconstructors *) module Factory : Source.Factory.S with type t = t + (** a factory of reconstructors *) end (** Event subsystem. - This module is the [Bap_main_event] module extended with the - [Pritable.S] interface, kept here for backward compatibility. - *) + This module is the [Bap_main_event] module extended with the [Pritable.S] + interface, kept here for backward compatibility. *) module Event : sig - type t = Bap_main_event.t = .. type event = t = .. - (** global [stream] of events *) val stream : t stream + (** global [stream] of events *) - (** [send event] to the {!stream} *) val send : t -> unit + (** [send event] to the {!stream} *) - (** [register_printer f] when event [e] is printed, [f e] must be - [None] if [f] is not a subset of events, that is intended to be - printed by an [f]. If it is [Some str], then [str] is printed - out. - - If more than one printer returns [Some thing] for the same event, - then the last registered has the precedence.*) val register_printer : (t -> string option) -> unit + (** [register_printer f] when event [e] is printed, [f e] must be [None] if + [f] is not a subset of events, that is intended to be printed by an [f]. + If it is [Some str], then [str] is printed out. + + If more than one printer returns [Some thing] for the same event, then + the last registered has the precedence.*) (** Logging event.*) module Log : sig - type level = Bap_main_event.Log.level = - | Debug - | Info - | Warning - | Error + type level = Bap_main_event.Log.level = Debug | Info | Warning | Error type info = Bap_main_event.Log.info = { level : level; @@ -10153,244 +9599,216 @@ module Std : sig message : string; } - (** re-exports {!Bap_main_event.Log.Message} *) type event += Message of info - (** [message level ~section fmt ...] send a message of the - specified [level] and [section]. + val message : + level -> section:string -> ('a, Format.formatter, unit) format -> 'a + (** [message level ~section fmt ...] send a message of the specified + [level] and [section]. - Do not use this function directly, instead include the - instantiation of a [Self] functor, and use corresponding - logging functions, e.g., + Do not use this function directly, instead include the instantiation + of a [Self] functor, and use corresponding logging functions, e.g., {v include Self() (* ... *) info "created some %s" "thing" v} *) - val message : level -> section:string -> ('a,Format.formatter,unit) format -> 'a - - (** re-exports {!Bap_main_event.Log.Progress} *) - type event += Progress of { - task : string; (** hierarchical task name *) - note : string option; (** a short note *) - stage : int option; (** entered stage *) - total : int option; (** total number of stages *) - } + (** re-exports {!Bap_main_event.Log.Progress} *) + type event += + | Progress of { + task : string; (** hierarchical task name *) + note : string option; (** a short note *) + stage : int option; (** entered stage *) + total : int option; (** total number of stages *) + } - (** [progress ?note ?stage ?total name] sends a progress report. - This function should be used by the main components only, - while plugins should use the [report_progress] function from - the [Self()] interface. All parameters defaults to [None].*) val progress : ?note:string -> ?stage:int -> ?total:int -> string -> unit + (** [progress ?note ?stage ?total name] sends a progress report. This + function should be used by the main components only, while plugins + should use the [report_progress] function from the [Self()] interface. + All parameters defaults to [None].*) end include Printable.S with type t := t end type event = Event.t = .. - type project - (** The interface to the BAP toplevel state. - To create a project from the binary code BAP relies on the - knowledge base, which is a state monad underneath the hood, - or, put it simply, each knowledge computation is a function - of type [state -> state * 'a]. To enable backward compatibility, - we compute each such stateful computation in the toplevel, which - also stores the hidden state. + To create a project from the binary code BAP relies on the knowledge base, + which is a state monad underneath the hood, or, put it simply, each + knowledge computation is a function of type [state -> state * 'a]. To + enable backward compatibility, we compute each such stateful computation + in the toplevel, which also stores the hidden state. Using this interface it is possible to evaluate knowledge base - computations and extract their results to concrete values. Since - the knowledge base computations are not expression but objective - language, i.e., they evaluate to knowledge base objects, not to - values, we commonly need to create objects that will carry the - result of the computation as their properties. To ease the - process this module provides the notion of toplevel variables, - that denote such properties. Here is an example, how to run a - knowledge base computation and extract its result, assuming that + computations and extract their results to concrete values. Since the + knowledge base computations are not expression but objective language, + i.e., they evaluate to knowledge base objects, not to values, we commonly + need to create objects that will carry the result of the computation as + their properties. To ease the process this module provides the notion of + toplevel variables, that denote such properties. Here is an example, how + to run a knowledge base computation and extract its result, assuming that [analysis] is a function of type [unit -> my knowledge] {[ let result : my var = Toplevel.var "my-property" + let run analysis : my = Toplevel.put result (analysis ()); Toplevel.get result ]} - There are also [eval] and [exec] functions that could be used to - extract values from the existing properties, e.g., + There are also [eval] and [exec] functions that could be used to extract + values from the existing properties, e.g., {[ - let get_unit tid = - eval Theory.Label.unit (KB.return tid) + let get_unit tid = eval Theory.Label.unit (KB.return tid) ]} - Finally, the interface provides functions to control the inner - state of the toplevel, which is the knowledge base that is used - by BAP throught the lifetime of the BAP process and could be - also persistet between runs. E.g., the [disassemble] plugin is - persisting the knowledge base in the BAP cache facility and - loads it when it identifies that the input digest is the same. + Finally, the interface provides functions to control the inner state of + the toplevel, which is the knowledge base that is used by BAP throught the + lifetime of the BAP process and could be also persistet between runs. + E.g., the [disassemble] plugin is persisting the knowledge base in the BAP + cache facility and loads it when it identifies that the input digest is + the same. - Warning: this interface should be used with care, in particular, - it shall not be used in the context of another knowledge - computation that is also run in the toplevel. + Warning: this interface should be used with care, in particular, it shall + not be used in the context of another knowledge computation that is also + run in the toplevel. @since 2.2.0 made public and documented, the state interface - was available since 2.0.0 but wasn't documented and considered - official. + was available since 2.0.0 but wasn't documented and considered official. *) module Toplevel : sig - - (** this exception is raised when the knowledge computation - enters the inconsistent state. - - @since 2.2.0 - *) exception Conflict of Knowledge.conflict + (** this exception is raised when the knowledge computation enters the + inconsistent state. + @since 2.2.0 *) - (** {3 Toplevel variables} *) + (** {3 Toplevel variables} *) - (** the type of variables holding property ['p] *) type 'p var + (** the type of variables holding property ['p] *) - - (** [var name] creates a fresh variable. - - Creates and declares a fresh new property of the - [bap:toplevel] class. The name is mangled to prevent clashing - with existing properties, and each evaluation of this function - creates a new property that is distinct from any previously - created properties. - - Warning: this function changes the static representation of - the knowledge base (the scheme) and should be only used to - create static (global) variables, that have the lifetime of - the BAP process. It is not recommended to call this function - inside any other function. - *) val var : string -> 'p var + (** [var name] creates a fresh variable. + Creates and declares a fresh new property of the [bap:toplevel] class. + The name is mangled to prevent clashing with existing properties, and + each evaluation of this function creates a new property that is distinct + from any previously created properties. - (** [put var exp] evaluates [exp] and sets [var] to its result. + Warning: this function changes the static representation of the + knowledge base (the scheme) and should be only used to create static + (global) variables, that have the lifetime of the BAP process. It is not + recommended to call this function inside any other function. *) - @raise Conflict if [exp] ends up in the conflicting state. - *) val put : 'p var -> 'p knowledge -> unit + (** [put var exp] evaluates [exp] and sets [var] to its result. + @raise Conflict if [exp] ends up in the conflicting state. *) - (** [get var] reads the value of the variable. - - @raise Not_found if [var] was not set with [put]. - *) val get : 'p var -> 'p + (** [get var] reads the value of the variable. - (** {3 The slot interface} *) + @raise Not_found if [var] was not set with [put]. *) + (** {3 The slot interface} *) + val eval : ('a, 'p) Knowledge.slot -> 'a Knowledge.obj knowledge -> 'p (** [eval property obj_exp] gets [property] of [obj_exp]. - Evaluates the computation [obj_exp] that shall return an - object of class ['a] and returns the value ['p] of the specified - [property]. - - @raise Conflict when the knowledge base enters the conflicting - state. - *) - val eval : ('a,'p) Knowledge.slot -> 'a Knowledge.obj knowledge -> 'p + Evaluates the computation [obj_exp] that shall return an object of class + ['a] and returns the value ['p] of the specified [property]. + @raise Conflict when the knowledge base enters the conflicting state. *) - (** [try_eval property object] is like [eval property object] but - returns [Error conflict] instead of raising an exception. *) - val try_eval : ('a,'p) Knowledge.slot -> 'a Knowledge.obj knowledge -> - ('p,Knowledge.conflict) result - + val try_eval : + ('a, 'p) Knowledge.slot -> + 'a Knowledge.obj knowledge -> + ('p, Knowledge.conflict) result + (** [try_eval property object] is like [eval property object] but returns + [Error conflict] instead of raising an exception. *) + val exec : unit knowledge -> unit (** [exec stmt] executes the side-effectful knowledge computation. Executes the statement and updates the internal knowledge base. - @raise Conflict when the knowledge base enters the conflicting - state. - *) - val exec : unit knowledge -> unit - - + @raise Conflict when the knowledge base enters the conflicting state. *) - (** [try_exec stmt] is like [exec stmt] but returns - [Error conflict] instead of raising an exception. - *) - val try_exec : unit knowledge -> (unit,Knowledge.conflict) result - - - - (** {3 The state interface} *) + val try_exec : unit knowledge -> (unit, Knowledge.conflict) result + (** [try_exec stmt] is like [exec stmt] but returns [Error conflict] instead + of raising an exception. *) + (** {3 The state interface} *) - (** [set s] sets the knowledge base state to [s]. - - Any existing state is discarded. - *) val set : Knowledge.state -> unit + (** [set s] sets the knowledge base state to [s]. + Any existing state is discarded. *) - (** [current ()] is the current state of the knowledge base. *) val current : unit -> Knowledge.state + (** [current ()] is the current state of the knowledge base. *) - + val reset : unit -> unit (** [reset ()] resets the knowledge state to the empty state. - It is the same as [set @@ KB.empty] - *) - val reset : unit -> unit + It is the same as [set @@ KB.empty] *) end (** Disassembled program. - Project contains data that we were able to reconstruct during - the disassembly, semantic analysis, and other arbitrary amount of - analyses. + Project contains data that we were able to reconstruct during the + disassembly, semantic analysis, and other arbitrary amount of analyses. - Actually, project allows to associate arbitrary data with memory - regions, program terms, and even attach them globally to - itself. So it can be seen as a knowledge base of deeply - interconnected facts. + Actually, project allows to associate arbitrary data with memory regions, + program terms, and even attach them globally to itself. So it can be seen + as a knowledge base of deeply interconnected facts. - Other than delivering information, from the bap to a passes, it - can be also used as a communication media between different - passes, (see {!section:project}).*) + Other than delivering information, from the bap to a passes, it can be + also used as a communication media between different passes, (see + {!section:project}).*) module Project : sig - type t = project type state [@@deriving bin_io] type input type library - (** IO interface to a project data structure. *) include Data.S with type t := t + (** IO interface to a project data structure. *) + val create : + ?package:string -> + ?state:state -> + ?disassembler:string -> + ?brancher:brancher source -> + ?symbolizer:symbolizer source -> + ?rooter:rooter source -> + ?reconstructor:reconstructor source -> + input -> + t Or_error.t (** [create input] creates a project from the provided input source. - The input code regions are speculatively disassembled and the - set of basic blocks is determined, using the algorithm - described in {!Disasm.Driver}. After that the concrete whole - program control-flow graph (CFG) is built, which can be - accessed with the {!Project.disasm} function. The whole - program CFG is then partitioned into a set of subroutines - using the dominators analsysis, see {!Disasm.Subroutines} for - details. Based on this partition a symbol table, which is a - set of a subroutines control-flow graphs, is built. The symbol - table, which can be accessed with {!Project.symbols}, also - contains information about the interprocedural control - flow. Finally, the symbol table is translated into the - intermediate representation, which can be accessed using the + The input code regions are speculatively disassembled and the set of + basic blocks is determined, using the algorithm described in + {!Disasm.Driver}. After that the concrete whole program control-flow + graph (CFG) is built, which can be accessed with the {!Project.disasm} + function. The whole program CFG is then partitioned into a set of + subroutines using the dominators analsysis, see {!Disasm.Subroutines} + for details. Based on this partition a symbol table, which is a set of a + subroutines control-flow graphs, is built. The symbol table, which can + be accessed with {!Project.symbols}, also contains information about the + interprocedural control flow. Finally, the symbol table is translated + into the intermediate representation, which can be accessed using the {!Project.program} function. The whole process is pictured below. {v @@ -10448,25 +9866,25 @@ module Std : sig | binary program | | | +---------------------+ - v} - The disassembling process is fully integrated with the - knowledge base. If the input source provides information about - symbols and their location, then this information will be - automatically reflected to the knowledge base. + The disassembling process is fully integrated with the knowledge base. + If the input source provides information about symbols and their + location, then this information will be automatically reflected to the + knowledge base. - The [brancher], [symbolizer], and [rooter] parameters are - ignored since 2.0.0 and their information could be reflected - to the knowledge base using, correspondingly, - {!Brancher.provide}, {!Symbolizer.provide}, and + The [brancher], [symbolizer], and [rooter] parameters are ignored since + 2.0.0 and their information could be reflected to the knowledge base + using, correspondingly, {!Brancher.provide}, {!Symbolizer.provide}, and {!Rooter.provide} functions. - @param state if specified then the provided [state] will be - used as the initial state + @param state + if specified then the provided [state] will be used as the initial + state - @param package if specified, then all symbols during the - disassembly will be created (interned) in the specified package. + @param package + if specified, then all symbols during the disassembly will be created + (interned) in the specified package. @since 2.0.0 the state parameter is added @since 2.0.0 the parameter [disassembler] is unused @@ -10477,107 +9895,93 @@ module Std : sig @since 2.2.0 the parameter [package] is added @since 2.6.0 if [input] consists of library files in addition - to the main binary, then the accessors to the state of the - project reflect that of the main binary, except for [program], - which contains the code of both the main program and the library - programs linked together. - *) - val create : - ?package:string -> - ?state:state -> - ?disassembler:string -> - ?brancher:brancher source -> - ?symbolizer:symbolizer source -> - ?rooter:rooter source -> - ?reconstructor:reconstructor source -> - input -> t Or_error.t + to the main binary, then the accessors to the state of the project + reflect that of the main binary, except for [program], which contains + the code of both the main program and the library programs linked + together. *) - (** [empty target] creates a for the given [target]. *) val empty : Theory.Target.t -> t + (** [empty target] creates a for the given [target]. *) + val arch : t -> arch (** [arch project] reveals the architecture of a loaded file - @deprecated use [target project] instead. - *) - val arch : t -> arch + @deprecated use [target project] instead. *) + val target : t -> Theory.Target.t (** [target project] returns the target system of the project. - @since 2.2.0 - *) - val target : t -> Theory.Target.t + @since 2.2.0 *) + val specification : t -> Ogre.doc (** [specification p] returns the specification of the binary. @since 2.2.0 *) - val specification : t -> Ogre.doc + val state : t -> state (** [state project] returns the core state of the [project]. @since 2.0.0 *) - val state : t -> state - (** [disasm project] returns results of disassembling *) + val disasm : t -> disasm + (** [disasm project] returns results of disassembling *) - (** [program project] returns a program lifted into {{!sema}IR} *) val program : t -> program term + (** [program project] returns a program lifted into {{!sema}IR} *) - (** [with_program project program] updates a project program *) val with_program : t -> program term -> t + (** [with_program project program] updates a project program *) - - (** [map_program t ~f] maps the IR representation of the program - with function [f]. - - @since 2.6.0 the program is no longer lazily computed. - *) val map_program : t -> f:(program term -> program term) -> t + (** [map_program t ~f] maps the IR representation of the program with + function [f]. + + @since 2.6.0 the program is no longer lazily computed. *) - (** [symbols t] returns reconstructed symbol table *) val symbols : t -> symtab + (** [symbols t] returns reconstructed symbol table *) - (** [with_symbols project symbols] updates [project] symbols *) val with_symbols : t -> symtab -> t + (** [with_symbols project symbols] updates [project] symbols *) - (** returns an attribute storage of the project *) val storage : t -> dict + (** returns an attribute storage of the project *) - (** updates the attribute storage *) val with_storage : t -> dict -> t + (** updates the attribute storage *) - (** [memory t] returns the memory as an interval tree marked with - arbitrary values. *) val memory : t -> value memmap + (** [memory t] returns the memory as an interval tree marked with arbitrary + values. *) - (** the memory of the unit in the knowledge base. - @since 2.2.0 *) val memory_slot : (Theory.Unit.cls, value memmap) KB.slot + (** the memory of the unit in the knowledge base. + @since 2.2.0 *) - - (** [tag_memory project region tag value] tags a given [region] of - memory in [project] with a given [tag] and [value]. Example: - [Project.tag_memory project tained color red] - *) val tag_memory : t -> mem -> 'a tag -> 'a -> t + (** [tag_memory project region tag value] tags a given [region] of memory in + [project] with a given [tag] and [value]. Example: + [Project.tag_memory project tained color red] *) - (** [substitute p region tag value] is like - {{!tag_memory}tag_memory}, but it will also apply - substitutions in the provided string value, as per OCaml - standard library's [Buffer.add_substitute] function. + val substitute : t -> mem -> string tag -> string -> t + (** [substitute p region tag value] is like {{!tag_memory}tag_memory}, but + it will also apply substitutions in the provided string value, as per + OCaml standard library's [Buffer.add_substitute] function. - Example: {[ + Example: + {[ Project.substitute project comment "$symbol starts at $symbol_addr" ]} The following substitutions are supported: - [$section{_name,_addr,_min_addr,_max_addr}] - name of region of file - to which it belongs. For example, in ELF this name will - correspond to the section name + to which it belongs. For example, in ELF this name will correspond to + the section name - - [$symbol{_name,_addr,_min_addr,_max_addr}] - name or address - of the symbol to which this memory belongs + - [$symbol{_name,_addr,_min_addr,_max_addr}] - name or address of the + symbol to which this memory belongs - [$asm] - assembler listing of the memory region @@ -10589,465 +9993,428 @@ module Std : sig - [$min_addr, $addr] - starting address of a memory region - [$max_addr] - address of the last byte of a memory region. *) - val substitute : t -> mem -> string tag -> string -> t - (** [with_memory project] updates project memory. It is - recommended to use {!tag_memory} and {!substitute} instead of this - function, if possible. *) val with_memory : t -> value memmap -> t + (** [with_memory project] updates project memory. It is recommended to use + {!tag_memory} and {!substitute} instead of this function, if possible. + *) (** {3 Extensible record} - Project can also be viewed as an extensible record, where one - can store arbitrary values. Example, + Project can also be viewed as an extensible record, where one can store + arbitrary values. Example, {[ let p = Project.set project color `green ]} This will set field [color] to a value [`green].*) - (** [set project field value] sets a [field] to a give value. If - [field] was already set, then new value overrides the old - one. Otherwise the field is added. *) val set : t -> 'a tag -> 'a -> t + (** [set project field value] sets a [field] to a give value. If [field] was + already set, then new value overrides the old one. Otherwise the field + is added. *) - (** [get project field] returns the value of the [field] if it - exists *) val get : t -> 'a tag -> 'a option + (** [get project field] returns the value of the [field] if it exists *) - (** [has project field] checks whether field exists or not. Useful - for fields of type unit, that actually isomorphic to bool fields, - e.g., [if Project.has project mark] *) val has : t -> 'a tag -> bool + (** [has project field] checks whether field exists or not. Useful for + fields of type unit, that actually isomorphic to bool fields, e.g., + [if Project.has project mark] *) - (** [del project attr] removes an attribute from a project *) val del : t -> 'a tag -> t + (** [del project attr] removes an attribute from a project *) - (** [libraries project] returns the shared libraries that were loaded - with [project]. *) val libraries : t -> library list + (** [libraries project] returns the shared libraries that were loaded with + [project]. *) (** A library that was loaded alongside the main program. - @since 2.6.0 - *) + @since 2.6.0 *) module Library : sig type t = library - (** [unit library] returns the unit associated with the library. *) val unit : library -> Theory.Unit.t + (** [unit library] returns the unit associated with the library. *) + val arch : library -> arch (** [arch library] reveals the architecture of the library. @since 2.6.0 - @deprecated use [target library] instead. - *) - val arch : library -> arch + @deprecated use [target library] instead. *) + val target : library -> Theory.Target.t (** [target library] returns the target system of the library. - @since 2.6.0 - *) - val target : library -> Theory.Target.t + @since 2.6.0 *) + val specification : library -> Ogre.doc (** [specification library] returns the specification of the library. - @since 2.6.0 - *) - val specification : library -> Ogre.doc + @since 2.6.0 *) + val state : library -> state (** [state library] returns the core state of the library. - @since 2.6.0 - *) - val state : library -> state + @since 2.6.0 *) + val disasm : library -> disasm (** [disasm library] returns the results of disassembling the library. - @since 2.6.0 - *) - val disasm : library -> disasm + @since 2.6.0 *) + val memory : library -> value memmap (** [memory library] returns the memory of the library. - @since 2.6.0 - *) - val memory : library -> value memmap + @since 2.6.0 *) end (** Information obtained during project reconstruction. - These pieces of information are guaranteed to be discovered - during the project reconstruction. See {!Project.create} - function for more information on the reconstruction process. *) + These pieces of information are guaranteed to be discovered during the + project reconstruction. See {!Project.create} function for more + information on the reconstruction process. *) module Info : sig - (** occurs every time a new file is opened. The value is a filename *) val file : string stream + (** occurs every time a new file is opened. The value is a filename *) - (** occurs once input architecture is known *) val arch : arch stream + (** occurs once input architecture is known *) - (** occurs once input memory is loaded *) val data : value memmap stream + (** occurs once input memory is loaded *) - (** occurs once code segment is discovered *) val code : value memmap stream + (** occurs once code segment is discovered *) - (** occurs every time a whole program control flow graph is changed *) val cfg : cfg stream + (** occurs every time a whole program control flow graph is changed *) - (** occurs every time a symbol table is changed *) val symtab : symtab stream + (** occurs every time a symbol table is changed *) - (** occurs every time a program term is changed during the - project reconstruction process. *) val program : program term stream + (** occurs every time a program term is changed during the project + reconstruction process. *) - (** occurs once image spec is known *) val spec : Ogre.Doc.t stream + (** occurs once image spec is known *) end - (** The core state of the project. - @since 2.2.0 - *) + @since 2.2.0 *) module State : sig - - (** the abstract type for the project state. - See {!Project.state}. - *) type t = state + (** the abstract type for the project state. See {!Project.state}. *) - - (** [disassembly state] contains all disassembled instructions, - as well as their connection. To build control-flow graphs or - to explore the graph structure, use {!Disasm.Driver.explore}. - *) val disassembly : t -> Disasm.Driver.state + (** [disassembly state] contains all disassembled instructions, as well as + their connection. To build control-flow graphs or to explore the graph + structure, use {!Disasm.Driver.explore}. *) - (** [subroutines state] returns the partition of the set of - disassembled instructions into a set of subroutines. *) val subroutines : t -> Disasm.Subroutines.t + (** [subroutines state] returns the partition of the set of disassembled + instructions into a set of subroutines. *) - (** the slot of a unit object that stores the state of disassembly *) val slot : (Theory.Unit.cls, state) KB.slot + (** the slot of a unit object that stores the state of disassembly *) end - - - - (** Input information. - This module abstracts the input data necessary to create a - project. *) + This module abstracts the input data necessary to create a project. *) module Input : sig type t = input + val load : + ?target:Theory.Target.t -> + ?loader:string -> + ?libraries:string list -> + string -> + t + (** [load filename] loads the file from the specified path. The file must + be regular (i.e., not a pipe) and is expected to have the necessary + meta information, i.e., not the raw code (use [raw_file] to load files + that are raw code). - (** [load filename] loads the file from the specified path. - The file must be regular (i.e., not a pipe) and is expected - to have the necessary meta information, i.e., not the raw - code (use [raw_file] to load files that are raw code). - - If [loader] is not specified then all image loaders are used - and the information from the is merged, otherwise only the - selected loaded is used. See {!Image.available_backend}. + If [loader] is not specified then all image loaders are used and the + information from the is merged, otherwise only the selected loaded is + used. See {!Image.available_backend}. - The [target] could be used to override the target - information derived from the input file. + The [target] could be used to override the target information derived + from the input file. @since 2.5.0 if [target] is specified then it is used - instead of the derived target, and the derivation itself is - not performed. - @before 2.5.0 if [target] is specified and is less specific, - then the derived target it will be ignored, if it - contradicts the information in the file then the project - creation will fail. + instead of the derived target, and the derivation itself is not + performed. - @since 2.6.0 a list of files [libraries] can be provided, - which are libraries that will be linked with the main program. - It is presumed that they are specified in topological order. - - @since 2.2.0 *) - val load : - ?target:Theory.Target.t -> - ?loader:string -> - ?libraries:string list -> - string -> t + @before 2.5.0 + if [target] is specified and is less specific, then the derived + target it will be ignored, if it contradicts the information in the + file then the project creation will fail. - (** [raw_file ?base target ~filename] creates an input from a binary - file that is raw code for the given [target], i.e., - without any headers or meta information. + @since 2.6.0 a list of files [libraries] can be provided, - @param base is an virtual address of the first byte - (defaults to 0). + which are libraries that will be linked with the main program. It is + presumed that they are specified in topological order. @since 2.2.0 *) - val raw_file : ?base:addr -> Theory.Target.t -> string -> t + val raw_file : ?base:addr -> Theory.Target.t -> string -> t + (** [raw_file ?base target ~filename] creates an input from a binary file + that is raw code for the given [target], i.e., without any headers or + meta information. - (** [create ?base target code] creates input from the binary - [code] for the given [target]. + @param base is an virtual address of the first byte (defaults to 0). @since 2.2.0 *) - val from_string : ?base:addr -> Theory.Target.t -> string -> t - (** [create ?base target code] creates input from the binary - [code] for the given [target]. + val from_string : ?base:addr -> Theory.Target.t -> string -> t + (** [create ?base target code] creates input from the binary [code] for + the given [target]. @since 2.2.0 *) - val from_bigstring : ?base:addr -> Theory.Target.t -> Bigstring.t -> t - - - (** [custom target] creates a custom input. - The [target] parameter denotes the target system of the - input program. The [code] and [data] parameters are stored - in the [Project.memory] and [code] is disassembled and - lifted if the specified [target] has a disassembler and lifter. + val from_bigstring : ?base:addr -> Theory.Target.t -> Bigstring.t -> t + (** [create ?base target code] creates input from the binary [code] for + the given [target]. - The [filename] is used to communicate with external tools - and will be broadcasted via [Info.file] stream and stored in - the filename property of the project, otherwise it is not - used when the project is created. + @since 2.2.0 *) - The [finish project] is the post-constructor that takes the - nearly finished project (with code and data and potentially - disassembled and lifted code) and constructs the final - project. - *) val custom : ?finish:(project -> project) -> ?filename:string -> ?code:value memmap -> ?data:value memmap -> - Theory.Target.t -> t + Theory.Target.t -> + t + (** [custom target] creates a custom input. + The [target] parameter denotes the target system of the input program. + The [code] and [data] parameters are stored in the [Project.memory] + and [code] is disassembled and lifted if the specified [target] has a + disassembler and lifter. + + The [filename] is used to communicate with external tools and will be + broadcasted via [Info.file] stream and stored in the filename property + of the project, otherwise it is not used when the project is created. + + The [finish project] is the post-constructor that takes the nearly + finished project (with code and data and potentially disassembled and + lifted code) and constructs the final project. *) - (** [register_loader name load] register a loader under provided - [name]. The [load] function will be called the filename, and it - must return the [input] value. *) val register_loader : string -> (string -> t) -> unit + (** [register_loader name load] register a loader under provided [name]. + The [load] function will be called the filename, and it must return + the [input] value. *) - (** [available_loaders ()] returns a list of names of currently known loaders. *) val available_loaders : unit -> string list + (** [available_loaders ()] returns a list of names of currently known + loaders. *) (** {3 Deprecated Interface} - The following functions are deprecated and better - alternatives are provided. - They might be removed in BAP 3.0.*) + The following functions are deprecated and better alternatives are + provided. They might be removed in BAP 3.0.*) - (** [binary ?base arch ~filename] create an input from a binary - file that is a pure code without any headers or meta - information. + val binary : ?base:addr -> arch -> filename:string -> t + (** [binary ?base arch ~filename] create an input from a binary file that + is a pure code without any headers or meta information. - @param base is an virtual address of the first byte - (defaults to 0). + @param base is an virtual address of the first byte (defaults to 0). - @deprecated use [Input.raw_file] instead. - *) - val binary : ?base:addr -> arch -> filename:string -> t + @deprecated use [Input.raw_file] instead. *) + val file : ?loader:string -> filename:string -> t (** [file ?target ?loader ~filename] input data from a file, using the specified loader. If [loader] is not specified, then some existing - loader will be used. If it is specified, then it is first looked - up in the [available_loaders] and if it is not found, then it will - be looked up in the {!Image.available_backends}. + loader will be used. If it is specified, then it is first looked up in + the [available_loaders] and if it is not found, then it will be looked + up in the {!Image.available_backends}. - @deprecated use [Input.load filename] - *) - val file : ?loader:string -> filename:string -> t + @deprecated use [Input.load filename] *) - (** [create arch filename ~code ~data] creates an input from a - file, using two memory maps. The [code] memmap spans the code in - the file, and [data] spans the data. An optional [finish] - function can be used to propagate to the project any - additional information that is available to the loader. It - defaults to [Fn.id]. - @deprecated use either [Input.custom] or [Input.from_string] - and [Input.from_bigstring]. - *) val create : ?finish:(project -> project) -> - arch -> string -> code:value memmap -> data:value memmap -> t + arch -> + string -> + code:value memmap -> + data:value memmap -> + t + (** [create arch filename ~code ~data] creates an input from a file, using + two memory maps. The [code] memmap spans the code in the file, and + [data] spans the data. An optional [finish] function can be used to + propagate to the project any additional information that is available + to the loader. It defaults to [Fn.id]. + @deprecated + use either [Input.custom] or [Input.from_string] and + [Input.from_bigstring]. *) end (** {3 Registering passes} - To add new pass one of the following [register_*] functions - should be called.*) + To add new pass one of the following [register_*] functions should be + called.*) type pass - (** [register_pass ?autorun ?runonce ?deps ?name pass] registers a - [pass] over a project. + val register_pass : + ?autorun:bool -> + (* defaults to [false] *) + ?runonce:bool -> + (* defaults to [autorun] *) + ?deps:string list -> + ?name:string -> + (t -> t) -> + unit + (** [register_pass ?autorun ?runonce ?deps ?name pass] registers a [pass] + over a project. + + If [autorun] is [true], then the host program will run this pass + automatically. If [runonce] is true, then for a given project the pass + will be run only once. Each repeating attempts to run the pass will be + ignored. The [runonce] parameter defaults to [false] when [autorun] is + [false], and to [true] otherwise. + + Parameter [deps] is list of dependencies. Each dependency is a name of a + pass, that should be run before the [pass]. The dependencies will be run + in a specified order every time the [pass] is run. - If [autorun] is [true], then the host program will run this - pass automatically. If [runonce] is true, then for a given - project the pass will be run only once. Each repeating - attempts to run the pass will be ignored. The [runonce] - parameter defaults to [false] when [autorun] is [false], and - to [true] otherwise. + To get access to command line arguments use [Plugin.argv] *) - Parameter [deps] is list of dependencies. Each dependency is a - name of a pass, that should be run before the [pass]. The - dependencies will be run in a specified order every time the - [pass] is run. + val register_pass' : + ?autorun:bool -> + (* defaults to [false] *) + ?runonce:bool -> + (* defaults to [autorun] *) + ?deps:string list -> + ?name:string -> + (t -> unit) -> + unit + (** [register_pass' pass] registers [pass] that doesn't modify the project + effect and is run only for side effect. (See {!register_pass}) *) - To get access to command line arguments use [Plugin.argv] *) - val register_pass : - ?autorun:bool -> (** defaults to [false] *) - ?runonce:bool -> (** defaults to [autorun] *) - ?deps:string list -> ?name:string -> (t -> t) -> unit - - (** [register_pass' pass] registers [pass] that doesn't modify - the project effect and is run only for side effect. - (See {!register_pass}) *) - val register_pass': - ?autorun:bool -> (** defaults to [false] *) - ?runonce:bool -> (** defaults to [autorun] *) - ?deps:string list -> ?name:string -> (t -> unit) -> unit - - (** [passes ()] returns all currently registered passes. *) val passes : unit -> pass list + (** [passes ()] returns all currently registered passes. *) - (** [find_pass name] returns a pass with the given name. *) val find_pass : string -> pass option + (** [find_pass name] returns a pass with the given name. *) - (** time duration in seconds *) type second = float + (** time duration in seconds *) (** A program analysis pass. - Pass is essentially a function that takes a project data - structures, and returns a new project, possibly modified. + Pass is essentially a function that takes a project data structures, and + returns a new project, possibly modified. - Passes may depend on other passes, and have a few properties, - associated with them. *) + Passes may depend on other passes, and have a few properties, associated + with them. *) module Pass : sig - type t = pass (** An error that can occur when loading or running pass. - - [Not_loaded name] pass with a given [name] wasn't loaded for - some reason. This is a very unlikely error, indicating - either a logic error in the plugin system implementation or - something very weird, that we didn't expect. + - [Not_loaded name] pass with a given [name] wasn't loaded for some + reason. This is a very unlikely error, indicating either a logic + error in the plugin system implementation or something very weird, + that we didn't expect. - - [Not_loaded name] when we tried to load plugin with a given - [name] we failed to find it in our search paths. + - [Not_loaded name] when we tried to load plugin with a given [name] + we failed to find it in our search paths. - - [Runtime_error (name,exn)] when plugin with a given [name] - was run it raised an [exn]. - - *) - type error = - | Unsat_dep of pass * string - | Runtime_error of pass * exn + - [Runtime_error (name,exn)] when plugin with a given [name] was run + it raised an [exn]. *) + type error = Unsat_dep of pass * string | Runtime_error of pass * exn [@@deriving sexp_of] - (** raised when a pass failed to load or to run. Note: this - exception is raised only from two functions in this module, that - state this in their documentation and has [_exn] suffix in their - name. *) - exception Failed of error [@@deriving sexp] + exception Failed of error + [@@deriving sexp] + (** raised when a pass failed to load or to run. Note: this exception is + raised only from two functions in this module, that state this in + their documentation and has [_exn] suffix in their name. *) + val run : t -> project -> (project, error) Result.t (** [run_pass project pass] applies [pass] to a [project]. - If a pass has dependencies, then they will be run before the - pass in some topological order. *) - val run : t -> project -> (project,error) Result.t + If a pass has dependencies, then they will be run before the pass in + some topological order. *) + val run_exn : t -> project -> project (** [run_pass_exn proj] is the same as {!run_pass}, but raises an - exception on error. Useful to provide custom error - handling/printing. + exception on error. Useful to provide custom error handling/printing. - @raise Pass_failed if failed to load, or if plugin failed at - runtime. *) - val run_exn : t -> project -> project + @raise Pass_failed if failed to load, or if plugin failed at runtime. + *) - (** [name pass] is a pass name *) val name : t -> string + (** [name pass] is a pass name *) - (** [autorun pass] is [true] if a [pass] was created with - autorun option *) val autorun : t -> bool + (** [autorun pass] is [true] if a [pass] was created with autorun option + *) end - (** A pass that collates projects. A collator is a pass that is folded over projects and computes - differences between the base version and the number of - alternative versions. + differences between the base version and the number of alternative + versions. - @since 2.2.0 - *) + @since 2.2.0 *) module Collator : sig - type t - - (** Information about a collator. *) type info + (** Information about a collator. *) - - (** [register ~prepare ~collate ~summary name] registers a collator. - - The [prepare] function is called on the base version and it - returns the collator's state that can be an arbitrary type - ['s]. Then the [collate] function is consequitevely applied - on alternative versions of the base version, with the - version number passed as the first argument (starting from - 0). Finally, when all versions are compared with the base, - the summary function is called. - - The collator fullname (package:name) must be unique, - otherwise a function terminates. - *) - val register : ?desc:string -> ?package:string -> string -> + val register : + ?desc:string -> + ?package:string -> + string -> prepare:(project -> 's) -> collate:(int -> 's -> project -> 's) -> summary:('s -> unit) -> unit + (** [register ~prepare ~collate ~summary name] registers a collator. + The [prepare] function is called on the base version and it returns + the collator's state that can be an arbitrary type ['s]. Then the + [collate] function is consequitevely applied on alternative versions + of the base version, with the version number passed as the first + argument (starting from 0). Finally, when all versions are compared + with the base, the summary function is called. - (** [apply collator projects] applies the [collator] to the - sequence of projects. + The collator fullname (package:name) must be unique, otherwise a + function terminates. *) - Projects are evaluated lazily, one project at time. - *) val apply : t -> project seq -> unit + (** [apply collator projects] applies the [collator] to the sequence of + projects. + Projects are evaluated lazily, one project at time. *) - (** [find ?package name] looks up a collator in the registry. *) val find : ?package:string -> string -> t option + (** [find ?package name] looks up a collator in the registry. *) - - (** the collators name *) val name : info -> Knowledge.Name.t + (** the collators name *) - - (** the collators description. *) val desc : info -> string + (** the collators description. *) - (** information about currently registered collators *) val registered : unit -> info list + (** information about currently registered collators *) end - (** Knowledge base analyses. - A registry of the knowledge base computations that could be - used for exploring and refining the facts stored in the - knowledge base. + A registry of the knowledge base computations that could be used for + exploring and refining the facts stored in the knowledge base. - An analysis could be parameterized by an arbitrary number of - arguments, e.g., to register a function [print_subr] that has - type + An analysis could be parameterized by an arbitrary number of arguments, + e.g., to register a function [print_subr] that has type {[ tid -> string -> Bitvec.t -> unit knowledge @@ -11063,31 +10430,29 @@ module Std : sig ]} The registered analyses could be invoked directly, using the - [Analysis.apply] function or via the [analysis] plugin that - provides a REPL as well as an ability to call analysis from - the command-line interface or from a script. To get the list - of available analyses, run `bap analyze commands`. - *) + [Analysis.apply] function or via the [analysis] plugin that provides a + REPL as well as an ability to call analysis from the command-line + interface or from a script. To get the list of available analyses, run + `bap analyze commands`. *) module Analysis : sig - - (** the type for analyses *) type t + (** the type for analyses *) - (** information about an analysis *) type info + (** information about an analysis *) - (** a description of the analysis application syntax *) type grammar + (** a description of the analysis application syntax *) - (** a description of an analysis argument *) type 'a arg + (** a description of an analysis argument *) + type ('a, 'r) args (** a signature of an analysis. - The ['r] type denotes the return type of an analysis, - which is always [unit knowledge] and the ['a] type - variable denotes the function type of the analysis, - e.g., an analysis of type + The ['r] type denotes the return type of an analysis, which is always + [unit knowledge] and the ['a] type variable denotes the function type + of the analysis, e.g., an analysis of type {[ tid -> Bitvec.t -> unit knowledge @@ -11097,400 +10462,387 @@ module Std : sig {[ (tid -> Bitvec.t -> unit knowledge, unit knowledge) args - ]} - - *) - type ('a,'r) args - + ]} *) - (** [apply analysis] is the computation performed by the analysis. *) val apply : t -> string list -> unit knowledge + (** [apply analysis] is the computation performed by the analysis. *) - (** [find ?package string] searches the analysis with the given - name in the registry. *) val find : ?package:string -> string -> t option + (** [find ?package string] searches the analysis with the given name in + the registry. *) - (** [name info] is the analysis unique name. *) val name : info -> Knowledge.Name.t + (** [name info] is the analysis unique name. *) - (** [desc info] is the short description of the analysis *) val desc : info -> string + (** [desc info] is the short description of the analysis *) - (** [grammar info] is the description of the rule grammar. *) val grammar : info -> grammar + (** [grammar info] is the description of the rule grammar. *) + val register : + ?desc:string -> + ?package:string -> + string -> + ('a, unit knowledge) args -> + 'a -> + unit (** [register ?desc ?package name comp] registers the knowledge - computation as an analysis. The [package:name] pair should - be unique. *) - val register : ?desc:string -> ?package:string -> string -> - ('a,unit knowledge) args -> 'a -> unit + computation as an analysis. The [package:name] pair should be unique. + *) - (** information about currently registered analyses *) val registered : unit -> info list + (** information about currently registered analyses *) + val args : 'a arg -> ('a -> 'b, 'b) args (** [args x] a unary signature. - Creates a signature of a function that takes one - argument. The type ['a] of the argument and its syntax - are represented by the value of type ['a arg]. + Creates a signature of a function that takes one argument. The type + ['a] of the argument and its syntax are represented by the value of + type ['a arg]. Examples, - - {[args empty]} -- a function of type [unit -> 'r] - - {[args string]} -- a function of type [string -> 'r]. - - Note, while the ['r] type is kept as a variable it will be - concretized to the [unit knowledge] when the function of - this type will be registered using the [register] function. - *) - val args : 'a arg -> ('a -> 'b, 'b) args - - + {ul + {- {[ + args empty + ]} + -- a function of type [unit -> 'r] + } + {- {[ + args string + ]} + -- a function of type [string -> 'r]. + } + } + + Note, while the ['r] type is kept as a variable it will be concretized + to the [unit knowledge] when the function of this type will be + registered using the [register] function. *) + + val ( $ ) : ('a, 'b -> 'c) args -> 'b arg -> ('a, 'c) args (** [args $ arg] appends [arg] to [args]. - If [args] denote a signature of a function with type - [x -> y] and [arg] has type [z], then [args $ arg] denote - a signature of type [x -> y -> z]. + If [args] denote a signature of a function with type [x -> y] and + [arg] has type [z], then [args $ arg] denote a signature of type + [x -> y -> z]. {3 Example} - [args string $ bitvec $ program] - denotes a function of - type [string -> Bitvec.t -> Theory.Label.t -> 'r]. + [args string $ bitvec $ program] - denotes a function of type + [string -> Bitvec.t -> Theory.Label.t -> 'r]. {3 A note on the type} - The type of the [$] makes a little bit more clear if we - will consider the following example, - [args string $ bitvec], where + The type of the [$] makes a little bit more clear if we will consider + the following example, [args string $ bitvec], where - [args string] has type [(string -> 'r,'r) args] and - [bitvec] has type [Bitvec.t arg]. The type of [args string $ bitvec] is computed by unifying - [string -> 'r] with ['a] and ['r] with ['b -> 'c], where - ['b] is [Bitvec.t]. A syntactic unification gives us the - following values for the variables ['r] and ['a] + [string -> 'r] with ['a] and ['r] with ['b -> 'c], where ['b] is + [Bitvec.t]. A syntactic unification gives us the following values for + the variables ['r] and ['a] - ['r = Bitvec.t -> 'c] - ['a = string -> 'r = string -> Bitvec.t -> 'c] Therefore the type of [args string $ bitvec] is {[ ('a,'c) args = (string -> Bitvec.t -> 'c,'c) args - ]} - - *) - val ($) : ('a, 'b -> 'c) args -> 'b arg -> ('a,'c) args + ]} *) - (** {2 Grammar Rules} *) + (** {2 Grammar Rules} *) - (** {3 Terminals} *) + (** {3 Terminals} *) + val empty : unit arg (** [empty] no arguments. - The syntax is an empty string and the signature is a unary - function that takes an argument of type [unit]. *) - val empty : unit arg + The syntax is an empty string and the signature is a unary function + that takes an argument of type [unit]. *) + val string : string arg (** [string] a string argument. The syntax is a string of characters that does not include whitespaces. *) - val string : string arg + val bitvec : Bitvec.t arg (** [bitvec] a bitvector. The syntax is described in the {!Bitvec.of_string} and is a - non-negative binary, octal, hexadecimal, or decimal - numeral. *) - val bitvec : Bitvec.t arg - + non-negative binary, octal, hexadecimal, or decimal numeral. *) - (** [program] a program label. - - The syntax is a textual representation of the knowledge base - symbol ({!Knowledge.Symbol}). Unqualified names are read in - the current package. - - Examples, [0x88f0] or [bin/arm-linux-gnueabi-echo:0x88f0]. - *) val program : Theory.Label.t arg + (** [program] a program label. + The syntax is a textual representation of the knowledge base symbol + ({!Knowledge.Symbol}). Unqualified names are read in the current + package. - (** [unit] a program unit. - - The syntax is a textual representation of the knowledge base - symbol ({!Knowledge.Symbol}). Unqualified names are read in - the current package. + Examples, [0x88f0] or [bin/arm-linux-gnueabi-echo:0x88f0]. *) - Examples, [file:/bin/ls] or [my-unit]. - *) val unit : Theory.Unit.t arg + (** [unit] a program unit. + The syntax is a textual representation of the knowledge base symbol + ({!Knowledge.Symbol}). Unqualified names are read in the current + package. - (** [argument ~parse name] defines a new terminal. - - The [name] denotes the name of the rule as it will appear in - the grammar definition. The [parse] function defines the - grammar, it is called as [parse fail input] where [input] is - the value of type [string]. The [parse] function should - either produce a value of type ['a] if [input] is a valid - representation or use [fail error] to indicate that it is - invalid, where [error] is the error message. + Examples, [file:/bin/ls] or [my-unit]. *) - The [parse] function is a knowledge computation so it can - access the knowledge base to construct the value. - *) val argument : ?desc:string -> parse:(fail:(string -> _ knowledge) -> string -> 'a knowledge) -> - string -> 'a arg + string -> + 'a arg + (** [argument ~parse name] defines a new terminal. + The [name] denotes the name of the rule as it will appear in the + grammar definition. The [parse] function defines the grammar, it is + called as [parse fail input] where [input] is the value of type + [string]. The [parse] function should either produce a value of type + ['a] if [input] is a valid representation or use [fail error] to + indicate that it is invalid, where [error] is the error message. - (** {3 Non-terminals} *) + The [parse] function is a knowledge computation so it can access the + knowledge base to construct the value. *) - (** [optional x] an optional argument [x]. + (** {3 Non-terminals} *) - The syntax of [args xs $ optional x] is [ []], where - [] denotes the syntax of the argument [x], [[]] indicates - that it can be omitted, and [] is the grammar of the - signature [xs]. An optional argument should be the last - argument in the signature, otherwise the resulting grammar - will be ambiguous. + val optional : 'a arg -> 'a option arg + (** [optional x] an optional argument [x]. + The syntax of [args xs $ optional x] is [ []], where [] + denotes the syntax of the argument [x], [[]] indicates that it can be + omitted, and [] is the grammar of the signature [xs]. An optional + argument should be the last argument in the signature, otherwise the + resulting grammar will be ambiguous. Example, the grammar of {[ args string $ optional bitvec - }], + ]} + , recognizes the following strings, - - ["hello"] - - ["hello 0x42"] *) - val optional : 'a arg -> 'a option arg - + - ["hello"] + - ["hello 0x42"] *) + val keyword : string -> 'a arg -> 'a option arg (** [keyword s x] an optional keyworded argument [x]. - The syntax of [args xs $ keyword s x] is {[ [: ]]}, - where [] denotes the syntax of the argument [x], [[]] - indicates that it can be omitted, [:] is the literal string - [":"], where [] is equal to [s], and [] is the - grammar of the signature [xs]. If a grammar includes several - keyworded arguments they may follow in an arbitrary order. - + The syntax of [args xs $ keyword s x] is + {[ + [: ] + ]} + , where [] denotes the syntax of the argument [x], [[]] indicates + that it can be omitted, [:] is the literal string [":"], where + [] is equal to [s], and [] is the grammar of the signature + [xs]. If a grammar includes several keyworded arguments they may + follow in an arbitrary order. Example, the grammar of {[ - args @@ - keyword "foo" string $ - keyword "bar" bitvec - }], + args @@ keyword "foo" string $ keyword "bar" bitvec + ]} + , recognizes the following strings, - - [""] - - [":foo hello"] - - [":bar 0x42"] - - [":foo hello :bar 0x42"] - - [":bar 0x42 :foo hello"] - *) - val keyword : string -> 'a arg -> 'a option arg - + - [""] + - [":foo hello"] + - [":bar 0x42"] + - [":foo hello :bar 0x42"] + - [":bar 0x42 :foo hello"] *) + val flag : string -> bool arg (** [flag x] a keyword [x] without arguments. - The syntax of [args xs $ flag s] is {[ [:]]}, where - [[]] indicates that it can be omitted, [:s] is the literal - string [":"], where [] is equal to [s], and [] is - the grammar of the signature [xs]. If a grammar includes - several flags they may follow in an arbitrary order. - + The syntax of [args xs $ flag s] is + {[ + [:] + ]} + , where [[]] indicates that it can be omitted, [:s] is the literal + string [":"], where [] is equal to [s], and [] is the + grammar of the signature [xs]. If a grammar includes several flags + they may follow in an arbitrary order. Example, the grammar of {[ - args @@ - flag "foo" $ - flag "bar" - }], + args @@ flag "foo" $ flag "bar" + ]} + , recognizes the following strings, - - [""] - - [":foo hello"] - - [":bar 0x42"] - - [":foo hello :bar 0x42"] - - [":bar 0x42 :foo hello"] - - and so on. - - *) - val flag : string -> bool arg - + - [""] + - [":foo hello"] + - [":bar 0x42"] + - [":foo hello :bar 0x42"] + - [":bar 0x42 :foo hello"] + - and so on. *) + val rest : 'a arg -> 'a list arg (** [rest x] a zero or more [x] arguments. - The syntax of [args xs $ rest x] is {[ []...]}, where - [[]...] indicates that an argument can be omitted or - repeated an arbitrary number of times, [] is syntax of - the agument [x], and [] is the grammar of the signature - [xs]. The [rest x] argument should be the last argument in - the signature, and any extensions of the resulting signature - will lead to an ambiguous grammar. + The syntax of [args xs $ rest x] is + {[ + []... + ]} + , where [[]...] indicates that an argument can be omitted or repeated + an arbitrary number of times, [] is syntax of the agument [x], and + [] is the grammar of the signature [xs]. The [rest x] argument + should be the last argument in the signature, and any extensions of + the resulting signature will lead to an ambiguous grammar. Example, the grammar of {[ args string $ rest bitvec - }], + ]} + , recognizes the following strings, - - ["hello"] - - ["hello 0x42"] - - ["hello 0x42 42"] - - and so on - *) - val rest : 'a arg -> 'a list arg - + - ["hello"] + - ["hello 0x42"] + - ["hello 0x42 42"] + - and so on *) (** Abstract Grammar descriptions.*) module Grammar : sig type t = grammar - (** [to_string grammar] is the textual representation of the [grammar]. *) val to_string : grammar -> string + (** [to_string grammar] is the textual representation of the [grammar]. + *) end end (**/**) + val restore_state : t -> unit + (**/**) end (** A self reflection. - This is a generative functor module refers to an information bundled with an application. - Use [include Self()] syntax to bring this definitions to the - scope. + This is a generative functor module refers to an information bundled with + an application. Use [include Self()] syntax to bring this definitions to + the scope. - It is designed to be used inside a plugin, but can be used in - a standalone program as well (this is useful, for debugging - plugins, by running them as a standalone applications). + It is designed to be used inside a plugin, but can be used in a standalone + program as well (this is useful, for debugging plugins, by running them as + a standalone applications). If run in a standalone mode, then field [name] would be set to - [Sys.executable_name] and [argv] to [Sys.argv]. - - Note: this module uses the [Event.Self()] module and extends it - with several more fields, such as [name], [version], [doc], and - [argv]. It is recommended to use [Event.Self()] aka - [Bap_main_event.Self()] instead. - *) - module Self() : sig + {!Sys.executable_name} and [argv] to {!Sys.argv}. - (** [name of a plugin] *) + Note: this module uses the [Event.Self()] module and extends it with + several more fields, such as [name], [version], [doc], and [argv]. It is + recommended to use [Event.Self()] aka [Bap_main_event.Self()] instead. *) + module Self () : sig val name : string + (** [name of a plugin] *) - (** [version number] *) val version : string + (** [version number] *) - (** A short, one-line description *) val doc : string + (** A short, one-line description *) - (** [args name] returns an array of arguments designated for a - plugin with a given [name]. - - The arguments will be extracted from [Sys.argv] array by - removing all arguments that doesn't start with - [--name-]. Then, from all command arguments that are left, the - [--name-] prefix is substituted with [--]. For example, if - [argv] contained [ [| "bap"; "-lcallgraph"; "--callgraph" - "--callgraph-help"|]] then pass that registered itself under - [callgraph] name will receive the following array of arguments - [ [| "callgraph"; --help |] ]. That means, that plugins can't - accept arguments that are anonymous or short options *) val argv : string array + (** [args name] returns an array of arguments designated for a plugin with a + given [name]. + + The arguments will be extracted from [Sys.argv] array by removing all + arguments that doesn't start with [--name-]. Then, from all command + arguments that are left, the [--name-] prefix is substituted with [--]. + For example, if [argv] contained + [ [| "bap"; "-lcallgraph"; "--callgraph" "--callgraph-help"|]] then pass + that registered itself under [callgraph] name will receive the following + array of arguments [ [| "callgraph"; --help |] ]. That means, that + plugins can't accept arguments that are anonymous or short options *) - (** [debug fmt ...] send a debug message *) - val debug : ('a,Format.formatter,unit) format -> 'a + val debug : ('a, Format.formatter, unit) format -> 'a + (** [debug fmt ...] send a debug message *) - (** [info fmt ...] send an info message *) - val info : ('a,Format.formatter,unit) format -> 'a + val info : ('a, Format.formatter, unit) format -> 'a + (** [info fmt ...] send an info message *) - (** [warning fmt ...] send a warning message *) - val warning : ('a,Format.formatter,unit) format -> 'a + val warning : ('a, Format.formatter, unit) format -> 'a + (** [warning fmt ...] send a warning message *) - (** [error fmt ...] send an error message *) - val error : ('a,Format.formatter,unit) format -> 'a + val error : ('a, Format.formatter, unit) format -> 'a + (** [error fmt ...] send an error message *) - (** formatter that sends debug messages *) val debug_formatter : Format.formatter + (** formatter that sends debug messages *) - (** formatter that sends info messages *) val info_formatter : Format.formatter + (** formatter that sends info messages *) - (** formatter that sends warning messages *) val warning_formatter : Format.formatter + (** formatter that sends warning messages *) - (** formatter that sends error messages *) val error_formatter : Format.formatter + (** formatter that sends error messages *) - (** [report_progress ~task:t ~note:n ~state:s ~total:s' ()] reports - a progress of the task [t]. - - Reports that the task [t] made a progress to the stage [s] out - the total number of stages [s']. The note [n] may provide an - additional textual explanation of the current stage. The report - doesn't mean that the stage is finished, but rather that it is - entered. Thus for [s'] stages we expect to receive [s'-1] - reports. (This approach works fine with functional programming - and iterating - as in functional programming it is more - convenient to report before computation, and during the indexed - iteration the index of the last element is one less than the - total number of elements). - - All parameters are optional, and have the following default - values if not specified: + val report_progress : + ?task:string -> ?note:string -> ?stage:int -> ?total:int -> unit -> unit + (** [report_progress ~task:t ~note:n ~state:s ~total:s' ()] reports a + progress of the task [t]. + + Reports that the task [t] made a progress to the stage [s] out the total + number of stages [s']. The note [n] may provide an additional textual + explanation of the current stage. The report doesn't mean that the stage + is finished, but rather that it is entered. Thus for [s'] stages we + expect to receive [s'-1] reports. (This approach works fine with + functional programming and iterating - as in functional programming it + is more convenient to report before computation, and during the indexed + iteration the index of the last element is one less than the total + number of elements). + + All parameters are optional, and have the following default values if + not specified: @param task defaults to the plugin [name]; @param note defaults to the empty string; @param stage defaults to [None] - @param total defaults to [None] or to the last value of this - parameter for the given task. + @param total + defaults to [None] or to the last value of this parameter for the + given task. - The [report_progress] bar is an easy way to provide some - feedback to the system, either in the form of a progress (if the - total number of stages is known) or in the form of a friendly - ping back. + The [report_progress] bar is an easy way to provide some feedback to the + system, either in the form of a progress (if the total number of stages + is known) or in the form of a friendly ping back. - The mechanism should be used by analyses that expect to take - some time to complete. Usually, one plugin implements only one - task, so the task name may be omitted. If an analysis is built - from several tasks, then they can be represented as subtasks, - and the main task should represent the whole work. + The mechanism should be used by analyses that expect to take some time + to complete. Usually, one plugin implements only one task, so the task + name may be omitted. If an analysis is built from several tasks, then + they can be represented as subtasks, and the main task should represent + the whole work. Example: {[ - let find_interesting_points prog = report_progress ~task:"discover" ~total:(Term.length sub_t prog) (); - Term.enum sub_t prog |> Seq.concat_mapi ~f:(fun stage sub -> - report_progress ~note:(Sub.name sub) ~task:"discover" ~stage (); - interesting_points_of_sub sub) + Term.enum sub_t prog + |> Seq.concat_mapi ~f:(fun stage sub -> + report_progress ~note:(Sub.name sub) ~task:"discover" ~stage + (); + interesting_points_of_sub sub) let check_interesting_points points = report_progress ~task:"checking" ~total:(Seq.length points) (); Seq.iteri ~f:(fun stage p -> report_progress ~note:(Point.name p) ~task:"checking" ~stage (); check_point p) - ]} - - *) - val report_progress : - ?task:string -> - ?note:string -> - ?stage:int -> - ?total:int -> unit -> unit - + ]} *) (** This module allows plugins to access BAP configuration variables. - When reading the values for the configuration variables, the - decreasing order of precedence for the values is: + When reading the values for the configuration variables, the decreasing + order of precedence for the values is: - Command line arguments - Environment variables - Configuration file @@ -11499,150 +10851,160 @@ module Std : sig Example usage: {[ - let path = Config.(param string ~doc:"a path to file" - ~default:"input.txt" "path") - let debug = Config.(flag (* ... *) ) + let path = + Config.( + param string ~doc:"a path to file" ~default:"input.txt" "path") + + let debug = Config.(flag (* ... *)) (* ... *) let main () = - Config.when_ready - (fun {Config.get=(!)} -> - do_stuff !path !debug (* ... *) - ) - ]} - *) + Config.when_ready (fun { Config.get = ( ! ) } -> + do_stuff !path !debug (* ... *)) + ]} *) module Config : sig - - (** Version number *) val version : string + (** Version number *) - (** A directory for bap specific read-only architecture - independent data files. *) val datadir : string + (** A directory for bap specific read-only architecture independent data + files. *) - (** A directory for bap specific object files, libraries, and - internal binaries that are not intended to be executed directly - by users or shell scripts *) val libdir : string + (** A directory for bap specific object files, libraries, and internal + binaries that are not intended to be executed directly by users or + shell scripts *) - (** A directory for bap specific configuration files *) val confdir : string + (** A directory for bap specific configuration files *) - (** An abstract parameter type that can be later read using a reader *) type 'a param + (** An abstract parameter type that can be later read using a reader *) - (** Parse a string to an 'a *) type 'a parser = string -> [ `Ok of 'a | `Error of string ] + (** Parse a string to an 'a *) - (** Type for converting [string] <-> ['a]. Also defines a default - value for the ['a] type. *) type 'a converter + (** Type for converting [string] <-> ['a]. Also defines a default value + for the ['a] type. *) - val converter : 'a parser -> (Format.formatter -> 'a -> unit) -> 'a -> 'a converter + val converter : + 'a parser -> (Format.formatter -> 'a -> unit) -> 'a -> 'a converter + val deprecated : string (** Default deprecation warning message, for easy deprecation of parameters. *) - val deprecated : string - (** [param conv ~default ~docv ~doc name] creates a parameter - which is referred to on the command line, environment - variable, and config file using the value of [name], with - the type defined by [conv], using the [default] value if - unspecified by user. + val param : + 'a converter -> + ?deprecated:string -> + ?default:'a -> + ?as_flag:'a -> + ?docv:string -> + ?doc:string -> + ?synonyms:string list -> + string -> + 'a param + (** [param conv ~default ~docv ~doc name] creates a parameter which is + referred to on the command line, environment variable, and config file + using the value of [name], with the type defined by [conv], using the + [default] value if unspecified by user. - The [default] is optional, and falls back to the - default defined by [conv]. + The [default] is optional, and falls back to the default defined by + [conv]. - [doc] is the man page information of the argument. The - variable ["$(docv)"] can be used to refer to the value of - [docv]. [docv] is a variable name used in the man page to - stand for their value. + [doc] is the man page information of the argument. The variable + ["$(docv)"] can be used to refer to the value of [docv]. [docv] is a + variable name used in the man page to stand for their value. - A user can optionally add [deprecated] to a parameter that - is to be deprecated soon. This will cause the parameter to be - usable as normal, but will emit a warning to the user if they - try to use it. - Example usage: [Config.(param ~deprecated int "--old")]. + A user can optionally add [deprecated] to a parameter that is to be + deprecated soon. This will cause the parameter to be usable as normal, + but will emit a warning to the user if they try to use it. Example + usage: [Config.(param ~deprecated int "--old")]. - Additionally, [synonyms] can be added to allow multiple - arguments referring to the same parameters. However, this is - usually discouraged, and considered proper usage only in rare - scenarios. + Additionally, [synonyms] can be added to allow multiple arguments + referring to the same parameters. However, this is usually + discouraged, and considered proper usage only in rare scenarios. - Also, a developer can use the [~as_flag] to specify a - default value that the argument takes if it is used like a - flag. This behaviour can be understood better through the - following example. + Also, a developer can use the [~as_flag] to specify a default value + that the argument takes if it is used like a flag. This behaviour can + be understood better through the following example. - Consider [Config.(param (some int) ~as_flag:(Some 10) "x")]. + Consider [Config.(param (some int) ~as_flag:(Some 10) "x")]. - This results in 3 possible command line invocations: + This results in 3 possible command line invocations: - 1. No [--x] - Results in [default] value (specifically - here, [None]). + 1. No [--x] - Results in [default] value (specifically here, [None]). - 2. Only [--x] - This causes it to have the value [as_flag] - (specifically here,[Some 10]). + 2. Only [--x] - This causes it to have the value [as_flag] + (specifically here,[Some 10]). - 3. [--x=20] - This causes it to have the value from the - command line (specifically here, [Some 20]). - *) - val param : - 'a converter -> ?deprecated:string -> ?default:'a -> ?as_flag:'a -> - ?docv:string -> ?doc:string -> ?synonyms:string list -> - string -> 'a param + 3. [--x=20] - This causes it to have the value from the command line + (specifically here, [Some 20]). *) - (** Create a parameter which accepts a list at command line by - repetition of argument. Similar to [param (list 'a) ...] - in all other respects. Defaults to an empty list if unspecified. *) val param_all : - 'a converter -> ?deprecated:string -> ?default:'a list -> ?as_flag:'a -> - ?docv:string -> ?doc:string -> - ?synonyms:string list -> string -> 'a list param + 'a converter -> + ?deprecated:string -> + ?default:'a list -> + ?as_flag:'a -> + ?docv:string -> + ?doc:string -> + ?synonyms:string list -> + string -> + 'a list param + (** Create a parameter which accepts a list at command line by repetition + of argument. Similar to [param (list 'a) ...] in all other respects. + Defaults to an empty list if unspecified. *) - (** Create a boolean parameter that is set to true if user - mentions it in the command line arguments *) val flag : ?deprecated:string -> - ?docv:string -> ?doc:string -> ?synonyms:string list -> - string -> bool param + ?docv:string -> + ?doc:string -> + ?synonyms:string list -> + string -> + bool param + (** Create a boolean parameter that is set to true if user mentions it in + the command line arguments *) - (** Provides a future determined on when the config can be read *) val determined : 'a param -> 'a future + (** Provides a future determined on when the config can be read *) + type reader = { get : 'a. 'a param -> 'a } (** A witness that can read configured params *) - type reader = {get : 'a. 'a param -> 'a} - - (** [declare_extension ~features ~provides ~doc when_ready] - declares a BAP extension. - This function is a wrapper for {!Bap_main.Extension.declare} - that simplifies transition from the old (this one) - configuration system to the modern {!Bap_main.Extension}. - - It acts as {!when_ready} but you can also specify feature - tags and documentation, see {!Bap_main.Extension.declare} for more - information. - - @since 2.6.0 - *) val declare_extension : ?features:string list -> ?provides:string list -> ?doc:string -> (reader -> unit) -> unit + (** [declare_extension ~features ~provides ~doc when_ready] declares a BAP + extension. + This function is a wrapper for {!Bap_main.Extension.declare} that + simplifies transition from the old (this one) configuration system to + the modern {!Bap_main.Extension}. + + It acts as {!when_ready} but you can also specify feature tags and + documentation, see {!Bap_main.Extension.declare} for more information. + + @since 2.6.0 *) - (** [when_ready f] requests the system to call function [f] once - configuration parameters are established and stabilized. An - access function will be passed to the function [f], that can be - used to safely dereference parameters. *) val when_ready : (reader -> unit) -> unit [@@deprecated "[since 2022-09] use declare_extension"] + (** [when_ready f] requests the system to call function [f] once + configuration parameters are established and stabilized. An access + function will be passed to the function [f], that can be used to + safely dereference parameters. + @deprecated since 2022-09: use {!declare_extension} instead *) - + type manpage_block = + [ `I of string * string + | `Noblank + | `P of string + | `Pre of string + | `S of string ] (** The type for a block of man page text. - [`S s] introduces a new section [s]. @@ -11651,122 +11013,123 @@ module Std : sig - [`I (l,t)] is an indented paragraph with label [l] and text [t]. - [`Noblank] suppresses the blank line introduced between two blocks. - Except in [`Pre], whitespace and newlines are not significant - and are all collapsed to a single space. In labels [l] and text - strings [t], the syntax ["$(i,italic text)"] and ["$(b,bold - text)"] can be used to respectively produce italic and bold - text. *) - type manpage_block = [ - | `I of string * string - | `Noblank - | `P of string - | `Pre of string - | `S of string - ] + Except in [`Pre], whitespace and newlines are not significant and are + all collapsed to a single space. In labels [l] and text strings [t], + the syntax ["$(i,italic text)"] and ["$(b,bold text)"] can be used to + respectively produce italic and bold text. *) - (** Create a manpage for the plugin *) val manpage : manpage_block list -> unit + (** Create a manpage for the plugin *) - (** [bool] converts values with {!bool_of_string}. *) val bool : bool converter + (** [bool] converts values with {!bool_of_string}. *) - (** [char] converts values by ensuring the argument has a single char. *) val char : char converter + (** [char] converts values by ensuring the argument has a single char. *) - (** [int] converts values with {!int_of_string}. *) val int : int converter + (** [int] converts values with {!int_of_string}. *) - (** [nativeint] converts values with {!Nativeint.of_string}. *) val nativeint : nativeint converter + (** [nativeint] converts values with {!Nativeint.of_string}. *) - (** [int32] converts values with {!Int32.of_string}. *) val int32 : int32 converter + (** [int32] converts values with {!Int32.of_string}. *) - (** [int64] converts values with {!Int64.of_string}. *) val int64 : int64 converter + (** [int64] converts values with {!Int64.of_string}. *) - (** [float] converts values with {!float_of_string}. *) val float : float converter + (** [float] converts values with {!float_of_string}. *) - (** [string] converts values with the identity function. *) val string : string converter + (** [string] converts values with the identity function. *) - (** [enum l] converts values such that unambiguous prefixes of - string names in [l] map to the corresponding value of type ['a]. + val enum : (string * 'a) list -> 'a converter + (** [enum l] converts values such that unambiguous prefixes of string + names in [l] map to the corresponding value of type ['a]. {b Warning.} The type ['a] must be comparable with {!Pervasives.compare}. @raise Invalid_argument if [l] is empty. *) - val enum : (string * 'a) list -> 'a converter - (** [doc_enum l] documents the possible string names in the [l] - map according to the number of alternatives. If [quoted] is - [true] (default), the tokens are quoted. The resulting - string can be used in sentences of the form ["$(docv) must - be %s"]. *) val doc_enum : ?quoted:bool -> (string * 'a) list -> string + (** [doc_enum l] documents the possible string names in the [l] map + according to the number of alternatives. If [quoted] is [true] + (default), the tokens are quoted. The resulting string can be used in + sentences of the form ["$(docv) must be %s"]. *) - (** [file] converts a value with the identity function and - checks with {!Sys.file_exists} that a file with that name exists. *) val file : string converter + (** [file] converts a value with the identity function and checks with + {!Sys.file_exists} that a file with that name exists. *) - (** [dir] converts a value with the identity function and checks - with {!Sys.file_exists} and {!Sys.is_directory} - that a directory with that name exists. *) val dir : string converter + (** [dir] converts a value with the identity function and checks with + {!Sys.file_exists} and {!Sys.is_directory} that a directory with that + name exists. *) - (** [non_dir_file] converts a value with the identity function and checks - with {!Sys.file_exists} and {!Sys.is_directory} - that a non directory file with that name exists. *) val non_dir_file : string converter + (** [non_dir_file] converts a value with the identity function and checks + with {!Sys.file_exists} and {!Sys.is_directory} that a non directory + file with that name exists. *) + val list : ?sep:char -> 'a converter -> 'a list converter (** [list sep c] splits the argument at each [sep] (defaults to [',']) character and converts each substrings with [c]. *) - val list : ?sep:char -> 'a converter -> 'a list converter + val array : ?sep:char -> 'a converter -> 'a array converter (** [array sep c] splits the argument at each [sep] (defaults to [',']) character and converts each substring with [c]. *) - val array : ?sep:char -> 'a converter -> 'a array converter + val pair : + ?sep:char -> 'a converter -> 'b converter -> ('a * 'b) converter (** [pair sep c0 c1] splits the argument at the {e first} [sep] character - (defaults to [',']) and respectively converts the substrings with - [c0] and [c1]. *) - val pair : ?sep:char -> 'a converter -> 'b converter -> ('a * 'b) converter + (defaults to [',']) and respectively converts the substrings with [c0] + and [c1]. *) - (** {!t2} is {!pair}. *) val t2 : ?sep:char -> 'a converter -> 'b converter -> ('a * 'b) converter + (** {!t2} is {!pair}. *) + val t3 : + ?sep:char -> + 'a converter -> + 'b converter -> + 'c converter -> + ('a * 'b * 'c) converter (** [t3 sep c0 c1 c2] splits the argument at the {e first} two [sep] characters (defaults to [',']) and respectively converts the substrings with [c0], [c1] and [c2]. *) - val t3 : ?sep:char -> 'a converter -> 'b converter -> 'c converter -> - ('a * 'b * 'c) converter + val t4 : + ?sep:char -> + 'a converter -> + 'b converter -> + 'c converter -> + 'd converter -> + ('a * 'b * 'c * 'd) converter (** [t4 sep c0 c1 c2 c3] splits the argument at the {e first} three [sep] characters (defaults to [',']) respectively converts the substrings with [c0], [c1], [c2] and [c3]. *) - val t4 : ?sep:char -> 'a converter -> 'b converter -> 'c converter -> - 'd converter -> ('a * 'b * 'c * 'd) converter - (** [some none c] is like the converter [c] except it returns - [Some] value. It is used for command line arguments - that default to [None] when absent. [none] is what to print to - document the absence (defaults to [""]). *) val some : ?none:string -> 'a converter -> 'a option converter - + (** [some none c] is like the converter [c] except it returns [Some] + value. It is used for command line arguments that default to [None] + when absent. [none] is what to print to document the absence (defaults + to [""]). *) end - end + module Log : sig val start : ?logdir:string -> unit -> unit end [@@deprecated "[since 2019-11] use Bap_main.init log or Events module"] - (**/**) + module Monad : module type of Legacy.Monad - [@@deprecated "[since 2018-03] use the `monads' library instead of this module"] - (**/**) + [@@deprecated + "[since 2018-03] use the `monads' library instead of this module"] + (**/**) end diff --git a/lib/bap/bap_init_toplevel.ml b/lib/bap/bap_init_toplevel.ml index eb579b898..83d34da27 100644 --- a/lib/bap/bap_init_toplevel.ml +++ b/lib/bap/bap_init_toplevel.ml @@ -2,27 +2,22 @@ open Bap_plugins.Std let install_printer printer = Topdirs.dir_install_printer Format.err_formatter - (Longident.parse printer) -[@@warning "-D"] + (Option.get @@ Longident.unflatten printer) -let install_printers () = - Core_kernel.Pretty_printer.all () |> - List.iter install_printer -[@@warning "-D"] +let install_printers () = Core.Pretty_printer.all () |> install_printer let main () = let module Bap_std_is_required = Bap.Std in - let module Core_kernel_is_required = Core_kernel[@warning "-D"] in + let module Core_is_required = Core in let loader = Topdirs.dir_load Format.err_formatter in setup_dynamic_loader loader; - match Bap_main.init ~argv: [|"baptop"|] () with + match Bap_main.init ~argv:[| "baptop" |] () with | Ok () -> - install_printers (); - () + install_printers (); + () | Error failed -> - Format.eprintf "Failed to initialize BAP: %a@\n%!" - Bap_main.Extension.Error.pp failed; - exit 1 - + Format.eprintf "Failed to initialize BAP: %a@\n%!" + Bap_main.Extension.Error.pp failed; + exit 1 let () = main () diff --git a/lib/bap/bap_project.ml b/lib/bap/bap_project.ml index 8327749ff..8ae81fcb1 100644 --- a/lib/bap/bap_project.ml +++ b/lib/bap/bap_project.ml @@ -1,4 +1,4 @@ -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap_core_theory open Graphlib.Std @@ -10,17 +10,15 @@ open Bap_disasm_std open Bap_sema.Std open Or_error.Monad_infix open Format - module Driver = Bap_disasm_driver - -module Buffer = Caml.Buffer -include Bap_self.Create() +module Buffer = Stdlib.Buffer +include Bap_self.Create () let query doc attr = match Ogre.eval (Ogre.request attr) doc with | Error err -> - invalid_argf "Malformed ogre specification: %s" - (Error.to_string_hum err) () + invalid_argf "Malformed ogre specification: %s" (Error.to_string_hum err) + () | Ok bias -> bias let target_of_spec spec = @@ -30,34 +28,32 @@ let target_of_spec spec = KB.collect Theory.Unit.target unit let union_memory m1 m2 = - Memmap.to_sequence m2 |> Seq.fold ~init:m1 ~f:(fun m1 (mem,v) -> - Memmap.add m1 mem v) + Memmap.to_sequence m2 + |> Seq.fold ~init:m1 ~f:(fun m1 (mem, v) -> Memmap.add m1 mem v) -let memory_slot = KB.Class.property Theory.Unit.cls "unit-memory" - ~package:"bap" - ~public:true - ~desc:"annotated memory regions of the unit" - Memmap.domain +let memory_slot = + KB.Class.property Theory.Unit.cls "unit-memory" ~package:"bap" ~public:true + ~desc:"annotated memory regions of the unit" Memmap.domain let with_filename spec target _code memory path f = let open KB.Syntax in let width = Theory.Target.code_addr_size target in - let bias = query spec Image.Scheme.bias |> Option.map - ~f:(fun x -> Bitvec.(int64 x mod modulus width)) in + let bias = + query spec Image.Scheme.bias + |> Option.map ~f:(fun x -> Bitvec.(int64 x mod modulus width)) + in Theory.Unit.for_file path >>= fun unit -> - KB.sequence [ - KB.provide Image.Spec.slot unit spec; - KB.provide Theory.Unit.bias unit bias; - KB.provide Theory.Unit.target unit target; - KB.provide Image.Spec.slot unit spec; - KB.provide Theory.Unit.path unit (Some path); - KB.provide memory_slot unit memory; - ] >>= fun () -> - KB.promising Theory.Label.unit ~promise:(fun _ -> - !!(Some unit)) f - - - + KB.sequence + [ + KB.provide Image.Spec.slot unit spec; + KB.provide Theory.Unit.bias unit bias; + KB.provide Theory.Unit.target unit target; + KB.provide Image.Spec.slot unit spec; + KB.provide Theory.Unit.path unit (Some path); + KB.provide memory_slot unit memory; + ] + >>= fun () -> + KB.promising Theory.Label.unit ~promise:(fun _ -> !!(Some unit)) f module State = struct open KB.Syntax @@ -65,74 +61,65 @@ module State = struct module Sub = Bap_disasm_calls module Rec = Disasm_expert.Recursive - type t = { - disassembly : Dis.state; - subroutines : Sub.t; - } [@@deriving bin_io] + type t = { disassembly : Dis.state; subroutines : Sub.t } [@@deriving bin_io] - let empty = { - disassembly = Dis.init; - subroutines = Sub.empty; - } + let empty = { disassembly = Dis.init; subroutines = Sub.empty } let equal x y = - Dis.equal x.disassembly y.disassembly && - Sub.equal x.subroutines y.subroutines - + Dis.equal x.disassembly y.disassembly + && Sub.equal x.subroutines y.subroutines let disassemble self mem = Dis.scan mem self.disassembly >>| fun disassembly -> - {self with disassembly} + { self with disassembly } let partition self = - let self = { - self with - disassembly = Dis.forget_debt self.disassembly - } in + let self = { self with disassembly = Dis.forget_debt self.disassembly } in Sub.update self.subroutines self.disassembly >>| fun subroutines -> - {self with subroutines} + { self with subroutines } - let symbols {disassembly; subroutines} = + let symbols { disassembly; subroutines } = Symtab.create disassembly subroutines - let cfg {disassembly} = - Disasm_expert.Recursive.global_cfg disassembly - - let disassembly {disassembly=d} = d - let subroutines {subroutines=s} = s - - let set_length set = - Sexp.Atom (string_of_int @@ Set.length set) - - let inspect {disassembly; subroutines} = Sexp.List [ - List [ - Atom ":number-of-basic-blocks"; - set_length (Dis.blocks disassembly); - ]; - List [ - Atom ":number-of-subroutines"; - set_length (Sub.entries subroutines); + let cfg { disassembly } = Disasm_expert.Recursive.global_cfg disassembly + let disassembly { disassembly = d } = d + let subroutines { subroutines = s } = s + let set_length set = Sexp.Atom (string_of_int @@ Set.length set) + + let inspect { disassembly; subroutines } = + Sexp.List + [ + List + [ + Atom ":number-of-basic-blocks"; set_length (Dis.blocks disassembly); + ]; + List + [ + Atom ":number-of-subroutines"; set_length (Sub.entries subroutines); + ]; ] - ] - let slot = KB.Class.property Theory.Unit.cls - ~package:"bap" "disassembly" - ~persistent:(KB.Persistent.of_binable(module struct - type nonrec t = t [@@deriving bin_io] - end)) @@ - KB.Domain.flat ~empty ~equal "disassembly" ~inspect + let slot = + KB.Class.property Theory.Unit.cls ~package:"bap" "disassembly" + ~persistent: + (KB.Persistent.of_binable + (module struct + type nonrec t = t [@@deriving bin_io] + end)) + @@ KB.Domain.flat ~empty ~equal "disassembly" ~inspect module Toplevel = struct let compute_target ?file spec : Theory.target = let result = Toplevel.var "target" in - let create_unit = match file with + let create_unit = + match file with | None | Some "" -> KB.Object.create Theory.Unit.cls - | Some file -> Theory.Unit.for_file file in - Toplevel.put result begin - let* unit = create_unit in - KB.provide Image.Spec.slot unit spec >>= fun () -> - KB.collect Theory.Unit.target unit - end; + | Some file -> Theory.Unit.for_file file + in + Toplevel.put result + (let* unit = create_unit in + KB.provide Image.Spec.slot unit spec >>= fun () -> + KB.collect Theory.Unit.target unit); Toplevel.get result end end @@ -140,59 +127,58 @@ end type state = State.t [@@deriving bin_io] type unit_info = { - spec : Ogre.doc; - arch : arch; - target : Theory.target; - state : State.t; - disasm : disasm; - memory : value memmap; + spec : Ogre.doc; + arch : arch; + target : Theory.target; + state : State.t; + disasm : disasm; + memory : value memmap; symbols : symtab; -} [@@deriving fields] - -type library = { - unit : Theory.Unit.t; - info : unit_info; } +[@@deriving fields] + +type library = { unit : Theory.Unit.t; info : unit_info } type t = { - main : unit_info; + main : unit_info; libraries : library list; - storage : dict; - program : program term; - passes : string list; -} [@@deriving fields] + storage : dict; + program : program term; + passes : string list; +} +[@@deriving fields] module Library = struct type t = library - let unit {unit} = unit - let specification {info} = spec info - let arch {info} = arch info - let target {info} = target info - let state {info} = state info - let disasm {info} = disasm info - let memory {info} = memory info - let symbols {info} = symbols info + let unit { unit } = unit + let specification { info } = spec info + let arch { info } = arch info + let target { info } = target info + let state { info } = state info + let disasm { info } = disasm info + let memory { info } = memory info + let symbols { info } = symbols info end -let spec {main} = spec main -let arch {main} = arch main -let target {main} = target main -let state {main} = state main -let disasm {main} = disasm main -let memory {main} = memory main -let symbols {main} = symbols main +let spec { main } = spec main +let arch { main } = arch main +let target { main } = target main +let state { main } = state main +let disasm { main } = disasm main +let memory { main } = memory main +let symbols { main } = symbols main module Info = struct - let file,got_file = Stream.create () - let arch,got_arch = Stream.create () - let data,got_data = Stream.create () - let code,got_code = Stream.create () - let img, got_img = Stream.create () - let cfg, got_cfg = Stream.create () - let symtab,got_symtab = Stream.create () - let program,got_program = Stream.create () - let spec,got_spec = Stream.create () + let file, got_file = Stream.create () + let arch, got_arch = Stream.create () + let data, got_data = Stream.create () + let code, got_code = Stream.create () + let img, got_img = Stream.create () + let cfg, got_cfg = Stream.create () + let symtab, got_symtab = Stream.create () + let program, got_program = Stream.create () + let spec, got_spec = Stream.create () end module Input = struct @@ -209,36 +195,41 @@ module Input = struct type t = unit -> result list + let custom ?(finish = Fn.id) ?(filename = "") ?(code = Memmap.empty) + ?(data = Memmap.empty) target () = + { + arch = `unknown; + file = filename; + code; + data; + finish; + target; + spec = Ogre.Doc.empty; + memory = union_memory code data; + } + |> List.return - let custom - ?(finish=Fn.id) - ?(filename="") - ?(code=Memmap.empty) - ?(data=Memmap.empty) target () = { - arch=`unknown; file=filename; code; data; finish; - target; - spec = Ogre.Doc.empty; - memory = union_memory code data; - } |> List.return - - let create - ?(finish=Fn.id) arch file ~code ~data () = - let spec = match arch with + let create ?(finish = Fn.id) arch file ~code ~data () = + let spec = + match arch with | #Arch.unknown -> Ogre.Doc.empty - | arch -> Image.Spec.from_arch arch in { - arch; file; code; data; finish; + | arch -> Image.Spec.from_arch arch + in + { + arch; + file; + code; + data; + finish; target = State.Toplevel.compute_target ~file spec; memory = union_memory code data; spec; - } |> List.return + } + |> List.return let loaders = String.Table.create () - let register_loader name loader = - Hashtbl.set loaders ~key:name ~data:loader - - let is_code v = - Option.is_some @@ - Value.get Image.code_region v + let register_loader name loader = Hashtbl.set loaders ~key:name ~data:loader + let is_code v = Option.is_some @@ Value.get Image.code_region v let is_data v = match Value.get Image.segment v with @@ -250,40 +241,41 @@ module Input = struct | Some t when not (Theory.Target.is_unknown t) -> t | _ -> State.Toplevel.compute_target ?file spec - let result_of_image ?target finish file img = { - arch = Image.arch img; - code = Memmap.filter ~f:is_code (Image.memory img); - data = Memmap.filter ~f:is_data (Image.memory img); - memory = Image.memory img; - file; - finish; - spec = Image.spec img; - target = compute_target ~file ?target (Image.spec img); - } + let result_of_image ?target finish file img = + { + arch = Image.arch img; + code = Memmap.filter ~f:is_code (Image.memory img); + data = Memmap.filter ~f:is_data (Image.memory img); + memory = Image.memory img; + file; + finish; + spec = Image.spec img; + target = compute_target ~file ?target (Image.spec img); + } let dedup xs = - List.rev @@ fst @@ - List.fold xs ~init:([], String.Set.empty) ~f:(fun (xs, mems) x -> - if Set.mem mems x then (xs, mems) - else (x :: xs, Set.add mems x)) + List.rev @@ fst + @@ List.fold xs ~init:([], String.Set.empty) ~f:(fun (xs, mems) x -> + if Set.mem mems x then (xs, mems) else (x :: xs, Set.add mems x)) let provide_bias = Toplevel.var "provide-bias" + let provide_bias file = function | None -> Ok () - | Some bias -> try - let open KB.Syntax in - let bias = Word.to_bitvec bias in - Toplevel.put provide_bias begin - Theory.Unit.for_file file >>= fun unit -> - KB.collect Theory.Unit.bias unit >>= function - | Some _ -> !!() - | None -> - info "providing bias %a to unit %s" Bitvec.pp bias file; - KB.provide Theory.Unit.bias unit @@ Some bias - end; - Ok (Toplevel.get provide_bias) - with Toplevel.Conflict c -> - Or_error.errorf "%s: %s" file @@ KB.Conflict.to_string c + | Some bias -> ( + try + let open KB.Syntax in + let bias = Word.to_bitvec bias in + Toplevel.put provide_bias + ( Theory.Unit.for_file file >>= fun unit -> + KB.collect Theory.Unit.bias unit >>= function + | Some _ -> !!() + | None -> + info "providing bias %a to unit %s" Bitvec.pp bias file; + KB.provide Theory.Unit.bias unit @@ Some bias ); + Ok (Toplevel.get provide_bias) + with Toplevel.Conflict c -> + Or_error.errorf "%s: %s" file @@ KB.Conflict.to_string c) let page_align_up x = let width = Addr.bitwidth x in @@ -295,69 +287,82 @@ module Input = struct match Memmap.(min_addr memmap, max_addr memmap) with | None, _ | _, None -> bias | Some lo, Some hi -> - Option.value_map bias ~default:hi ~f:(fun bias -> - let size = Addr.((hi - lo) ++ 1) in - Addr.(bias + size)) |> - page_align_up |> Option.some + Option.value_map bias ~default:hi ~f:(fun bias -> + let size = Addr.(hi - lo ++ 1) in + Addr.(bias + size)) + |> page_align_up |> Option.some let of_image ?target ?loader ?(libraries = []) main = let rec aux ?bias acc = function | [] -> Ok (List.rev acc) | filename :: rest -> - provide_bias filename bias >>= fun () -> - Image.create ?backend:loader filename >>= fun (img,warns) -> - List.iter warns ~f:(fun e -> warning "%a" Error.pp e); - let bias = next_bias img ?bias in - let spec = Image.spec img in - Signal.send Info.got_img img; - let finish proj = { - proj with - storage = Dict.set proj.storage Image.specification spec; - program = - Term.map sub_t proj.program ~f:(fun sub -> - match Term.get_attr sub address with - | Some a when Addr.equal a (Image.entry_point img) -> - Term.set_attr sub Sub.entry_point () - | _ -> sub) - } in - let res = result_of_image ?target finish filename img in - aux (res :: acc) rest ?bias in + provide_bias filename bias >>= fun () -> + Image.create ?backend:loader filename >>= fun (img, warns) -> + List.iter warns ~f:(fun e -> warning "%a" Error.pp e); + let bias = next_bias img ?bias in + let spec = Image.spec img in + Signal.send Info.got_img img; + let finish proj = + { + proj with + storage = Dict.set proj.storage Image.specification spec; + program = + Term.map sub_t proj.program ~f:(fun sub -> + match Term.get_attr sub address with + | Some a when Addr.equal a (Image.entry_point img) -> + Term.set_attr sub Sub.entry_point () + | _ -> sub); + } + in + let res = result_of_image ?target finish filename img in + aux (res :: acc) rest ?bias + in aux [] @@ dedup (main :: libraries) let from_image ?target ?loader ?(libraries = []) main () = of_image ?target ?loader main ~libraries |> ok_exn - let load ?target ?loader ?(libraries = []) main = match loader with + let load ?target ?loader ?(libraries = []) main = + match loader with | None -> from_image ?target main ~libraries - | Some name -> match Hashtbl.find loaders name with - | None -> from_image ?target ?loader main ~libraries - | Some load -> fun () -> - List.bind (main :: dedup libraries) ~f:(Fn.flip load ()) + | Some name -> ( + match Hashtbl.find loaders name with + | None -> from_image ?target ?loader main ~libraries + | Some load -> + fun () -> List.bind (main :: dedup libraries) ~f:(Fn.flip load ())) let file ?loader ~filename = load ?loader filename - let raw ?(target=Theory.Target.unknown) ?(filename="") ?base arch big () = + let raw ?(target = Theory.Target.unknown) ?(filename = "") ?base arch big () = if Bigstring.length big = 0 then invalid_arg "file is empty"; - let addr_size = if Theory.Target.is_unknown target - then Arch.addr_size arch |> Size.in_bits - else Theory.Target.code_addr_size target in - let endian = if Theory.Target.is_unknown target - then Arch.endian arch else - if Theory.Endianness.(Theory.Target.endianness target = le) - then LittleEndian else BigEndian in + let addr_size = + if Theory.Target.is_unknown target then + Arch.addr_size arch |> Size.in_bits + else Theory.Target.code_addr_size target + in + let endian = + if Theory.Target.is_unknown target then Arch.endian arch + else if Theory.Endianness.(Theory.Target.endianness target = le) then + LittleEndian + else BigEndian + in let base = Option.value base ~default:(Addr.zero addr_size) in let mem = Memory.create endian base big |> ok_exn in let section = Value.create Image.section "bap.user" in let code = Memmap.add Memmap.empty mem section in - let data = Memmap.empty in - let spec = Image.Spec.from_arch arch in { + let data = Memmap.empty in + let spec = Image.Spec.from_arch arch in + { arch; code; data; - file = filename; finish = Fn.id; spec; + file = filename; + finish = Fn.id; + spec; target; - memory = code - } |> List.return + memory = code; + } + |> List.return let binary ?base arch ~filename = raw ?base arch (Bap_fileutils.readfile filename) @@ -365,88 +370,86 @@ module Input = struct let raw_file ?base target filename = raw ~target ?base `unknown (Bap_fileutils.readfile filename) - let from_bigstring ?base target data = - raw ~target ?base `unknown data + let from_bigstring ?base target data = raw ~target ?base `unknown data let from_string ?base target data = from_bigstring ?base target (Bigstring.of_string data) - let available_loaders () = - Hashtbl.keys loaders @ Image.available_backends () + let available_loaders () = Hashtbl.keys loaders @ Image.available_backends () end type input = Input.t type project = t +type bound = [ `min | `max ] [@@deriving sexp] +type spec = [ `name | bound ] [@@deriving sexp] -type bound = [`min | `max] [@@deriving sexp] -type spec = [`name | bound] [@@deriving sexp] - -type subst = [ - | `section of spec +type subst = + [ `section of spec | `symbol of spec | `memory of bound | `block of bound | `asm - | `bil -] [@@deriving sexp] + | `bil ] +[@@deriving sexp] - -let roots rooter = match rooter with - | None -> [] - | Some r -> Rooter.roots r |> Seq.to_list +let roots rooter = + match rooter with None -> [] | Some r -> Rooter.roots r |> Seq.to_list module Cfg = Graphs.Cfg let empty_disasm = Disasm.create Cfg.empty let pp_mem ppf mem = - fprintf ppf "%s" - (Addr.string_of_value (Memory.min_addr mem)) + fprintf ppf "%s" (Addr.string_of_value (Memory.min_addr mem)) let pp_disasm_error ppf = function | `Failed_to_disasm mem -> - fprintf ppf "can't disassemble an instruction at address %a" pp_mem mem - | `Failed_to_lift (_mem,insn,err) -> - fprintf ppf "<%s>: %a" - (Disasm_expert.Basic.Insn.asm insn) Error.pp err + fprintf ppf "can't disassemble an instruction at address %a" pp_mem mem + | `Failed_to_lift (_mem, insn, err) -> + fprintf ppf "<%s>: %a" (Disasm_expert.Basic.Insn.asm insn) Error.pp err -let set_package package = match package with +let set_package package = + match package with | None -> KB.return () | Some pkg -> KB.Symbol.set_package pkg let unused_options = - List.iter ~f:(Option.iter ~f:(fun name -> - warning "Project.create parameter %S is deprecated, \ - please consult the documentation for the proper \ - alternative" name)) - -let empty_unit target = { - spec = Ogre.Doc.empty; - arch = `unknown; - target; - state = State.empty; - disasm = Disasm.create Graphs.Cfg.empty; - memory = Memmap.empty; - symbols = Symtab.empty; -} + List.iter + ~f: + (Option.iter ~f:(fun name -> + warning + "Project.create parameter %S is deprecated, please consult the \ + documentation for the proper alternative" + name)) + +let empty_unit target = + { + spec = Ogre.Doc.empty; + arch = `unknown; + target; + state = State.empty; + disasm = Disasm.create Graphs.Cfg.empty; + memory = Memmap.empty; + symbols = Symtab.empty; + } -let empty target = { - main = empty_unit target; - libraries = []; - storage = Dict.empty; - program = Program.create (); - passes = []; -} +let empty target = + { + main = empty_unit target; + libraries = []; + storage = Dict.empty; + program = Program.create (); + passes = []; + } -let (=?) name = Option.map ~f:(fun _ -> name) +let ( =? ) name = Option.map ~f:(fun _ -> name) let compute_unit ?package ?state input = let open KB.Syntax in let compute_target target spec = - if Theory.Target.is_unknown target - then target_of_spec spec - else !!target in - let {Input.arch; data; code; file; spec; target; memory; _} = input in + if Theory.Target.is_unknown target then target_of_spec spec else !!target + in + let { Input.arch; data; code; file; spec; target; memory; _ } = input in Signal.send Info.got_file file; Signal.send Info.got_arch arch; Signal.send Info.got_data data; @@ -458,29 +461,32 @@ let compute_unit ?package ?state input = Theory.with_current theory @@ fun () -> compute_target target spec >>= fun target -> Theory.Unit.for_file file >>= fun unit -> - let* state = match state with + let* state = + match state with | Some state -> !!state | None -> - with_filename spec target code memory file @@ fun () -> - KB.collect State.slot unit >>= fun state -> - if KB.Domain.is_empty (KB.Slot.domain State.slot) state - then - Memmap.to_sequence code |> Seq.to_list_rev |> - KB.List.fold ~init:State.empty ~f:(fun k (mem,_) -> - State.disassemble k mem) >>= - State.partition >>= fun state -> - KB.provide State.slot unit state >>| fun () -> - state - else !!state in + with_filename spec target code memory file @@ fun () -> + KB.collect State.slot unit >>= fun state -> + if KB.Domain.is_empty (KB.Slot.domain State.slot) state then + Memmap.to_sequence code |> Seq.to_list_rev + |> KB.List.fold ~init:State.empty ~f:(fun k (mem, _) -> + State.disassemble k mem) + >>= State.partition + >>= fun state -> + KB.provide State.slot unit state >>| fun () -> state + else !!state + in State.cfg state >>= fun cfg -> State.symbols state >>= fun symbols -> Program.KB.lift symbols >>| fun prog -> - let prog = Term.map sub_t prog ~f:(fun sub -> - if Term.has_attr sub Bap_attributes.address then - Term.set_attr sub Bap_attributes.filename file - else sub) in + let prog = + Term.map sub_t prog ~f:(fun sub -> + if Term.has_attr sub Bap_attributes.address then + Term.set_attr sub Bap_attributes.filename file + else sub) + in let disasm = Disasm.create cfg in - {spec; arch; target; state; disasm; memory; symbols}, prog, unit + ({ spec; arch; target; state; disasm; memory; symbols }, prog, unit) let compute_units ?package ?state main libs = let open KB.Syntax in @@ -488,67 +494,60 @@ let compute_units ?package ?state main libs = KB.List.map libs ~f:compute_unit >>= fun libs -> let prog, libs = List.fold libs ~init:(prog, []) ~f:(fun (p, l) (info, prog, unit) -> - Term.enum sub_t prog |> Seq.fold ~init:p ~f:(Term.append sub_t), - {unit; info} :: l) in - set_package package >>| fun () -> - main, List.rev libs, prog - -let create - ?package - ?state - ?disassembler:p1 - ?brancher:p2 - ?symbolizer:p3 - ?rooter:p4 - ?reconstructor:p5 - (read : input) = + ( Term.enum sub_t prog |> Seq.fold ~init:p ~f:(Term.append sub_t), + { unit; info } :: l )) + in + set_package package >>| fun () -> (main, List.rev libs, prog) + +let create ?package ?state ?disassembler:p1 ?brancher:p2 ?symbolizer:p3 + ?rooter:p4 ?reconstructor:p5 (read : input) = try - unused_options [ - "disassembler" =? p1; - "brancher" =? p2; - "symbolizer" =? p3; - "rooter" =? p4; - "reconstructor" =? p5; - ]; + unused_options + [ + "disassembler" =? p1; + "brancher" =? p2; + "symbolizer" =? p3; + "rooter" =? p4; + "reconstructor" =? p5; + ]; match read () with | [] -> assert false | main_in :: libs_in -> - let result = Toplevel.var "disassembly-result" in - Toplevel.put result @@ compute_units main_in libs_in ?package ?state; - let main, libraries, program = Toplevel.get result in - Result.return @@ main_in.finish { - main; - libraries; - program; - storage = Dict.set Dict.empty filename main_in.file; - passes=[]; - } + let result = Toplevel.var "disassembly-result" in + Toplevel.put result @@ compute_units main_in libs_in ?package ?state; + let main, libraries, program = Toplevel.get result in + Result.return + @@ main_in.finish + { + main; + libraries; + program; + storage = Dict.set Dict.empty filename main_in.file; + passes = []; + } with | Toplevel.Conflict err -> - let open Error.Internal_repr in - let msg = - String (Format.asprintf "Knowledge Base Conflict: %a" - KB.Conflict.pp err) in - Error (to_info msg) + let open Error.Internal_repr in + let msg = + String + (Format.asprintf "Knowledge Base Conflict: %a" KB.Conflict.pp err) + in + Error (to_info msg) | exn -> Or_error.of_exn ~backtrace:`Get exn let specification = spec - -let with_symbols p x = {p with main = {p.main with symbols = x}} -let with_program p x = {p with program = x} -let map_program p ~f = {p with program = f p.program} - -let with_memory p memory = {p with main = {p.main with memory}} +let with_symbols p x = { p with main = { p.main with symbols = x } } +let with_program p x = { p with program = x } +let map_program p ~f = { p with program = f p.program } +let with_memory p memory = { p with main = { p.main with memory } } let with_storage = Field.fset Fields.storage let restore_state _ = - failwith "Project.restore_state: this function should no be used. - Please use the Toplevel module to save/restore the state." - -let set t tag x = - with_storage t @@ - Dict.set t.storage tag x + failwith + "Project.restore_state: this function should no be used.\n\ + \ Please use the Toplevel module to save/restore the state." +let set t tag x = with_storage t @@ Dict.set t.storage tag x let get t = Dict.find t.storage let has t = Dict.mem t.storage let del t tag = with_storage t @@ Dict.remove t.storage tag @@ -560,9 +559,9 @@ let subst_of_string = function | "symbol" | "symbol_name" -> Some (`symbol `name) | "symbol_addr" | "symbol_min_addr" -> Some (`symbol `min) | "symbol_max_addr" -> Some (`symbol `max) - | "bil" -> Some (`bil) - | "asm" -> Some (`asm) - | "block" | "block_name" -> Some (`block `name) + | "bil" -> Some `bil + | "asm" -> Some `asm + | "block" | "block_name" -> Some (`block `name) | "block_addr" | "block_min_addr" -> Some (`block `min) | "block_max_addr" -> Some (`block `max) | "min_addr" | "addr" -> Some (`memory `min) @@ -570,44 +569,51 @@ let subst_of_string = function | _ -> None let addr which mem = - let take = match which with - | `min -> Memory.min_addr - | `max -> Memory.max_addr in + let take = + match which with `min -> Memory.min_addr | `max -> Memory.max_addr + in sprintf "0x%s" @@ Addr.string_of_value (take mem) - let tag_memory project mem tag x = - {project with - main = { - project.main with - memory = Memmap.add project.main.memory mem (Value.create tag x) - }} + { + project with + main = + { + project.main with + memory = Memmap.add project.main.memory mem (Value.create tag x); + }; + } let substitute project mem tag value : t = let find_tag tag mem = - Memmap.dominators (memory project) mem |> - Seq.find_map ~f:(fun (mem,v) -> match Value.get tag v with - | Some reg -> Some (mem,reg) - | None -> None) in + Memmap.dominators (memory project) mem + |> Seq.find_map ~f:(fun (mem, v) -> + match Value.get tag v with + | Some reg -> Some (mem, reg) + | None -> None) + in let find_section = find_tag Image.section in let find_symbol mem = - Symtab.owners (symbols project) (Memory.min_addr mem) |> - List.hd |> - Option.map ~f:(fun (name,entry,_) -> - Block.memory entry, name) in + Symtab.owners (symbols project) (Memory.min_addr mem) + |> List.hd + |> Option.map ~f:(fun (name, entry, _) -> (Block.memory entry, name)) + in let find_block mem = - Symtab.dominators (symbols project) mem |> - List.find_map ~f:(fun (_,_,cfg) -> - Seq.find_map (Cfg.nodes cfg) ~f:(fun block -> - if Addr.(Block.addr block = Memory.min_addr mem) - then Some (Block.memory block, block) - else None)) in - let subst_section (mem,name) = function + Symtab.dominators (symbols project) mem + |> List.find_map ~f:(fun (_, _, cfg) -> + Seq.find_map (Cfg.nodes cfg) ~f:(fun block -> + if Addr.(Block.addr block = Memory.min_addr mem) then + Some (Block.memory block, block) + else None)) + in + let subst_section (mem, name) = function | #bound as b -> addr b mem - | `name -> name in - let subst_block (mem,_block) = function + | `name -> name + in + let subst_block (mem, _block) = function | #bound as b -> addr b mem - | `name -> "blk_"^addr `min mem in + | `name -> "blk_" ^ addr `min mem + in let asm insn = Insn.asm insn in let bil insn = asprintf "%a" Bil.pp (Insn.bil insn) in let subst_disasm mem out = @@ -615,66 +621,65 @@ let substitute project mem tag value : t = match Disasm.of_mem (arch project) mem with | Error _er -> "" | Ok dis -> - Disasm.insns dis |> - Seq.map ~f:(fun (_,insn) -> inj insn) |> Seq.to_list |> - String.concat ~sep:"\n" in + Disasm.insns dis + |> Seq.map ~f:(fun (_, insn) -> inj insn) + |> Seq.to_list |> String.concat ~sep:"\n" + in let apply_subst find mem subst spec value = - match find mem with - | Some thing -> subst thing spec - | None -> value in + match find mem with Some thing -> subst thing spec | None -> value + in let sub mem x = let buf = Buffer.create (String.length x) in - Buffer.add_substitute buf (fun x -> match subst_of_string x with + Buffer.add_substitute buf + (fun x -> + match subst_of_string x with | Some (`section spec) -> - apply_subst find_section mem subst_section spec x + apply_subst find_section mem subst_section spec x | Some (`symbol spec) -> - apply_subst find_symbol mem subst_section spec x + apply_subst find_symbol mem subst_section spec x | Some (`memory bound) -> addr bound mem - | Some (`block spec) -> - apply_subst find_block mem subst_block spec x - | Some (`bil | `asm as out) -> subst_disasm mem out - | None -> x) x; - Buffer.contents buf in + | Some (`block spec) -> apply_subst find_block mem subst_block spec x + | Some ((`bil | `asm) as out) -> subst_disasm mem out + | None -> x) + x; + Buffer.contents buf + in tag_memory project mem tag (sub mem value) module DList = Doubly_linked type pass = { name : string; - main : (t -> t); + main : t -> t; deps : string list; auto : bool; once : bool; - starts : float stream; - finishes : float stream; - failures : float stream; - successes : float stream; + starts : float stream; + finishes : float stream; + failures : float stream; + successes : float stream; } -let sexp_of_pass {name} = sexp_of_string name - +let sexp_of_pass { name } = sexp_of_string name let passes : pass DList.t = DList.create () -let pass_registrations,pass_registered = Stream.create () - +let pass_registrations, pass_registered = Stream.create () let forget : pass DList.Elt.t -> unit = fun _ -> () - let name_of_bundle () = - let module Self = Bap_self.Create() in + let module Self = Bap_self.Create () in Self.name -let register_pass ?(autorun=false) ?(runonce=autorun) ?(deps=[]) ?name main : unit = +let register_pass ?(autorun = false) ?(runonce = autorun) ?(deps = []) ?name + main : unit = let pref = name_of_bundle () in - let name = match name with - | None -> pref - | Some name -> pref ^ "-" ^ name in - let starts,started = Stream.create () in - let successes,succeded = Stream.create () in - let failures,failed = Stream.create () in + let name = match name with None -> pref | Some name -> pref ^ "-" ^ name in + let starts, started = Stream.create () in + let successes, succeded = Stream.create () in + let failures, failed = Stream.create () in let finishes = - Stream.either successes failures |> - Stream.map ~f:Either.value in + Stream.either successes failures |> Stream.map ~f:Either.value + in let now () = Caml_unix.gettimeofday () in let main project = Signal.send started (now ()); @@ -684,25 +689,35 @@ let register_pass ?(autorun=false) ?(runonce=autorun) ?(deps=[]) ?name main : un project with exn -> Signal.send failed (now ()); - raise exn in - let pass = { - name; main; deps; - once = runonce; auto = autorun; - starts; finishes; - failures; successes; - } in + raise exn + in + let pass = + { + name; + main; + deps; + once = runonce; + auto = autorun; + starts; + finishes; + failures; + successes; + } + in DList.insert_last passes pass |> forget; Signal.send pass_registered pass let register_pass' ?autorun ?runonce ?deps ?name v : unit = - register_pass ?autorun ?runonce ?deps ?name (fun p -> v p; p) + register_pass ?autorun ?runonce ?deps ?name (fun p -> + v p; + p) type second = float + module Pass = struct type t = pass [@@deriving sexp_of] - type error = - | Unsat_dep of pass * string - | Runtime_error of pass * exn + + type error = Unsat_dep of pass * string | Runtime_error of pass * exn [@@deriving variants, sexp_of] let find name : pass option = @@ -712,36 +727,35 @@ module Pass = struct let fail = function | Unsat_dep _ as err -> raise (Failed err) - | Runtime_error (pass,exn) -> - let backtrace = Caml.Printexc.get_backtrace () in - raise (Failed (Runtime_error (pass, Exn.Reraised (backtrace, exn)))) + | Runtime_error (pass, exn) -> + let backtrace = Stdlib.Printexc.get_backtrace () in + raise (Failed (Runtime_error (pass, Exn.Reraised (backtrace, exn)))) let is_evaled pass proj = List.exists proj.passes ~f:(fun name -> String.equal name pass.name) - let eval pass proj = { - (pass.main proj) with - passes = pass.name :: proj.passes - } + let eval pass proj = + { (pass.main proj) with passes = pass.name :: proj.passes } let rec exec proj pass = if pass.once && is_evaled pass proj then proj else - let deps = List.map pass.deps ~f:(fun name -> match find name with - | None -> fail @@ unsat_dep pass name - | Some dep -> dep) in + let deps = + List.map pass.deps ~f:(fun name -> + match find name with + | None -> fail @@ unsat_dep pass name + | Some dep -> dep) + in let proj = List.fold deps ~init:proj ~f:exec in - try eval pass proj with - exn -> fail @@ runtime_error pass exn + try eval pass proj with exn -> fail @@ runtime_error pass exn let run_exn pass proj = exec proj pass - let run pass proj : (project,error) Result.t = - try Ok (exec proj pass) with - | Failed error -> Error error + let run pass proj : (project, error) Result.t = + try Ok (exec proj pass) with Failed error -> Error error let name p = p.name - let autorun p = p.auto + let autorun p = p.auto let starts p = p.starts let finishes p = p.finishes let failures p = p.failures @@ -751,13 +765,10 @@ end let passes () = DList.to_list passes let find_pass = Pass.find -module Registry(T : T)(I : T) = struct +module Registry (T : T) (I : T) = struct open Bap_knowledge - type info = { - name : Knowledge.Name.t; - desc : string option; - extra : I.t; - } + + type info = { name : Knowledge.Name.t; desc : string option; extra : I.t } let registry : (Knowledge.name, string option * T.t * I.t) Hashtbl.t = Hashtbl.create (module Knowledge.Name) @@ -765,50 +776,55 @@ module Registry(T : T)(I : T) = struct let register ?desc ?package name extra entity = let name = Knowledge.Name.create ?package name in if Hashtbl.mem registry name then - invalid_argf "An element with name %s is already registered \ - please choose a unique name" + invalid_argf + "An element with name %s is already registered please choose a unique \ + name" (Knowledge.Name.show name) (); - Hashtbl.add_exn registry name (desc,entity,extra) + Hashtbl.add_exn registry ~key:name ~data:(desc, entity, extra) let find ?package name = let name = Knowledge.Name.read ?package name in match Hashtbl.find registry name with - | Some (_,x,_) -> Some x + | Some (_, x, _) -> Some x | None -> None let registered () = - Hashtbl.to_alist registry |> - List.map ~f:(fun (name,(desc,_,extra)) -> {name; desc; extra}) + Hashtbl.to_alist registry + |> List.map ~f:(fun (name, (desc, _, extra)) -> { name; desc; extra }) + + let name { name } = name - let name {name} = name let desc = function - | {desc=None} -> "not provided" - | {desc=Some txt} -> txt - let extra {extra} = extra + | { desc = None } -> "not provided" + | { desc = Some txt } -> txt + + let extra { extra } = extra end module Collator = struct - type t = Collator : { - prepare : project -> 's; - collate : int -> 's -> project -> 's; - summary : 's -> unit; - } -> t + type t = + | Collator : { + prepare : project -> 's; + collate : int -> 's -> project -> 's; + summary : 's -> unit; + } + -> t - include Registry(struct type nonrec t = t end)(Unit) + include + Registry + (struct + type nonrec t = t + end) + (Unit) - let apply (Collator {prepare; collate; summary}) projects = + let apply (Collator { prepare; collate; summary }) projects = match Seq.split_n projects 1 with - | [base],rest -> - summary @@ - Seq.foldi ~init:(prepare base) rest ~f:collate + | [ base ], rest -> + summary @@ Seq.foldi ~init:(prepare base) rest ~f:collate | _ -> () let register ?desc ?package name ~prepare ~collate ~summary = - register ?desc ?package name () @@ Collator { - prepare; - collate; - summary; - } + register ?desc ?package name () @@ Collator { prepare; collate; summary } end module Analysis = struct @@ -829,37 +845,25 @@ module Analysis = struct rule : string; } - type ('a,'r) args = { - run : 'a -> 'r arg; - grammar : string list; - } - - type problem = - | No_input - | Bad_syntax of string - | Trailing_input - - type parse_error = { - ctxt : ctxt; - problem : problem - } - + type ('a, 'r) args = { run : 'a -> 'r arg; grammar : string list } + type problem = No_input | Bad_syntax of string | Trailing_input + type parse_error = { ctxt : ctxt; problem : problem } type Knowledge.conflict += Fail of parse_error - - let fail ctxt problem = - Knowledge.fail (Fail {ctxt; problem}) + let fail ctxt problem = Knowledge.fail (Fail { ctxt; problem }) let string_of_problem = function | No_input -> "expects an argument" | Bad_syntax msg -> msg | Trailing_input -> "too many arguments" - let string_of_parse_error {ctxt; problem} = - sprintf "Syntax error: when parsing rule %s of argument %d - %s" - ctxt.rule (ctxt.pos+1) (string_of_problem problem) + let string_of_parse_error { ctxt; problem } = + sprintf "Syntax error: when parsing rule %s of argument %d - %s" ctxt.rule + (ctxt.pos + 1) + (string_of_problem problem) - let () = Knowledge.Conflict.register_printer @@ function + let () = + Knowledge.Conflict.register_printer @@ function | Fail err -> Some (string_of_parse_error err) | _ -> None @@ -867,88 +871,101 @@ module Analysis = struct match ctxt.inputs with | [] -> fail ctxt No_input | x :: xs -> - let fail x = fail ctxt (Bad_syntax x) in - parse ~fail x >>| fun r -> r,{ - ctxt with pos = ctxt.pos + 1; - parsed = x :: ctxt.parsed; - inputs = xs; - } - - let argument ?(desc="No description") ~parse rule = { - parse=(required parse); rule; desc; - } - - let optional arg = { - rule = sprintf "[%s]" arg.rule; - desc = arg.desc; - parse = fun ctxt -> match ctxt.inputs with - | [] -> KB.return (None,ctxt) - | _ -> arg.parse ctxt >>| fun (x,ctxt) -> Some x,ctxt - } + let fail x = fail ctxt (Bad_syntax x) in + parse ~fail x >>| fun r -> + ( r, + { + ctxt with + pos = ctxt.pos + 1; + parsed = x :: ctxt.parsed; + inputs = xs; + } ) + + let argument ?(desc = "No description") ~parse rule = + { parse = required parse; rule; desc } + + let optional arg = + { + rule = sprintf "[%s]" arg.rule; + desc = arg.desc; + parse = + (fun ctxt -> + match ctxt.inputs with + | [] -> KB.return (None, ctxt) + | _ -> arg.parse ctxt >>| fun (x, ctxt) -> (Some x, ctxt)); + } let pull_keyword kw inputs = let rec loop searched = function | [] -> None - | [k] when String.equal k kw && List.is_empty searched -> - Some [] + | [ k ] when String.equal k kw && List.is_empty searched -> Some [] | k :: x :: xs when String.equal k kw -> - Some (x :: List.rev_append searched xs) - | x :: xs -> loop (x::searched) xs in + Some (x :: List.rev_append searched xs) + | x :: xs -> loop (x :: searched) xs + in loop [] inputs let filter_flag kw inputs = let rec loop searched = function | [] -> None - | k :: xs when String.equal k kw -> - Some (List.rev_append searched xs) - | x :: xs -> loop (x::searched) xs in + | k :: xs when String.equal k kw -> Some (List.rev_append searched xs) + | x :: xs -> loop (x :: searched) xs + in loop [] inputs - let keyword key arg = { - rule = sprintf "[:%s %s]" key arg.rule; - desc = "an argument prefixed by the keyword"; - parse = fun ctxt -> - match pull_keyword (":"^key) ctxt.inputs with - | None -> KB.return (None,ctxt) - | Some inputs -> arg.parse {ctxt with inputs} >>| - fun (x,ctxt) -> Some x,ctxt - } + let keyword key arg = + { + rule = sprintf "[:%s %s]" key arg.rule; + desc = "an argument prefixed by the keyword"; + parse = + (fun ctxt -> + match pull_keyword (":" ^ key) ctxt.inputs with + | None -> KB.return (None, ctxt) + | Some inputs -> + arg.parse { ctxt with inputs } >>| fun (x, ctxt) -> (Some x, ctxt)); + } - let flag key = { - rule = sprintf "[:%s]" key; - desc = "an optional flag"; - parse = fun ctxt -> - match filter_flag (":"^key) ctxt.inputs with - | None -> KB.return (false,ctxt) - | Some inputs -> KB.return (true, {ctxt with inputs}) - } + let flag key = + { + rule = sprintf "[:%s]" key; + desc = "an optional flag"; + parse = + (fun ctxt -> + match filter_flag (":" ^ key) ctxt.inputs with + | None -> KB.return (false, ctxt) + | Some inputs -> KB.return (true, { ctxt with inputs })); + } let apply_until_exhausted ctxt arg = - let rec loop rs ctxt = match ctxt.inputs with - | [] -> KB.return (List.rev rs,ctxt) - | _ -> arg.parse ctxt >>= fun (r,ctxt) -> - loop (r::rs) ctxt in + let rec loop rs ctxt = + match ctxt.inputs with + | [] -> KB.return (List.rev rs, ctxt) + | _ -> arg.parse ctxt >>= fun (r, ctxt) -> loop (r :: rs) ctxt + in loop [] ctxt - let rest arg = { - rule = sprintf "[%s] ..." arg.rule; - desc = arg.desc; - parse = fun ctxt -> apply_until_exhausted ctxt arg - } - - let empty = { - rule = ""; - desc = "no arguments are expected"; - parse = fun ctxt -> match ctxt.inputs with - | [] -> KB.return ((),ctxt) - | _ -> fail ctxt Trailing_input - } + let rest arg = + { + rule = sprintf "[%s] ..." arg.rule; + desc = arg.desc; + parse = (fun ctxt -> apply_until_exhausted ctxt arg); + } + let empty = + { + rule = ""; + desc = "no arguments are expected"; + parse = + (fun ctxt -> + match ctxt.inputs with + | [] -> KB.return ((), ctxt) + | _ -> fail ctxt Trailing_input); + } let parse_string ~fail:_ x = !!x - let string = argument "" - ~parse:parse_string + let string = + argument "" ~parse:parse_string ~desc:"a sequence of characters without whitespaces" let parse_object cls ~fail:_ x = KB.Object.read cls x @@ -964,94 +981,103 @@ module Analysis = struct ~desc:(sprintf "an object of the core:unit class") let parse_bitvec ~fail str = - try !!(Bitvec.of_string str) - with Invalid_argument msg -> - fail msg + try !!(Bitvec.of_string str) with Invalid_argument msg -> fail msg - let bitvec = argument "" - ~parse:parse_bitvec + let bitvec = + argument "" ~parse:parse_bitvec ~desc:"a bitvector of arbitrary length" module Arg = struct type 'a t = 'a arg - let map arg ~f = { - arg with - parse = fun ctxt -> - arg.parse {ctxt with rule = arg.rule} >>| fun (x,ctxt) -> - f x,ctxt - } - let apply ({parse=f} as lhs) ({parse=x} as rhs) = { - rule = lhs.rule ^ " " ^ rhs.rule; - desc = ""; - parse = fun ctxt -> - f {ctxt with rule = lhs.rule} >>= fun (f,ctxt) -> - x {ctxt with rule = rhs.rule} >>| fun (x,ctxt) -> - f x,ctxt - } + let map arg ~f = + { + arg with + parse = + (fun ctxt -> + arg.parse { ctxt with rule = arg.rule } >>| fun (x, ctxt) -> + (f x, ctxt)); + } + + let apply ({ parse = f } as lhs) ({ parse = x } as rhs) = + { + rule = lhs.rule ^ " " ^ rhs.rule; + desc = ""; + parse = + (fun ctxt -> + f { ctxt with rule = lhs.rule } >>= fun (f, ctxt) -> + x { ctxt with rule = rhs.rule } >>| fun (x, ctxt) -> (f x, ctxt)); + } end - let ($) t arg = { - grammar = arg.rule :: t.grammar; - run = fun f -> Arg.apply (t.run f) arg - } - let args a = { - grammar = [a.rule]; - run = fun f -> Arg.map ~f a - } + let ( $ ) t arg = + { + grammar = arg.rule :: t.grammar; + run = (fun f -> Arg.apply (t.run f) arg); + } + + let args a = { grammar = [ a.rule ]; run = (fun f -> Arg.map ~f a) } let apply ~f args = args.run f let run inputs args code = - let ctxt = {rule = ""; pos=0; parsed=[]; inputs} in - (apply args ~f:code).parse ctxt >>= fun (f,_ctxt) -> - f + let ctxt = { rule = ""; pos = 0; parsed = []; inputs } in + (apply args ~f:code).parse ctxt >>= fun (f, _ctxt) -> f type t = string list -> unit knowledge - include Registry(struct type nonrec t = t end)(struct - type t = string list - end) + + include + Registry + (struct + type nonrec t = t + end) + (struct + type t = string list + end) let register ?desc ?package name args analysis = let code inputs = run inputs args analysis in register ?desc ?package name (List.rev args.grammar) code let apply f xs = f xs - let grammar = extra module Grammar = struct type t = string list + let to_string = String.concat ~sep:" " end + type grammar = Grammar.t end module type S = sig type t + val empty : t val of_image : image -> t + module Factory : Bap_disasm_source.Factory with type t = t end -include Data.Make(struct - type nonrec t = t - let version = "2.0.0" - end) +include Data.Make (struct + type nonrec t = t -let () = - Data.set_module_name instance "Bap.Std.Project" + let version = "2.0.0" +end) + +let () = Data.set_module_name instance "Bap.Std.Project" let () = let open KB.Rule in - declare ~package:"bap" "project-filename" |> - dynamic ["input"] |> - dynamic ["data"; "code"; "path"] |> - dynamic ["loader"] |> - require Theory.Label.addr |> - provide Theory.Label.unit |> - comment {| + declare ~package:"bap" "project-filename" + |> dynamic [ "input" ] + |> dynamic [ "data"; "code"; "path" ] + |> dynamic [ "loader" ] |> require Theory.Label.addr + |> provide Theory.Label.unit + |> comment + {| On [Project.create input] provides [unit] for the address [x] if [x] in [data] or [x] in [code]. The [unit] is initialized with the specification from the loader, filename, target name, etc. -|}; +|} diff --git a/lib/bap/bap_project.mli b/lib/bap/bap_project.mli index 38ff31a4c..6264a9b4f 100644 --- a/lib/bap/bap_project.mli +++ b/lib/bap/bap_project.mli @@ -1,6 +1,6 @@ open Bap_knowledge open Bap_core_theory -open Core_kernel[@@warning "-D"] +open Core open Regular.Std open Bap_future.Std open Bap_types.Std @@ -17,7 +17,6 @@ type state [@@deriving bin_io] type second = float val state : t -> state - val empty : Theory.Target.t -> t val create : @@ -28,7 +27,8 @@ val create : ?symbolizer:symbolizer source -> ?rooter:rooter source -> ?reconstructor:reconstructor source -> - input -> t Or_error.t + input -> + t Or_error.t val arch : t -> arch val target : t -> Theory.Target.t @@ -44,7 +44,7 @@ val memory_slot : (Theory.Unit.cls, value Memmap.t) KB.slot val disasm : t -> disasm val with_memory : t -> value memmap -> t val tag_memory : t -> mem -> 'a tag -> 'a -> t -val substitute : t -> mem -> string tag -> string -> t +val substitute : t -> mem -> string tag -> string -> t val set : t -> 'a tag -> 'a -> t val get : t -> 'a tag -> 'a option val has : t -> 'a tag -> bool @@ -68,6 +68,7 @@ val map_program : t -> f:(program term -> program term) -> t module State : sig type t = state + val disassembly : t -> Bap_disasm_driver.state val subroutines : t -> Bap_disasm_calls.t val slot : (Theory.Unit.cls, state) KB.slot @@ -91,19 +92,20 @@ module Input : sig ?target:Theory.Target.t -> ?loader:string -> ?libraries:string list -> - string -> t + string -> + t val custom : ?finish:(project -> project) -> ?filename:string -> ?code:value memmap -> ?data:value memmap -> - Theory.Target.t -> t + Theory.Target.t -> + t val raw_file : ?base:addr -> Theory.Target.t -> string -> t val from_string : ?base:addr -> Theory.Target.t -> string -> t val from_bigstring : ?base:addr -> Theory.Target.t -> Bigstring.t -> t - val file : ?loader:string -> filename:string -> t val binary : ?base:addr -> arch -> filename:string -> t @@ -112,7 +114,8 @@ module Input : sig arch -> string -> code:value memmap -> - data:value memmap -> t + data:value memmap -> + t val register_loader : string -> (string -> t) -> unit val available_loaders : unit -> string list @@ -121,23 +124,19 @@ end module Pass : sig type t = pass [@@deriving sexp_of] - type error = - | Unsat_dep of pass * string - | Runtime_error of pass * exn + type error = Unsat_dep of pass * string | Runtime_error of pass * exn [@@deriving sexp_of] exception Failed of error [@@deriving sexp] - val run : t -> project -> (project,error) Result.t + val run : t -> project -> (project, error) Result.t val run_exn : t -> project -> project - val name : t -> string val autorun : t -> bool - - val starts : t -> second stream - val finishes : t -> second stream + val starts : t -> second stream + val finishes : t -> second stream val successes : t -> second stream - val failures : t -> second stream + val failures : t -> second stream end module Collator : sig @@ -149,7 +148,10 @@ module Collator : sig val name : info -> Knowledge.Name.t val desc : info -> string - val register : ?desc:string -> ?package:string -> string -> + val register : + ?desc:string -> + ?package:string -> + string -> prepare:(project -> 's) -> collate:(int -> 's -> project -> 's) -> summary:('s -> unit) -> @@ -163,27 +165,29 @@ module Analysis : sig type info type grammar type 'a arg - type ('a,'r) args + type ('a, 'r) args val args : 'a arg -> ('a -> 'b, 'b) args - val ($) : ('a, 'b -> 'c) args -> 'b arg -> ('a,'c) args - + val ( $ ) : ('a, 'b -> 'c) args -> 'b arg -> ('a, 'c) args val empty : unit arg val string : string arg val bitvec : Bitvec.t arg val program : Theory.Label.t arg val unit : Theory.Unit.t arg - val optional : 'a arg -> 'a option arg val keyword : string -> 'a arg -> 'a option arg val flag : string -> bool arg val rest : 'a arg -> 'a list arg - val register : ?desc:string -> ?package:string -> string -> - ('a,unit knowledge) args -> 'a -> unit + val register : + ?desc:string -> + ?package:string -> + string -> + ('a, unit knowledge) args -> + 'a -> + unit val registered : unit -> info list - val apply : t -> string list -> unit knowledge val find : ?package:string -> string -> t option val name : info -> Knowledge.Name.t @@ -193,29 +197,36 @@ module Analysis : sig val argument : ?desc:string -> parse:(fail:(string -> _ knowledge) -> string -> 'a knowledge) -> - string -> 'a arg + string -> + 'a arg module Grammar : sig type t = grammar + val to_string : grammar -> string end - end val find_pass : string -> pass option val register_pass : - ?autorun:bool -> ?runonce:bool -> ?deps:string list -> ?name:string - -> (t -> t) -> unit -val register_pass': - ?autorun:bool -> ?runonce:bool -> ?deps:string list -> ?name:string - -> (t -> unit) -> unit - - + ?autorun:bool -> + ?runonce:bool -> + ?deps:string list -> + ?name:string -> + (t -> t) -> + unit + +val register_pass' : + ?autorun:bool -> + ?runonce:bool -> + ?deps:string list -> + ?name:string -> + (t -> unit) -> + unit val pass_registrations : pass stream val passes : unit -> pass list - val restore_state : t -> unit include Data.S with type t := t diff --git a/lib/bap/bap_self.ml b/lib/bap/bap_self.ml index f972f2dd4..f89cf5cae 100644 --- a/lib/bap/bap_self.ml +++ b/lib/bap/bap_self.ml @@ -1,130 +1,120 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap_bundle.Std open Bap_future.Std open Bap_plugins.Std open Format open Cmdliner - module Event = Bap_main_event -module Buffer = Caml.Buffer -module Sys = Caml.Sys +module Buffer = Stdlib.Buffer +module Sys = Stdlib.Sys -module Create() = struct +module Create () = struct let main = let base = Filename.basename Sys.executable_name in try Filename.chop_extension base with _ -> base - let manifest = Manifest.current () let name = Manifest.name manifest let version = Manifest.version manifest let doc = Manifest.desc manifest - - let has_verbose = - Array.exists ~f:(function "--verbose" | _ -> false) + let has_verbose = Array.exists ~f:(function "--verbose" | _ -> false) let report_progress ?task ?note ?stage ?total () = - let task = match task with + let task = + match task with | None -> name - | Some subtask -> sprintf "%s/%s" name subtask in - let task = if String.(name = main) then task - else sprintf "%s/%s" main task in + | Some subtask -> sprintf "%s/%s" name subtask + in + let task = + if String.(name = main) then task else sprintf "%s/%s" main task + in Event.Log.progress ?note ?stage ?total task let filter_args name = let prefix = "--" ^ name ^ "-" in let is_key = String.is_prefix ~prefix:"-" in - Array.fold (Plugin.argv ()) ~init:([],`drop) ~f:(fun (args,act) arg -> + Array.fold (Plugin.argv ()) ~init:([], `drop) ~f:(fun (args, act) arg -> let take arg = ("--" ^ arg) :: args in - if String.equal arg Sys.argv.(0) then (name::args,`drop) - else match String.chop_prefix arg ~prefix, act with - | None,`take when is_key arg -> args,`drop - | None,`take -> arg::args,`drop - | None,`drop -> args,`drop - | Some arg,_ when String.mem arg '=' -> take arg,`drop - | Some arg,_ -> take arg,`take) |> - fst |> List.rev |> Array.of_list - - let argv = - if String.equal name main then Sys.argv - else filter_args name - - let has_var v = match Sys.getenv ("BAP_" ^ String.uppercase v) with - | exception Caml.Not_found -> false + if String.equal arg Sys.argv.(0) then (name :: args, `drop) + else + match (String.chop_prefix arg ~prefix, act) with + | None, `take when is_key arg -> (args, `drop) + | None, `take -> (arg :: args, `drop) + | None, `drop -> (args, `drop) + | Some arg, _ when String.mem arg '=' -> (take arg, `drop) + | Some arg, _ -> (take arg, `take)) + |> fst |> List.rev |> Array.of_list + + let argv = if String.equal name main then Sys.argv else filter_args name + + let has_var v = + match Sys.getenv ("BAP_" ^ String.uppercase v) with + | exception Stdlib.Not_found -> false | "false" | "0" -> false | _ -> true - let is_verbose = has_verbose argv || - has_var ("DEBUG_"^name) || - has_var ("DEBUG") + let is_verbose = + has_verbose argv || has_var ("DEBUG_" ^ name) || has_var "DEBUG" include Event.Log.Create () module Config = struct open Bap_main.Extension + type 'a parser = string -> [ `Ok of 'a | `Error of string ] type 'a printer = Format.formatter -> 'a -> unit type 'a converter = 'a Type.t type 'a param = 'a Future.t - type reader = {get : 'a. 'a param -> 'a} - type manpage_block = [ - | `I of string * string + type reader = { get : 'a. 'a param -> 'a } + + type manpage_block = + [ `I of string * string | `Noblank | `P of string | `Pre of string - | `S of string - ] + | `S of string ] let converter parser printer default = Type.define default - ~parse:(fun x -> match parser x with - | `Ok x -> x - | `Error err -> failwith err) - ~print:(fun x -> - Format.asprintf "%a" printer x) + ~parse:(fun x -> + match parser x with `Ok x -> x | `Error err -> failwith err) + ~print:(fun x -> Format.asprintf "%a" printer x) let prepend_deprecation dep doc = - match dep with - | None -> doc - | Some dep -> sprintf "%s %s" dep doc + match dep with None -> doc | Some dep -> sprintf "%s %s" dep doc - - let param t ?deprecated - ?default:(d=Type.default t) - ?as_flag - ?(docv=Type.name t) ?(doc="Undocumented.") ?synonyms name = + let param t ?deprecated ?default:(d = Type.default t) ?as_flag + ?(docv = Type.name t) ?(doc = "Undocumented.") ?synonyms name = let t = Type.(docv %: t =? d) in let doc = prepend_deprecation deprecated doc in - Configuration.parameter ~doc ?as_flag ?aliases:synonyms t name |> - Configuration.determined + Configuration.parameter ~doc ?as_flag ?aliases:synonyms t name + |> Configuration.determined - let param_all t ?deprecated - ?default - ?as_flag - ?(docv=Type.name t) ?(doc="Undocumented.") ?synonyms name = + let param_all t ?deprecated ?default ?as_flag ?(docv = Type.name t) + ?(doc = "Undocumented.") ?synonyms name = let t = Type.(docv %: t) in let doc = prepend_deprecation deprecated doc in - Configuration.parameters ~doc ?as_flag ?aliases:synonyms t name |> - Configuration.determined |> - Future.map ~f:(fun res -> match default with - | None -> res - | Some default -> match res with - | [] -> default - | xs -> xs) - - let flag ?deprecated ?docv:_ ?(doc="Undocumented.") ?synonyms name = + Configuration.parameters ~doc ?as_flag ?aliases:synonyms t name + |> Configuration.determined + |> Future.map ~f:(fun res -> + match default with + | None -> res + | Some default -> ( match res with [] -> default | xs -> xs)) + + let flag ?deprecated ?docv:_ ?(doc = "Undocumented.") ?synonyms name = let doc = prepend_deprecation deprecated doc in - Configuration.flag ~doc ?aliases:synonyms name |> - Configuration.determined + Configuration.flag ~doc ?aliases:synonyms name |> Configuration.determined let determined x = x + let declare_extension ?features ?provides ?doc f = declare ?features ?provides ?doc @@ fun _ -> - try Ok (f {get = fun x -> Future.peek_exn x}) with + try Ok (f { get = (fun x -> Future.peek_exn x) }) with | Invalid_argument s -> Error (Error.Invalid s) | exn -> - let backtrace = Caml.Printexc.get_backtrace () in - Error (Error.Bug (exn,backtrace)) + let backtrace = Stdlib.Printexc.get_backtrace () in + Error (Error.Bug (exn, backtrace)) + let when_ready f = declare_extension f let manpage (ps : manpage_block list) = @@ -132,11 +122,11 @@ module Create() = struct let buf = Buffer.create 64 in let ppf = formatter_of_buffer buf in List.iter ps ~f:(function - | `S name -> fprintf ppf "# %s@\n@\n" name - | `P text -> fprintf ppf "%a@\n@\n" pp_print_text text - | `Pre code -> fprintf ppf "```@\n%s@\n```@\n@\n" code - | `I (item,desc) -> fprintf ppf "%s %s@\n@\n" item desc - | `Noblank -> ()); + | `S name -> fprintf ppf "# %s@\n@\n" name + | `P text -> fprintf ppf "%a@\n@\n" pp_print_text text + | `Pre code -> fprintf ppf "```@\n%s@\n```@\n@\n" code + | `I (item, desc) -> fprintf ppf "%s %s@\n@\n" item desc + | `Noblank -> ()); fprintf ppf "%!"; documentation (Buffer.contents buf) @@ -146,7 +136,9 @@ module Create() = struct let datadir = Configuration.datadir let libdir = Configuration.libdir let version = Configuration.version + include Type + let some ?none:_ t = some t end end diff --git a/lib/bap/bap_self.mli b/lib/bap/bap_self.mli index 6c4733367..0a72f4423 100644 --- a/lib/bap/bap_self.mli +++ b/lib/bap/bap_self.mli @@ -1,30 +1,24 @@ open Format -open Core_kernel[@@warning "-D"] +open Core open Bap_future.Std open Bap_main.Extension - -module Create() : sig +module Create () : sig val name : string val version : string val doc : string val argv : string array - - val debug : ('a,formatter,unit) format -> 'a - val info : ('a,formatter,unit) format -> 'a - val warning : ('a,formatter,unit) format -> 'a - val error : ('a,formatter,unit) format -> 'a - + val debug : ('a, formatter, unit) format -> 'a + val info : ('a, formatter, unit) format -> 'a + val warning : ('a, formatter, unit) format -> 'a + val error : ('a, formatter, unit) format -> 'a val debug_formatter : formatter val info_formatter : formatter val warning_formatter : formatter val error_formatter : formatter val report_progress : - ?task:string -> - ?note:string -> - ?stage:int -> - ?total:int -> unit -> unit + ?task:string -> ?note:string -> ?stage:int -> ?total:int -> unit -> unit module Config : sig val version : string @@ -33,32 +27,43 @@ module Create() : sig val confdir : string type 'a param - type 'a converter = 'a Type.t - type 'a parser = string -> [ `Ok of 'a | `Error of string ] type 'a printer = Format.formatter -> 'a -> unit - type reader = {get : 'a. 'a param -> 'a} + type reader = { get : 'a. 'a param -> 'a } val converter : 'a parser -> 'a printer -> 'a -> 'a Type.t - val deprecated : string val param : - 'a Type.t -> ?deprecated:string -> ?default:'a -> ?as_flag:'a -> - ?docv:string -> ?doc:string -> ?synonyms:string list -> - string -> 'a param + 'a Type.t -> + ?deprecated:string -> + ?default:'a -> + ?as_flag:'a -> + ?docv:string -> + ?doc:string -> + ?synonyms:string list -> + string -> + 'a param val param_all : 'a Type.t -> - ?deprecated:string -> ?default:'a list -> ?as_flag:'a -> - ?docv:string -> ?doc:string -> - ?synonyms:string list -> string -> 'a list param + ?deprecated:string -> + ?default:'a list -> + ?as_flag:'a -> + ?docv:string -> + ?doc:string -> + ?synonyms:string list -> + string -> + 'a list param val flag : ?deprecated:string -> - ?docv:string -> ?doc:string -> ?synonyms:string list -> - string -> bool param + ?docv:string -> + ?doc:string -> + ?synonyms:string list -> + string -> + bool param val determined : 'a param -> 'a future @@ -66,19 +71,19 @@ module Create() : sig ?features:string list -> ?provides:string list -> ?doc:string -> - (reader -> unit) -> unit + (reader -> unit) -> + unit + val when_ready : (reader -> unit) -> unit - type manpage_block = [ - | `I of string * string + type manpage_block = + [ `I of string * string | `Noblank | `P of string | `Pre of string - | `S of string - ] + | `S of string ] val manpage : manpage_block list -> unit - val bool : bool converter val char : char converter val int : int converter @@ -96,10 +101,22 @@ module Create() : sig val array : ?sep:char -> 'a converter -> 'a array converter val pair : ?sep:char -> 'a converter -> 'b converter -> ('a * 'b) converter val t2 : ?sep:char -> 'a converter -> 'b converter -> ('a * 'b) converter - val t3 : ?sep:char -> 'a converter -> 'b converter -> 'c converter -> + + val t3 : + ?sep:char -> + 'a converter -> + 'b converter -> + 'c converter -> ('a * 'b * 'c) converter - val t4 : ?sep:char -> 'a converter -> 'b converter -> 'c converter -> - 'd converter -> ('a * 'b * 'c * 'd) converter + + val t4 : + ?sep:char -> + 'a converter -> + 'b converter -> + 'c converter -> + 'd converter -> + ('a * 'b * 'c * 'd) converter + val some : ?none:string -> 'a converter -> 'a option converter end end diff --git a/lib/bap/dune b/lib/bap/dune index 1a61c70ea..a2288e0f4 100644 --- a/lib/bap/dune +++ b/lib/bap/dune @@ -1,37 +1,32 @@ -(deprecated_library_name - (old_public_name bap) - (new_public_name bap-std)) - (library (name bap) (public_name bap-std) (wrapped false) - (preprocess (pps ppx_bap)) + (preprocess + (pps ppx_bap)) (libraries - bap-bundle - bap-core-theory - bap_disasm - bap-future - bap_image - bap-knowledge - bap-main - bap_sema - bap_types - bitvec - bitvec-binprot - bitvec-order - bitvec-sexp - core_kernel - core_kernel.caml_unix - fileutils - graphlib - monads - ogre - regular) + bap-bundle + bap-core-theory + bap_disasm + bap-future + bap_image + bap-knowledge + bap-main + bap_sema + bap_types + bitvec + bitvec-binprot + bitvec-order + bitvec-sexp + core + fileutils + graphlib + monads + ogre + regular) (modules bap bap_project bap_self) (private_modules bap_project bap_self)) - (library (name bap_init_toplevel) (public_name bap.top) @@ -39,8 +34,8 @@ (modules bap_init_toplevel) (optional) (libraries - bap - bap-main - dune-site.toplevel ; to disable when toplevel support is not present - compiler-libs - compiler-libs.common)) + bap + bap-main + dune-site.toplevel ; to disable when toplevel support is not present + compiler-libs + compiler-libs.common)) diff --git a/lib/bap_abi/bap_abi.ml b/lib/bap_abi/bap_abi.ml index da6a3a0e0..9247b6a7f 100644 --- a/lib/bap_abi/bap_abi.ml +++ b/lib/bap_abi/bap_abi.ml @@ -1,17 +1,11 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std +let passes : (project -> project) list ref = ref [] +let pass proj = List.fold !passes ~init:proj ~f:(fun proj pass -> pass proj) +let register_pass pass = passes := pass :: !passes -let passes : (project -> project) list ref = ref [] - -let pass proj = - List.fold !passes ~init:proj ~f:(fun proj pass -> - pass proj) - -let register_pass pass = - passes := pass :: !passes - - -let name = Value.Tag.register (module String) - ~uuid:"ce10e129-4cae-4f49-9b21-8e00d3635067" - ~name:"abi-name" +let name = + Value.Tag.register + (module String) + ~uuid:"ce10e129-4cae-4f49-9b21-8e00d3635067" ~name:"abi-name" diff --git a/lib/bap_abi/bap_abi.mli b/lib/bap_abi/bap_abi.mli index 2e4b8365e..47b1d6ffe 100644 --- a/lib/bap_abi/bap_abi.mli +++ b/lib/bap_abi/bap_abi.mli @@ -1,27 +1,24 @@ (** ABI dispatcher. - This library accompanies the abi pass, and allows to inject - arbitrary transformation on the abi recognition phase. The - abi pass is run before the api pass. + This library accompanies the abi pass, and allows to inject arbitrary + transformation on the abi recognition phase. The abi pass is run before the + api pass. - - The library and the pass have no specific functionality, other - than running the passes, that were registered by specific - compiler, language or architecture specific modules. -*) + The library and the pass have no specific functionality, other than running + the passes, that were registered by specific compiler, language or + architecture specific modules. *) open Bap.Std -(** [pass] will apply all registered passes in the unspecified order *) val pass : project -> project +(** [pass] will apply all registered passes in the unspecified order *) -(** [register_pass pass] registers a pass for the later execution. - The pass will be run by a [api] pass. Usually the [pass] will - inspect the project structure, and if it is not recognized, then - it will just return the project untouched, otherwise it may - apply some transformations on the project, e.g., demangling, and - register further actions in the system, e.g., api transformations.*) val register_pass : (project -> project) -> unit - +(** [register_pass pass] registers a pass for the later execution. The pass will + be run by a [api] pass. Usually the [pass] will inspect the project + structure, and if it is not recognized, then it will just return the project + untouched, otherwise it may apply some transformations on the project, e.g., + demangling, and register further actions in the system, e.g., api + transformations.*) val name : string tag diff --git a/lib/bap_abi/dune b/lib/bap_abi/dune index 57db3b5aa..0a0d7e8e0 100644 --- a/lib/bap_abi/dune +++ b/lib/bap_abi/dune @@ -2,5 +2,6 @@ (name bap_abi) (public_name bap-abi) (wrapped false) - (preprocess (pps ppx_bap)) - (libraries bap core_kernel)) + (preprocess + (pps ppx_bap)) + (libraries bap core)) diff --git a/lib/bap_api/bap_api.ml b/lib/bap_api/bap_api.ml index 3dfdcbfa1..03b769ab0 100644 --- a/lib/bap_api/bap_api.ml +++ b/lib/bap_api/bap_api.ml @@ -1,4 +1,4 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std type filename = string @@ -6,9 +6,10 @@ type api = string module type S = sig type t + val language : string val parse : (string -> string option) -> string list -> t Or_error.t - val mapper : t -> Term.mapper + val mapper : t -> Term.mapper end type t = (module S) diff --git a/lib/bap_api/bap_api.mli b/lib/bap_api/bap_api.mli index 6e2f9d656..7cf5f91e6 100644 --- a/lib/bap_api/bap_api.mli +++ b/lib/bap_api/bap_api.mli @@ -1,47 +1,44 @@ (** An interface to the api pass. - Api pass will apply a high level model of a program [api] to a low - level representation, i.e., the [program term]. + Api pass will apply a high level model of a program [api] to a low level + representation, i.e., the [program term]. - The module is actually a static registry of language processors, - that will be applied by the api pass. + The module is actually a static registry of language processors, that will + be applied by the api pass. - The language processors are provide by plugins supporting - corresponding languages. Sometimes an abi information is required - to apply the api, so the processors are actually provided by even - lower level modules, that supports specific architectures. + The language processors are provide by plugins supporting corresponding + languages. Sometimes an abi information is required to apply the api, so the + processors are actually provided by even lower level modules, that supports + specific architectures. *) -*) - -open Core_kernel[@@warning "-D"] +open Core open Bap.Std type filename = string type api = string -(** Language processor interface. *) +(** Language processor interface. *) module type S = sig - type t - (** [language] a name of a language, e.g., ["C"] *) val language : string + (** [language] a name of a language, e.g., ["C"] *) - (** [parse get_api apis] creates a language processor from a list of - api. Function [get_api api] must return a name of an existing - file, that corresponds to the given [api]. The [apis] parameter - is a list of [api] names. *) val parse : (api -> filename option) -> api list -> t Or_error.t + (** [parse get_api apis] creates a language processor from a list of api. + Function [get_api api] must return a name of an existing file, that + corresponds to the given [api]. The [apis] parameter is a list of [api] + names. *) - (** the processor itself *) - val mapper : t -> Term.mapper + val mapper : t -> Term.mapper + (** the processor itself *) end -(** language processor type *) type t = (module S) +(** language processor type *) -(** apply the language processor *) val process : t -> unit +(** apply the language processor *) -(** enumerate all registered language processors *) val processors : unit -> t list +(** enumerate all registered language processors *) diff --git a/lib/bap_api/dune b/lib/bap_api/dune index 5f0052e73..4defa0fd1 100644 --- a/lib/bap_api/dune +++ b/lib/bap_api/dune @@ -1,6 +1,7 @@ (library - (name bap_api) - (public_name bap-api) - (wrapped false) - (preprocess (pps ppx_bap)) - (libraries bap core_kernel)) \ No newline at end of file + (name bap_api) + (public_name bap-api) + (wrapped false) + (preprocess + (pps ppx_bap)) + (libraries bap core)) diff --git a/lib/bap_bml/bap_bml.ml b/lib/bap_bml/bap_bml.ml index 1a3469811..2a3dcbbde 100644 --- a/lib/bap_bml/bap_bml.ml +++ b/lib/bap_bml/bap_bml.ml @@ -1,4 +1,4 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std [@@@warning "-D"] @@ -7,91 +7,106 @@ exception Parse_error of string module type Registry = sig type t + val register : string -> t -> unit val find : string -> t option val list : unit -> (string * t) list end -module Registry(T : T) = struct +module Registry (T : T) = struct type t = T.t + let registered : t String.Table.t = String.Table.create () let register name v = Hashtbl.set registered ~key:name ~data:v let find = Hashtbl.find registered let list () = Hashtbl.to_alist registered end - module type Ops = sig type t - module Nullary : Registry with type t = t + + module Nullary : Registry with type t = t module Unary : Registry with type t = string -> t end -module Ops(T : T) = struct +module Ops (T : T) = struct type t = T.t - module Nullary = Registry(T) - module Unary = Registry(struct type t = string -> T.t end) + + module Nullary = Registry (T) + + module Unary = Registry (struct + type t = string -> T.t + end) end -module Predicates = Ops(struct type t = bool Term.visitor end) -module Mappers = Ops(struct type t = Term.mapper end) +module Predicates = Ops (struct + type t = bool Term.visitor +end) +module Mappers = Ops (struct + type t = Term.mapper +end) let marker parse tag x = let x = parse x in object inherit Term.mapper as super - method! map_term cls t = - Term.set_attr t tag x |> super#map_term cls + method! map_term cls t = Term.set_attr t tag x |> super#map_term cls end -let has tag = object - inherit [bool] Term.visitor - method! enter_term cls t _ = Term.has_attr t tag -end +let has tag = + object + inherit [bool] Term.visitor + method! enter_term cls t _ = Term.has_attr t tag + end -module Cmp(T : Comparable) = struct +module Cmp (T : Comparable) = struct open T + let equal x t = x = t - let greater x t = x > t - let lesser x t = x < t + let greater x t = x > t + let lesser x t = x < t - let cmp test tag x cls t _ = match Term.get_attr t tag with - | None -> false - | Some y -> test x y + let cmp test tag x cls t _ = + match Term.get_attr t tag with None -> false | Some y -> test x y let make test parse tag x = let x = parse x in - object inherit [bool] Term.visitor + object + inherit [bool] Term.visitor method! enter_term = cmp test tag x end + let equal = make equal let greater = make greater let lesser = make lesser end -let (-) pref tag = pref ^ "-" ^ Value.Tag.name tag -let (+) pref suf = if String.is_empty suf then pref else pref^"-"^suf +let ( - ) pref tag = pref ^ "-" ^ Value.Tag.name tag +let ( + ) pref suf = if String.is_empty suf then pref else pref ^ "-" ^ suf let unit suf set is tag = - Mappers.Nullary.register (set-tag+suf) (marker Fn.id tag ()); - Predicates.Nullary.register (is-tag+suf) (has tag) + Mappers.Nullary.register (set - tag + suf) (marker Fn.id tag ()); + Predicates.Nullary.register (is - tag + suf) (has tag) module Markers = struct module Term = struct include Term + let unit = unit "" "set" "is" let () = unit synthetic; unit live; unit dead; - unit visited; + unit visited end module Sub = struct include Sub + let unit = unit "sub" "set" "is" + let () = unit const; unit pure; @@ -106,113 +121,115 @@ module Markers = struct module Arg = struct include Arg + let unit = unit "arg" "set" "is" let () = unit alloc_size; - unit nonnull; + unit nonnull end module Has = struct let unit = unit "" "set" "has" end - let () = - unit "" "set" "has" mark; + let () = unit "" "set" "has" mark end - -let expect exp got = - raise (Parse_error (sprintf "Expected %s got %S" exp got)) - +let expect exp got = raise (Parse_error (sprintf "Expected %s got %S" exp got)) module Color = struct - let colors = [ - "black", `black; - "red", `red; - "green", `green; - "yellow", `yellow; - "blue", `blue; - "magenta", `magenta; - "cyan", `cyan; - "white", `white; - "gray", `gray; - ] + let colors = + [ + ("black", `black); + ("red", `red); + ("green", `green); + ("yellow", `yellow); + ("blue", `blue); + ("magenta", `magenta); + ("cyan", `cyan); + ("white", `white); + ("gray", `gray); + ] let grammar = List.map colors ~f:fst |> String.concat ~sep:" | " - let color_t s = match List.Assoc.find ~equal:String.equal colors s with + let color_t s = + match List.Assoc.find ~equal:String.equal colors s with | Some c -> c | None -> expect grammar s - - let () = - let (:=) = Mappers.Unary.register in + let ( := ) = Mappers.Unary.register in "foreground" := marker color_t foreground; "background" := marker color_t background; - "color" := marker color_t color + "color" := marker color_t color module Colors = struct type t = color - include Comparable.Make(struct - type t = color [@@deriving bin_io, compare, sexp] - end) + + include Comparable.Make (struct + type t = color [@@deriving bin_io, compare, sexp] + end) end - include Cmp(Colors) + include Cmp (Colors) let () = - let (:=) = Predicates.Unary.register in + let ( := ) = Predicates.Unary.register in "has-foreground" := equal color_t foreground; "has-background" := equal color_t background; - "has-color" := equal color_t color + "has-color" := equal color_t color end - module Comment = struct - let () = - Mappers.Unary.register "comment" @@ - marker Fn.id comment; + let () = Mappers.Unary.register "comment" @@ marker Fn.id comment end module Python = struct - let () = - Mappers.Unary.register "python" @@ - marker Fn.id python; + let () = Mappers.Unary.register "python" @@ marker Fn.id python end module Taint = struct let has_attr cmp kind s = - object inherit [bool] Term.visitor + object + inherit [bool] Term.visitor + method! enter_term cls t _ = match Term.get_attr t kind with | None -> false - | Some seed -> match Tid.from_string s with - | Error _ -> false - | Ok seed' -> cmp seed seed' + | Some seed -> ( + match Tid.from_string s with + | Error _ -> false + | Ok seed' -> cmp seed seed') end let either (x : bool Term.visitor) (y : bool Term.visitor) = - object inherit [bool] Term.visitor + object + inherit [bool] Term.visitor + method! enter_term cls t _ = x#visit_term cls t false || y#visit_term cls t false end let seed tag = - object inherit Term.mapper as super - method! map_term cls t = - Term.set_attr t tag (Term.tid t) |> - super#map_term cls + object + inherit Term.mapper as super + + method! map_term cls t = + Term.set_attr t tag (Term.tid t) |> super#map_term cls end + let has_seed tag = has_attr Tid.equal tag - let has_seed tag = has_attr Tid.equal tag - let has_taint tag = has_attr (fun taints taint -> - Map.exists taints ~f:(fun taints -> Set.mem taints taint)) tag + let has_taint tag = + has_attr + (fun taints taint -> + Map.exists taints ~f:(fun taints -> Set.mem taints taint)) + tag let () = - let (:=) = Predicates.Nullary.register in + let ( := ) = Predicates.Nullary.register in "taints" := either (has Taint.reg) (has Taint.ptr); "taints-reg" := has Taint.reg; "taints-ptr" := has Taint.ptr; @@ -221,22 +238,24 @@ module Taint = struct "has-tainted-reg" := has Taint.regs let () = - let (:=) = Predicates.Unary.register in + let ( := ) = Predicates.Unary.register in "taints-reg" := has_seed Taint.ptr; "taints-ptr" := has_seed Taint.reg; "has-tainted-ptr" := has_taint Taint.ptrs; "has-tainted-reg" := has_taint Taint.regs let () = - let (:=) = Mappers.Nullary.register in + let ( := ) = Mappers.Nullary.register in "taint-ptr" := seed Taint.ptr; "taint-reg" := seed Taint.reg end module True = struct - let yes = object inherit [bool] Term.visitor - method! enter_term cls t _ = true - end + let yes = + object + inherit [bool] Term.visitor + method! enter_term cls t _ = true + end let () = Predicates.Nullary.register "true" yes end diff --git a/lib/bap_bml/bap_bml.mli b/lib/bap_bml/bap_bml.mli index c227bba07..0498c9b17 100644 --- a/lib/bap_bml/bap_bml.mli +++ b/lib/bap_bml/bap_bml.mli @@ -1,41 +1,38 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std (** BML - Bap Mapping Language. - A small DSL for mapping terms. See [bap --map-terms-help] for full - language grammar and description. This library implements BML's - standard library and can be used to extend the language with new - predicates and mapper. Just implement them and register using - corresponding module. -*) + A small DSL for mapping terms. See [bap --map-terms-help] for full language + grammar and description. This library implements BML's standard library and + can be used to extend the language with new predicates and mapper. Just + implement them and register using corresponding module. *) - -(** A parser error *) exception Parse_error of string +(** A parser error *) -(** Interface to a registry. - Registry is a key value storage.*) +(** Interface to a registry. Registry is a key value storage.*) module type Registry = sig type t - (** [register name value] register [value] with a given [name]. - If [name] was already associated with some other value, then - it will be superseded with the new binding. *) val register : string -> t -> unit + (** [register name value] register [value] with a given [name]. If [name] was + already associated with some other value, then it will be superseded with + the new binding. *) - (** [find name] find a value associated with the given [value] *) val find : string -> t option + (** [find name] find a value associated with the given [value] *) - (** [list ()] list all bindings *) val list : unit -> (string * t) list + (** [list ()] list all bindings *) end module type Ops = sig type t - module Nullary : Registry with type t = t - module Unary : Registry with type t = string -> t + + module Nullary : Registry with type t = t + module Unary : Registry with type t = string -> t end module Predicates : Ops with type t = bool Term.visitor -module Mappers : Ops with type t = Term.mapper +module Mappers : Ops with type t = Term.mapper diff --git a/lib/bap_bml/dune b/lib/bap_bml/dune index 05322bf76..871e20181 100644 --- a/lib/bap_bml/dune +++ b/lib/bap_bml/dune @@ -1,6 +1,7 @@ (library - (name bap_bml) - (public_name bap-bml) - (preprocess (pps ppx_bap)) - (wrapped false) - (libraries bap core_kernel)) + (name bap_bml) + (public_name bap-bml) + (preprocess + (pps ppx_bap)) + (wrapped false) + (libraries bap core)) diff --git a/lib/bap_build/bap_build.ml b/lib/bap_build/bap_build.ml index a6ece6c17..369d283ff 100644 --- a/lib/bap_build/bap_build.ml +++ b/lib/bap_build/bap_build.ml @@ -1,44 +1,43 @@ module Plugin_rules = struct module Fl = Findlib - open Printf open Ocamlbuild_plugin module Ocamlbuild = Ocamlbuild_pack - module List = ListLabels module String = Ocamlbuild_plugin.String - let (/) = Pathname.concat + let ( / ) = Pathname.concat let is_installed pkg = - try ignore (Fl.package_directory pkg); true + try + ignore (Fl.package_directory pkg); + true with Fl.No_such_package _ -> false - let default_packages = List.filter ~f:is_installed [ - "bap"; - "bap-main"; - "bap-core-theory"; - "bap-knowledge"; - "core_kernel"; - "ppx_bap"; - "regular" - ] - let default_predicates = [ - ] - - let default_tags = [ - "thread"; - "debug"; - "custom"; - ] @ List.map default_predicates ~f:(sprintf "predicate(%s)") + let default_packages = + List.filter ~f:is_installed + [ + "bap"; + "bap-main"; + "bap-core-theory"; + "bap-knowledge"; + "core"; + "ppx_bap"; + "regular"; + ] + + let default_predicates = [] + + let default_tags = + [ "thread"; "debug"; "custom" ] + @ List.map default_predicates ~f:(sprintf "predicate(%s)") let needs_threads ~predicates pkgs = let deps = Fl.package_deep_ancestors predicates pkgs in List.mem ~set:deps "threads" let infer_thread_predicates ~predicates pkg = - if needs_threads ~predicates pkg - then "mt" :: "mt_posix" :: predicates + if needs_threads ~predicates pkg then "mt" :: "mt_posix" :: predicates else predicates let bap_predicates ~native = @@ -54,65 +53,66 @@ module Plugin_rules = struct Fl.package_deep_ancestors predicates pkgs let set_default_options () : unit = - Options.(begin - use_ocamlfind := true; - ocaml_pkgs := default_packages; - tags := default_tags; - recursive := true; - end) + Options.( + use_ocamlfind := true; + ocaml_pkgs := default_packages; + tags := default_tags; + recursive := true) let interns () = topological_closure - ~predicates:(bap_predicates ~native:true) default_packages - + ~predicates:(bap_predicates ~native:true) + default_packages - let findlibs - ?(native=true) - ?(predicates=pkg_predicates ~native) - ~dynamic pkg = + let findlibs ?(native = true) ?(predicates = pkg_predicates ~native) ~dynamic + pkg = try - let preds = if dynamic - then "plugin" :: predicates - else predicates in - let arch,preds = Fl.package_property_2 preds pkg "archive" in + let preds = if dynamic then "plugin" :: predicates else predicates in + let arch, preds = Fl.package_property_2 preds pkg "archive" in let base = Fl.package_directory pkg in - if dynamic && not (List.mem ~set:preds (`Pred "plugin")) - then raise Not_found; - String.split_on_char ' ' arch |> - List.map ~f:(Fl.resolve_path ~base) + if dynamic && not (List.mem ~set:preds (`Pred "plugin")) then + raise Not_found; + String.split_on_char ' ' arch |> List.map ~f:(Fl.resolve_path ~base) with Not_found -> [] let externals pkgs = let interns = interns () in - pkgs |> - topological_closure ~predicates:(pkg_predicates ~native:true) |> - List.filter ~f:(fun dep -> not (List.mem ~set:interns dep)) + pkgs + |> topological_closure ~predicates:(pkg_predicates ~native:true) + |> List.filter ~f:(fun dep -> not (List.mem ~set:interns dep)) let packages () = externals !Options.ocaml_pkgs let symlink env = if Options.make_links.contents then - Cmd (S [A"ln"; A"-sf"; - P (env (!Options.build_dir / "%.plugin")); - A Pathname.parent_dir_name]) + Cmd + (S + [ + A "ln"; + A "-sf"; + P (env (!Options.build_dir / "%.plugin")); + A Pathname.parent_dir_name; + ]) else Nop - let link_shared_bytecode ~src ~dst = - Cmd (S [ - !Options.ocamlc; - A "-linkall"; A "-a"; - P src; A "-o"; Px dst - ]) + Cmd (S [ !Options.ocamlc; A "-linkall"; A "-a"; P src; A "-o"; Px dst ]) let link_shared_native ~src ~dst = - Cmd (S [ - !Options.ocamlopt; - A "-shared"; - A "-linkall"; - A "-ccopt"; A "-L"; - A "-ccopt"; A (Filename.dirname src); - P src; A "-o"; Px dst]) + Cmd + (S + [ + !Options.ocamlopt; + A "-shared"; + A "-linkall"; + A "-ccopt"; + A "-L"; + A "-ccopt"; + A (Filename.dirname src); + P src; + A "-o"; + Px dst; + ]) let generate_cmxs_of_lib lib = let dst = Filename.(basename (chop_extension lib) ^ ".cmxs") in @@ -124,88 +124,91 @@ module Plugin_rules = struct let generate_plugin_for_package code name = let native = code = `native in - let linker = match code with + let linker = + match code with | `native -> generate_cmxs_of_lib - | `byte -> generate_cma_of_lib in + | `byte -> generate_cma_of_lib + in match findlibs ~native ~dynamic:true name with | [] -> findlibs ~native ~dynamic:false name |> List.map ~f:linker - | xs -> - List.map xs ~f:(fun src -> cp src Pathname.current_dir_name) - + | xs -> List.map xs ~f:(fun src -> cp src Pathname.current_dir_name) let concat_map xs ~f = List.(concat (map xs ~f)) let generate_plugins_for_packages () = - packages () |> - concat_map ~f:(fun name -> - concat_map [`native; `byte] ~f:(fun code -> - generate_plugin_for_package code name)) + packages () + |> concat_map ~f:(fun name -> + concat_map [ `native; `byte ] ~f:(fun code -> + generate_plugin_for_package code name)) let make_list_option option = function | [] -> N - | xs -> S [A option; A (String.concat "," xs)] + | xs -> S [ A option; A (String.concat "," xs) ] let is_cmx file = Filename.check_suffix file ".cmx" let bundle env = let requires = - packages () |> concat_map ~f:(fun pkg -> - findlibs ~dynamic:false pkg |> - List.map ~f:(fun path -> - let name = path |> - Filename.chop_extension |> - Filename.basename in - name^"="^name^".cmxs,"^ - name^"="^name^".cma")) |> - make_list_option "-requires" in - let provides = Sys.readdir Pathname.current_dir_name |> - Array.to_list |> - List.filter ~f:is_cmx |> - List.map ~f:Filename.chop_extension |> - make_list_option "-provides" in - Cmd (S [ - A "bapbundle"; A "pack"; - T (Tags.of_list ["bundle"; "library"; "plugin"]); - A "-name"; A (env "%"); - A "-main"; A (env "%.cmxs"); - A "-main"; A (env "%.cma"); - requires; provides; - Px (env "%.plugin") - ]) + packages () + |> concat_map ~f:(fun pkg -> + findlibs ~dynamic:false pkg + |> List.map ~f:(fun path -> + let name = + path |> Filename.chop_extension |> Filename.basename + in + name ^ "=" ^ name ^ ".cmxs," ^ name ^ "=" ^ name ^ ".cma")) + |> make_list_option "-requires" + in + let provides = + Sys.readdir Pathname.current_dir_name + |> Array.to_list |> List.filter ~f:is_cmx + |> List.map ~f:Filename.chop_extension + |> make_list_option "-provides" + in + Cmd + (S + [ + A "bapbundle"; + A "pack"; + T (Tags.of_list [ "bundle"; "library"; "plugin" ]); + A "-name"; + A (env "%"); + A "-main"; + A (env "%.cmxs"); + A "-main"; + A (env "%.cma"); + requires; + provides; + Px (env "%.plugin"); + ]) let register_cmxs_of_cmxa_rule () = - rule "bap: cmxa & a -> cmxs" - ~prods:["%.cmxs"] - ~deps:["%.cmxa"; "%" -.- !Options.ext_lib] - (fun env _ -> - link_shared_native ~src:(env "%.cmxa") ~dst:(env "%.cmxs")) + rule "bap: cmxa & a -> cmxs" ~prods:[ "%.cmxs" ] + ~deps:[ "%.cmxa"; "%" -.- !Options.ext_lib ] + (fun env _ -> link_shared_native ~src:(env "%.cmxa") ~dst:(env "%.cmxs")) let register_collect_bundle_rule () = - rule "bap: cmxs & packages -> bundle" - ~deps:["%.cmxs"] - ~stamp:"%.bundle" + rule "bap: cmxs & packages -> bundle" ~deps:[ "%.cmxs" ] ~stamp:"%.bundle" (fun _ _ -> Seq (generate_plugins_for_packages ())) let register_plugin_rule () = - rule "bap: cmxs & cma & bundle -> plugin" - ~prods:["%.plugin"] - ~deps:["%.bundle"; "%.cmxs"; "%.cma"] - (fun env _ -> Seq [bundle env; symlink env]) + rule "bap: cmxs & cma & bundle -> plugin" ~prods:[ "%.plugin" ] + ~deps:[ "%.bundle"; "%.cmxs"; "%.cma" ] (fun env _ -> + Seq [ bundle env; symlink env ]) let pass_pp_to_link_phase () = - pflag ["ocaml"; "link"] "pp" (fun s -> S [A "-pp"; A s]) + pflag [ "ocaml"; "link" ] "pp" (fun s -> S [ A "-pp"; A s ]) let install () = register_cmxs_of_cmxa_rule (); register_collect_bundle_rule (); register_plugin_rule (); - pass_pp_to_link_phase (); + pass_pp_to_link_phase () end - module Std = struct - module Plugin_rules = Plugin_rules + module Plugin_options = struct let set = Plugin_rules.set_default_options end diff --git a/lib/bap_build/bap_build.mli b/lib/bap_build/bap_build.mli index 33108af0b..c023c8552 100644 --- a/lib/bap_build/bap_build.mli +++ b/lib/bap_build/bap_build.mli @@ -1,14 +1,12 @@ (** bapbuild implementation library *) - (** bapbuild support library. - Underneath the hood [bapbuild] is just a plugin to - [Ocamlbuild]. Instead of using the [bapbuild] utility, it is - possible to use custom [myocamlbuild.ml]. In that case to activate - the bapbuild behavior it is needed to call [Plugin_options.set ()] - function, and and install plugin rules in the [Before_rules] - phases, e.g., + Underneath the hood [bapbuild] is just a plugin to [Ocamlbuild]. Instead of + using the [bapbuild] utility, it is possible to use custom + [myocamlbuild.ml]. In that case to activate the bapbuild behavior it is + needed to call [Plugin_options.set ()] function, and and install plugin + rules in the [Before_rules] phases, e.g., {[ open Bap_build.Std @@ -16,27 +14,19 @@ let () = Plugin_options.set (); Ocamlbuild_plugin.dispatch (function - | Before_rules -> Plugin_rules.install () - | _ -> ()); - - ]} - - -*) + | Before_rules -> Plugin_rules.install () + | _ -> ()) + ]} *) module Std : sig module Plugin_rules : sig - + val install : unit -> unit (** [install ()] installs bap specific rules. - The function installs rules necessary for building bap - plugins.*) - val install : unit -> unit + The function installs rules necessary for building bap plugins.*) end module Plugin_options : sig - - - (** [set ()] set default options for [bapbuild]. *) val set : unit -> unit + (** [set ()] set default options for [bapbuild]. *) end end diff --git a/lib/bap_build/dune b/lib/bap_build/dune index e9d4a4801..b7a7045a2 100644 --- a/lib/bap_build/dune +++ b/lib/bap_build/dune @@ -1,5 +1,5 @@ (library - (name bap_build) - (public_name bap-build) - (wrapped false) - (libraries findlib ocamlbuild)) \ No newline at end of file + (name bap_build) + (public_name bap-build) + (wrapped false) + (libraries findlib ocamlbuild)) diff --git a/lib/bap_bundle/bap_bundle.ml b/lib/bap_bundle/bap_bundle.ml index 28d092dbf..0b3da8b1a 100644 --- a/lib/bap_bundle/bap_bundle.ml +++ b/lib/bap_bundle/bap_bundle.ml @@ -1,14 +1,12 @@ -open Core_kernel[@@warning "-D"] - -module Filename = Caml.Filename -module Sys = Caml.Sys +open Core +module Filename = Stdlib.Filename +module Sys = Stdlib.Sys module Std = struct exception Not_a_bundle - module Manifest = struct - let getenv key = try Sys.getenv key with Caml.Not_found -> "unknown" + let getenv key = try Sys.getenv key with Stdlib.Not_found -> "unknown" type t = { name : string; @@ -24,42 +22,43 @@ module Std = struct copyrights : string option; tags : string list; cons : string list; - } [@@deriving bin_io, compare, fields, sexp] - - let create - ?(author=getenv "USER") - ?(version="1.0.0") - ?main - ?(date=Caml_unix.time ()) - ?(desc = "description not provided") - ?(requires=[]) - ?(provides=[]) - ?url ?license ?copyrights - ?(tags=[]) - ?(cons=[]) name = { - name; author; version; date; desc; requires; provides; - copyrights; license; url; tags; cons; - main = Option.value main ~default:name; } + [@@deriving bin_io, compare, fields, sexp] + + let create ?(author = getenv "USER") ?(version = "1.0.0") ?main + ?(date = Caml_unix.time ()) ?(desc = "description not provided") + ?(requires = []) ?(provides = []) ?url ?license ?copyrights ?(tags = []) + ?(cons = []) name = + { + name; + author; + version; + date; + desc; + requires; + provides; + copyrights; + license; + url; + tags; + cons; + main = Option.value main ~default:name; + } let pp_sexp ppf = function - | Sexp.Atom _ - | Sexp.List [] -> () + | Sexp.Atom _ | Sexp.List [] -> () | Sexp.List (x :: xs) -> - let open Format in - let pp = Sexp.pp_hum_indent 1 in - fprintf ppf "@[(%a" pp x; - List.iter xs ~f:(fun x -> - fprintf ppf "@;%a" pp x); - fprintf ppf ")@]" + let open Format in + let pp = Sexp.pp_hum_indent 1 in + fprintf ppf "@[(%a" pp x; + List.iter xs ~f:(fun x -> fprintf ppf "@;%a" pp x); + fprintf ppf ")@]" let pp ppf m = let sexp = sexp_of_t m in Format.fprintf ppf "%a" pp_sexp sexp - let to_string m = - Format.asprintf "%a" pp m - + let to_string m = Format.asprintf "%a" pp m let of_string s = t_of_sexp (Sexp.of_string s) let manifest = @@ -67,9 +66,13 @@ module Std = struct ref (create name) let update m = manifest := m - let switch m = let old = !manifest in manifest := m; old - let current () = !manifest + let switch m = + let old = !manifest in + manifest := m; + old + + let current () = !manifest end type manifest = Manifest.t @@ -78,62 +81,44 @@ module Std = struct let manifest = "MANIFEST.scm" end - - module Bundle = struct + module Bundle = struct type filename = string - - type bundle = { - name : string; - path : filename; - } + type bundle = { name : string; path : filename } type t = bundle let main = let name = Filename.basename Sys.executable_name in - ref { - name; - path = name ^ ".bundle" - } + ref { name; path = name ^ ".bundle" } let open_in uri = - try Zip.open_in uri with - | Zip.Error _ -> raise Not_a_bundle - + try Zip.open_in uri with Zip.Error _ -> raise Not_a_bundle module Builder = struct type t = { mutable manifest : manifest option; mutable files : (string option * Uri.t) list; - mutable data : ([`Name of string] * string) list; - } - - let create () = { - manifest = None; - files = []; - data = []; + mutable data : ([ `Name of string ] * string) list; } - let embed_manifest b manifest = - b.manifest <- Some manifest - - let put_file ?name b uri = - b.files <- (name,uri) :: b.files - - let put_data b ~name ~data = - b.data <- (`Name name,data) :: b.data + let create () = { manifest = None; files = []; data = [] } + let embed_manifest b manifest = b.manifest <- Some manifest + let put_file ?name b uri = b.files <- (name, uri) :: b.files + let put_data b ~name ~data = b.data <- (`Name name, data) :: b.data let flush b uri = let path = Uri.path uri in let zip = Zip.open_out path in - List.iter b.files ~f:(fun (name,uri) -> + List.iter b.files ~f:(fun (name, uri) -> let path = Uri.path uri in let name = Option.value name ~default:path in Zip.copy_file_to_entry path zip name); - List.iter b.data ~f:(fun (`Name name,data) -> + List.iter b.data ~f:(fun (`Name name, data) -> Zip.add_entry data zip name); - let man = match b.manifest with + let man = + match b.manifest with | Some man -> man - | None -> Manifest.create "noname" in + | None -> Manifest.create "noname" + in let mdata = Manifest.to_string man in Zip.add_entry ~level:0 mdata zip Nameof.manifest; Zip.close_out zip @@ -141,10 +126,9 @@ module Std = struct let input b f = let zip = open_in b.path in - protect ~f:(fun () -> f zip) - ~finally:(fun () -> Zip.close_in zip) + protect ~f:(fun () -> f zip) ~finally:(fun () -> Zip.close_in zip) - let (>>>) = input + let ( >>> ) = input let manifest b = b >>> fun zip -> @@ -154,97 +138,99 @@ module Std = struct let of_uri uri = let path = Uri.path uri in let base = Filename.basename path in - let name = if String.mem base '.' - then Filename.chop_extension base else base in - {name; path} + let name = + if String.mem base '.' then Filename.chop_extension base else base + in + { name; path } let get_file ?name b uri = - b >>> fun zip -> try + b >>> fun zip -> + try let path = Uri.path uri in let entry = Zip.find_entry zip path in let name = Option.value name ~default:path in Zip.copy_entry_to_file zip entry name; Some (Uri.of_string name) - with Caml.Not_found -> None + with Stdlib.Not_found -> None let get_data b name = b >>> fun zip -> try Some Zip.(read_entry zip (find_entry zip name)) - with Caml.Not_found -> None + with Stdlib.Not_found -> None let list b = b >>> fun zip -> - Zip.entries zip |> List.filter_map ~f:(fun e -> - let name = Zip.(e.filename) in - Option.some_if (not (String.equal name Nameof.manifest)) name) + Zip.entries zip + |> List.filter_map ~f:(fun e -> + let name = Zip.(e.filename) in + Option.some_if (not (String.equal name Nameof.manifest)) name) let transform files bundle ~f = let zin = open_in bundle.path in - let store filename data = - Hashtbl.set files ~key:filename ~data in - Zip.entries zin |> List.iter ~f:(fun entry -> - let filename = Zip.(entry.filename) in - let process_file f = - let name,chan = - Filename.open_temp_file "bundle" "entry" in - Zip.copy_entry_to_channel zin entry chan; - Out_channel.close chan; - f name; - store filename (`Move name) in - let process_data f = - let data = Zip.read_entry zin entry in - store filename (`Data (f data)) in - match f filename with - | `Map f -> process_data f - | `Proc f -> process_file f - | `Copy -> process_file ignore - | `Drop -> ()); + let store filename data = Hashtbl.set files ~key:filename ~data in + Zip.entries zin + |> List.iter ~f:(fun entry -> + let filename = Zip.(entry.filename) in + let process_file f = + let name, chan = Filename.open_temp_file "bundle" "entry" in + Zip.copy_entry_to_channel zin entry chan; + Out_channel.close chan; + f name; + store filename (`Move name) + in + let process_data f = + let data = Zip.read_entry zin entry in + store filename (`Data (f data)) + in + match f filename with + | `Map f -> process_data f + | `Proc f -> process_file f + | `Copy -> process_file ignore + | `Drop -> ()); Zip.close_in zin; let zout = Zip.open_out bundle.path in Hashtbl.iteri files ~f:(fun ~key:name ~data -> match data with | `Data s -> Zip.add_entry ~level:0 s zout name | `Copy f -> Zip.copy_file_to_entry ~level:0 f zout name - | `Move f -> Zip.copy_file_to_entry ~level:0 f zout name; - Sys.remove f); + | `Move f -> + Zip.copy_file_to_entry ~level:0 f zout name; + Sys.remove f); Zip.close_out zout - let update bundle ~f = - transform (String.Table.create ()) bundle ~f + let update bundle ~f = transform (String.Table.create ()) bundle ~f let insert bundle files = let files = String.Table.of_alist_exn files in transform files bundle ~f:(fun _ -> `Copy) let insert_files bundle files = - insert bundle (List.map files ~f:(fun (name,uri) -> - let path = Uri.path uri in - let name = Option.value name ~default:path in - name,`Copy path)) + insert bundle + (List.map files ~f:(fun (name, uri) -> + let path = Uri.path uri in + let name = Option.value name ~default:path in + (name, `Copy path))) let insert_chunks bundle data = - insert bundle (List.map data ~f:(fun (`Name name,data) -> - name,`Data data)) + insert bundle + (List.map data ~f:(fun (`Name name, data) -> (name, `Data data))) - let insert_file ?name bundle file = - insert_files bundle [name,file] + let insert_file ?name bundle file = insert_files bundle [ (name, file) ] let insert_data bundle ~name ~data = - insert_chunks bundle [`Name name, data] + insert_chunks bundle [ (`Name name, data) ] let update_manifest bundle ~f = update bundle ~f:(fun file -> - if String.equal file Nameof.manifest - then `Map (fun s -> Manifest.(of_string s |> f |> to_string)) + if String.equal file Nameof.manifest then + `Map (fun s -> Manifest.(of_string s |> f |> to_string)) else `Copy) - end - type bundle = Bundle.t + type bundle = Bundle.t let set_manifest_of_bundle bundle = - try Manifest.update (Bundle.manifest bundle) - with _ -> () + try Manifest.update (Bundle.manifest bundle) with _ -> () let set_main_bundle bundle = Bundle.main := bundle; diff --git a/lib/bap_bundle/bap_bundle.mli b/lib/bap_bundle/bap_bundle.mli index 78be08ed2..df6f7dcf3 100644 --- a/lib/bap_bundle/bap_bundle.mli +++ b/lib/bap_bundle/bap_bundle.mli @@ -1,16 +1,15 @@ -open Core_kernel[@@warning "-D"] +open Core module Std : sig - exception Not_a_bundle + type bundle - (** [main_bundle ()] returns a program's bundle if the program is - bundled, otherwise creates a fresh new bundle in current working - directory. The name of the bundle is a basename of - [Sys.executable_name] with a [bundle] extension. *) val main_bundle : unit -> bundle - + (** [main_bundle ()] returns a program's bundle if the program is bundled, + otherwise creates a fresh new bundle in current working directory. The + name of the bundle is a basename of [Sys.executable_name] with a [bundle] + extension. *) (**/**) @@ -23,27 +22,25 @@ module Std : sig (**/**) - - - (** Program meta information. *) + (** Program meta information. *) module Manifest : sig type t = { - name : string; (** program name *) - version : string; (** program version *) - desc : string; (** one line description *) - main : string; (** entry point *) - author : string; (** program author *) - date : float; (** last update date *) - requires : string list; (** required libraries *) - provides : string list; (** provided features *) - url : string option; (** project URL *) + name : string; (** program name *) + version : string; (** program version *) + desc : string; (** one line description *) + main : string; (** entry point *) + author : string; (** program author *) + date : float; (** last update date *) + requires : string list; (** required libraries *) + provides : string list; (** provided features *) + url : string option; (** project URL *) license : string option; (** project license *) - copyrights : string option; (** copyright holders *) - tags : string list; (** bundle tags *) - cons : string list; (** bundle constraints *) - } [@@deriving bin_io, compare, fields, sexp] + copyrights : string option; (** copyright holders *) + tags : string list; (** bundle tags *) + cons : string list; (** bundle constraints *) + } + [@@deriving bin_io, compare, fields, sexp] - (** [create name] create a bundle for a program with a given [name] *) val create : ?author:string -> ?version:string -> @@ -56,7 +53,10 @@ module Std : sig ?license:string -> ?copyrights:string -> ?tags:string list -> - ?cons:string list -> string -> t + ?cons:string list -> + string -> + t + (** [create name] create a bundle for a program with a given [name] *) include Stringable with type t := t @@ -67,141 +67,113 @@ module Std : sig type manifest = Manifest.t - - (** Program Bundle. - Bundle is a collection of data associated with a program. To - access the bundle, use the {!main_bundle} function, e.g., + Bundle is a collection of data associated with a program. To access the + bundle, use the {!main_bundle} function, e.g., {[ open Bap_bundle.Std let bundle = main_bundle () - ]} - - *) + ]} *) module Bundle : sig type t = bundle - (** creates new bundle or opens existing *) val of_uri : Uri.t -> t + (** creates new bundle or opens existing *) - - (** [manifest bundle] extracts program manifest from the [bundle] *) val manifest : t -> manifest + (** [manifest bundle] extracts program manifest from the [bundle] *) - - (** [get_file ?name bundle uri] extracts a file. - - Extracts a file specified by a [uri] from a [bundle] and returns - a uri pointing to the extracted file, if was found. The - optional parameter [name] allows to specify the desired - filename for the extraction. *) val get_file : ?name:string -> t -> Uri.t -> Uri.t option + (** [get_file ?name bundle uri] extracts a file. + Extracts a file specified by a [uri] from a [bundle] and returns a uri + pointing to the extracted file, if was found. The optional parameter + [name] allows to specify the desired filename for the extraction. *) - (** [get_data bundle path] extracts data specified by a [path] as - a string. *) val get_data : t -> string -> string option + (** [get_data bundle path] extracts data specified by a [path] as a string. + *) - - (** [list bundle] returns a list of paths, that are accessible in - the [bundle]. *) val list : t -> string list + (** [list bundle] returns a list of paths, that are accessible in the + [bundle]. *) - (** [update_manifest bundle ~f] update program manifest with - function [f]. *) val update_manifest : t -> f:(manifest -> manifest) -> unit - - - (** [insert_files bundle spec] bundle files. - - The [spec] is a list of pairs, where the first constituent of - a pair is a desired path of the file in the bundle, and the - second constituent is the uri of the file, that should be - inserted. If the first element of the pair is [None], then the - file will be inserted under the same path, as it was in the - file system. + (** [update_manifest bundle ~f] update program manifest with function [f]. *) + val insert_files : t -> (string option * Uri.t) list -> unit + (** [insert_files bundle spec] bundle files. + The [spec] is a list of pairs, where the first constituent of a pair is + a desired path of the file in the bundle, and the second constituent is + the uri of the file, that should be inserted. If the first element of + the pair is [None], then the file will be inserted under the same path, + as it was in the file system. *) - (** [insert_file ?name bundle uri] insert a file specified by the - [uri]. If [name] is specified, then the file will be stored - under the specified [name] in the bundle.*) val insert_file : ?name:string -> t -> Uri.t -> unit + (** [insert_file ?name bundle uri] insert a file specified by the [uri]. If + [name] is specified, then the file will be stored under the specified + [name] in the bundle.*) - - (** [insert_data bundle ~name ~data] insert [data] at path [name]. *) val insert_data : t -> name:string -> data:string -> unit + (** [insert_data bundle ~name ~data] insert [data] at path [name]. *) - (** [update bundle ~f:action] add, remove or update data in the - bundle. + val update : + t -> + f: + (string -> + [ `Drop | `Copy | `Proc of string -> unit | `Map of string -> string ]) -> + unit + (** [update bundle ~f:action] add, remove or update data in the bundle. This is a swiss-knife function, that can do arbitrary bundle modification. See {!get_file}, {!get_data}, {!insert_files}, - {!insert_file} and {!insert_data} for an easier to use - interface. + {!insert_file} and {!insert_data} for an easier to use interface. - - The [action] function is called on each path, - and must return one of the following: + The [action] function is called on each path, and must return one of the + following: - [`Drop] - to remove the path from the bundle; - [`Copy] - to keep it untouched; - - [`Proc f] - extract it, process the file with a function - [f], and put back, where function [f] accepts - a temporary name of extracted file; + - [`Proc f] - extract it, process the file with a function [f], and put + back, where function [f] accepts a temporary name of extracted file; - [`Map f] - map the contents of the file with function [f]. - Warning. Modification of a bundle, associated with an - installed plugin or application will lead to an undefined - behavior. The function is intended for building a new - bundle. Once it is created it may be sealed and made - readonly.*) - val update : t -> f:(string -> [ - | `Drop - | `Copy - | `Proc of (string -> unit) - | `Map of (string -> string) - ]) -> unit - - + Warning. Modification of a bundle, associated with an installed plugin + or application will lead to an undefined behavior. The function is + intended for building a new bundle. Once it is created it may be sealed + and made readonly.*) (** Incremental bundle builder. - Using this module it is possible to build bundle - recipe incrementally. The data, that was added to - the bundle will not be copied until the [flush] method is - called. *) + Using this module it is possible to build bundle recipe incrementally. + The data, that was added to the bundle will not be copied until the + [flush] method is called. *) module Builder : sig type t - - (** [create ()] creates a builder. *) val create : unit -> t + (** [create ()] creates a builder. *) - - (** [put_file ?name builder uri] insert a file specified by the - [uri]. If [name] is specified, then the file will be stored - under the specified [name] in the bundle.*) val put_file : ?name:string -> t -> Uri.t -> unit + (** [put_file ?name builder uri] insert a file specified by the [uri]. If + [name] is specified, then the file will be stored under the specified + [name] in the bundle.*) - - (** [put_data builder ~name ~data] insert [data] at path - [name]. *) val put_data : t -> name:string -> data:string -> unit + (** [put_data builder ~name ~data] insert [data] at path [name]. *) - - (** [embed_manifest builder manifest] embeds a manifest. If it - was already embedded, then old one will be overwritten. *) val embed_manifest : t -> manifest -> unit + (** [embed_manifest builder manifest] embeds a manifest. If it was already + embedded, then old one will be overwritten. *) - - (** [flush builder output] finish the building and output the - resulting bundle into the file [output]. *) val flush : t -> Uri.t -> unit + (** [flush builder output] finish the building and output the resulting + bundle into the file [output]. *) end end end diff --git a/lib/bap_bundle/dune b/lib/bap_bundle/dune index 93b424109..f06bcd1ab 100644 --- a/lib/bap_bundle/dune +++ b/lib/bap_bundle/dune @@ -1,15 +1,21 @@ (library - (name bap_bundle) - (public_name bap-bundle) - (wrapped false) - (preprocess (pps ppx_bap)) - (libraries uri camlzip unix core_kernel core_kernel.caml_unix)) + (name bap_bundle) + (public_name bap-bundle) + (wrapped false) + (preprocess + (pps ppx_bap)) + (libraries uri camlzip unix core core_kernel.caml_unix)) (rule - (target bap_bundle_config.ml) - (deps bap_bundle_config.ml.ab (alias ../../config)) - (action - (with-stdin-from %{deps} - (with-stdout-to %{target} - (chdir %{workspace_root} - (run ./tools/rewrite.exe -filename %{deps})))))) + (target bap_bundle_config.ml) + (deps + bap_bundle_config.ml.ab + (alias ../../config)) + (action + (with-stdin-from + %{deps} + (with-stdout-to + %{target} + (chdir + %{workspace_root} + (run ./tools/rewrite.exe -filename %{deps})))))) diff --git a/lib/bap_byteweight/bap_byteweight.ml b/lib/bap_byteweight/bap_byteweight.ml index 3fc8e609d..27437c2f8 100644 --- a/lib/bap_byteweight/bap_byteweight.ml +++ b/lib/bap_byteweight/bap_byteweight.ml @@ -1,9 +1,10 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std module type Corpus = sig type t type key + val look : t -> length:int -> int -> key option end @@ -15,19 +16,12 @@ module type S = sig val create : unit -> t val train : t -> max_length:int -> (key -> bool) -> corpus -> unit val length : t -> int - - val next : t -> - length:int -> - threshold:float -> - corpus -> int -> int option - + val next : t -> length:int -> threshold:float -> corpus -> int -> int option val pp : Format.formatter -> t -> unit end -module Make - (Corpus : Corpus) - (Trie : Trie.S with type key = Corpus.key) = struct - +module Make (Corpus : Corpus) (Trie : Trie.S with type key = Corpus.key) = +struct module Bin = struct type t = (int * int) Trie.t [@@deriving bin_io, sexp] end @@ -37,27 +31,26 @@ module Make type key = Corpus.key let create = Trie.create - - let pp_pair fmt (x,y) = Format.fprintf fmt "(%d,%d)" x y + let pp_pair fmt (x, y) = Format.fprintf fmt "(%d,%d)" x y let pp = Trie.pp pp_pair let train ~max_length test set pass trie = - let rec outer = function - | 0 -> () - | n -> inner n 0 - and inner length m = match Corpus.look set ~length m,pass with - | None,_ -> outer (length - 1) + let rec outer = function 0 -> () | n -> inner n 0 + and inner length m = + match (Corpus.look set ~length m, pass) with + | None, _ -> outer (length - 1) | Some s, `Pos when test s -> - Trie.change trie s (function - | None -> Some (1,0) - | Some (a,b) -> Some (a+1,b)); - inner length (m+1) + Trie.change trie s (function + | None -> Some (1, 0) + | Some (a, b) -> Some (a + 1, b)); + inner length (m + 1) | Some s, `Neg -> - Trie.change trie s (function - | Some (m,n) when not(test s) -> Some (m,n+1) + Trie.change trie s (function + | Some (m, n) when not (test s) -> Some (m, n + 1) | x -> x); - inner length (m+1) - | _ -> inner length (m+1) in + inner length (m + 1) + | _ -> inner length (m + 1) + in outer max_length let train trie ~max_length test set = @@ -67,128 +60,135 @@ module Make let test ~threshold trie key = match Trie.longest_match trie key with | None -> false - | Some (_,(a,b)) -> - let n = a + b in - Float.(of_int a / of_int n > threshold) + | Some (_, (a, b)) -> + let n = a + b in + Float.(of_int a / of_int n > threshold) let next_if (trie : t) ~length ~f set n = let open Option.Monad_infix in let rec loop n = Corpus.look set ~length n >>= fun key -> match Trie.longest_match trie key with - | None -> loop (n+1) - | Some (len,stats) -> - if f key len stats - then Some n - else loop (n+1) in + | None -> loop (n + 1) + | Some (len, stats) -> if f key len stats then Some n else loop (n + 1) + in loop n - let next trie ~length ~threshold set n = - next_if trie ~length set n ~f:(fun _ _ (a,b) -> + next_if trie ~length set n ~f:(fun _ _ (a, b) -> let n = a + b in Float.(of_int a / of_int n > threshold)) let length = Trie.length end -module Make2 - (Corpus : Corpus) - (Trie : Trie.V2.S with type key = Corpus.key) = struct - include Make(Corpus)(Trie) +module Make2 (Corpus : Corpus) (Trie : Trie.V2.S with type key = Corpus.key) = +struct + include Make (Corpus) (Trie) + type token = Trie.token + let fold = Trie.fold end module Bytes = struct - module Self = Make2(struct - type t = mem - type key = mem - - let create mem = mem - - let look mem ~length n = - let from = Addr.(Memory.min_addr mem ++ n) in - match Memory.view ~from ~words:length mem with - | Ok mem -> Some mem - | _ -> None - end)(Memory.Trie.Stable.V1.R8) - - let t = Bap_byteweight_signatures.Data.declare "bytes" + module Self = + Make2 + (struct + type t = mem + type key = mem + + let create mem = mem + + let look mem ~length n = + let from = Addr.(Memory.min_addr mem ++ n) in + match Memory.view ~from ~words:length mem with + | Ok mem -> Some mem + | _ -> None + end) + (Memory.Trie.Stable.V1.R8) + + let t = + Bap_byteweight_signatures.Data.declare "bytes" ~load:(fun bytes -> - Binable.of_string (module Self) - (Caml.Bytes.unsafe_to_string bytes)) + Binable.of_string (module Self) (Stdlib.Bytes.unsafe_to_string bytes)) ~save:(fun data -> - Caml.Bytes.unsafe_of_string @@ - Binable.to_string (module Self) data) + Stdlib.Bytes.unsafe_of_string @@ Binable.to_string (module Self) data) include Self - let find bw ~length ~threshold mem = let start = Memory.min_addr mem in let rec loop acc n = match next bw ~length ~threshold mem n with - | Some n -> loop (Addr.(start ++ n) :: acc) (n+1) - | None -> List.rev acc in + | Some n -> loop (Addr.(start ++ n) :: acc) (n + 1) + | None -> List.rev acc + in loop [] 0 let find_if bw ~length ~f mem = let start = Memory.min_addr mem in let rec loop acc n = match next_if bw ~length ~f mem n with - | Some n -> loop (Addr.(start ++ n) :: acc) (n+1) - | None -> List.rev acc in + | Some n -> loop (Addr.(start ++ n) :: acc) (n + 1) + | None -> List.rev acc + in loop [] 0 let p1 m n = float m /. float (m + n) and p0 m n = float n /. float (m + n) let find_using_bayes_factor sigs ~min_length ~max_length threshold = - let (s1,s0) = fold sigs ~init:(0,0) ~f:(fun (s1,s0) key (h1,h0) -> - let length = List.length key in - if length >= min_length && length <= max_length then - h1 + s1, h0 + s0 - else s1,s0) in + let s1, s0 = + fold sigs ~init:(0, 0) ~f:(fun (s1, s0) key (h1, h0) -> + let length = List.length key in + if length >= min_length && length <= max_length then (h1 + s1, h0 + s0) + else (s1, s0)) + in let ph1 = float s1 /. float (s1 + s0) in let ph0 = 1. -. ph1 in let ratio m n = - let r = p1 m n /. p0 m n - and q = ph1 /. ph0 in - r *. q in - find_if sigs ~length:max_length ~f:(fun _ length (h1,h0) -> - length >= min_length && - Float.(ratio h1 h0 > threshold)) + let r = p1 m n /. p0 m n and q = ph1 /. ph0 in + r *. q + in + find_if sigs ~length:max_length ~f:(fun _ length (h1, h0) -> + length >= min_length && Float.(ratio h1 h0 > threshold)) let find_using_threshold sigs ~min_length ~max_length threshold = - find_if sigs ~length:max_length ~f:(fun _ length (h1,h0) -> - length >= min_length && - Float.(p1 h1 h0 > threshold)) - - let pp_byte ppf x = - Format.fprintf ppf "%02x" @@ Word.to_int_exn x - - let pp ppf t = fold t ~init:() ~f:(fun () words (a,b) -> - let p1h1 = float a /. float (a+b) in - Format.fprintf ppf "%-8d %-8d %-8d %.4f " (a+b) a b p1h1; - List.iter words ~f:(Format.fprintf ppf "%a" pp_byte); - Format.fprintf ppf "@\n"); + find_if sigs ~length:max_length ~f:(fun _ length (h1, h0) -> + length >= min_length && Float.(p1 h1 h0 > threshold)) + + let pp_byte ppf x = Format.fprintf ppf "%02x" @@ Word.to_int_exn x + + let pp ppf t = + fold t ~init:() ~f:(fun () words (a, b) -> + let p1h1 = float a /. float (a + b) in + Format.fprintf ppf "%-8d %-8d %-8d %.4f " (a + b) a b p1h1; + List.iter words ~f:(Format.fprintf ppf "%a" pp_byte); + Format.fprintf ppf "@\n") end type stats = int * int module V1 = struct module type S = S + module Make = Make end + module V2 = struct module type S = sig include V1.S type token - val next_if : t -> length:int -> f:(key -> int -> stats -> bool) -> corpus -> - int -> int option + val next_if : + t -> + length:int -> + f:(key -> int -> stats -> bool) -> + corpus -> + int -> + int option val fold : t -> init:'b -> f:('b -> token list -> stats -> 'b) -> 'b end @@ -198,7 +198,8 @@ end module Stats = struct type t = stats - let trials (a,b) = a + b + + let trials (a, b) = a + b let h1 = fst let h0 = snd end diff --git a/lib/bap_byteweight/bap_byteweight.mli b/lib/bap_byteweight/bap_byteweight.mli index 2a007245b..3845e74d4 100644 --- a/lib/bap_byteweight/bap_byteweight.mli +++ b/lib/bap_byteweight/bap_byteweight.mli @@ -1,223 +1,190 @@ +open Bap.Std (** Byteweight library. - Byteweight is a function start identification algorithm [[1]]. This - library provides a functorized implementation. - - An auxiliary {!Bap_byteweight_signatures} library provides an - access to the repository of binary signatures. + Byteweight is a function start identification algorithm [[1]]. This library + provides a functorized implementation. - @see - - {v - [1]: Bao, Tiffany, et al. "Byteweight: Learning to recognize functions in binary code." - 23rd USENIX Security Symposium (USENIX Security 14). 2014. - v} -*) -open Bap.Std + An auxiliary {!Bap_byteweight_signatures} library provides an access to the + repository of binary signatures. + @see + {v + [1]: Bao, Tiffany, et al. "Byteweight: Learning to recognize functions in binary code." + 23rd USENIX Security Symposium (USENIX Security 14). 2014. + v} *) type stats (** Data interface. - This is an interface of a type that is used to represent the - data.*) + This is an interface of a type that is used to represent the data.*) module type Corpus = sig type t type key - (** [look data ~length offset] extract data of specified [length] at - the given [offset]. Returns a key that represents this chunk of - data (if the data can be extracted). + val look : t -> length:int -> int -> key option + (** [look data ~length offset] extract data of specified [length] at the given + [offset]. Returns a key that represents this chunk of data (if the data + can be extracted). - Note 1 - for simple data representations, like strings, types - [t] and [key] can be unified, and the [look] function is just a - regular substring extraction function. + Note 1 - for simple data representations, like strings, types [t] and + [key] can be unified, and the [look] function is just a regular substring + extraction function. - Note 2 - the key type is unified with the [key] of {!Trie} data - structure, so one will need to implement the {!Trie.Key} - interface. - *) - val look : t -> length:int -> int -> key option + Note 2 - the key type is unified with the [key] of {!Trie} data structure, + so one will need to implement the {!Trie.Key} interface. *) end - (** Byteweight algorithm interface. - Byteweight is a supervised machine learning algorithm. Based on - the input string, where each substrings is labeled by true of - false, a function is inferred that can map substrings into boolean - domain. - - For example, if the label function teaches whether the given - substring is a start of a function, we can infer an algorithm for - finding function starts. + Byteweight is a supervised machine learning algorithm. Based on the input + string, where each substrings is labeled by true of false, a function is + inferred that can map substrings into boolean domain. + For example, if the label function teaches whether the given substring is a + start of a function, we can infer an algorithm for finding function starts. *) module type S = sig type t [@@deriving bin_io, sexp] type key type corpus - - (** [create ()] creates an empty instance of the byteweigth decider. *) val create : unit -> t + (** [create ()] creates an empty instance of the byteweigth decider. *) - - (** [train decider ~max_length test corpus] train the [decider] on - the specified [corpus]. The [test] function classifies extracted - substrings. The [max_length] parameter binds the maximum - length of substrings. *) val train : t -> max_length:int -> (key -> bool) -> corpus -> unit + (** [train decider ~max_length test corpus] train the [decider] on the + specified [corpus]. The [test] function classifies extracted substrings. + The [max_length] parameter binds the maximum length of substrings. *) - - (** [length decider] total amount of different substrings known to a - decider. *) val length : t -> int - + (** [length decider] total amount of different substrings known to a decider. + *) (** [next t ~length ~threshold data begin] the next positive chunk. - Returns an offset that is greater than [begin] of the next - longest substring up to the given [length], for which + Returns an offset that is greater than [begin] of the next longest + substring up to the given [length], for which [h1 / (h0 + h1) > threshold]. - This is a specialization of the [next_if] function from the - extended [V1.V2.S] interface. - *) - - val next : t -> - length:int -> - threshold:float -> corpus -> int -> int option + This is a specialization of the [next_if] function from the extended + [V1.V2.S] interface. *) + val next : t -> length:int -> threshold:float -> corpus -> int -> int option - (** [pp ppf decider] prints all known to decider chunks. *) val pp : Format.formatter -> t -> unit + (** [pp ppf decider] prints all known to decider chunks. *) end module V1 : sig module type S = S - module Make - (Corpus : Corpus) - (Trie : Trie.S with type key = Corpus.key) : - S with type key = Corpus.key - and type corpus = Corpus.t + module Make (Corpus : Corpus) (Trie : Trie.S with type key = Corpus.key) : + S with type key = Corpus.key and type corpus = Corpus.t end module V2 : sig module type S = sig include V1.S - type token + type token + val next_if : + t -> + length:int -> + f:(key -> int -> stats -> bool) -> + corpus -> + int -> + int option (** [next_if t ~length ~f data begin] the next chunk that [f]. - Finds the next offset greater than [begin] of a string of - the given [length] for which there was an observing of a - substring [s] with length [n] and statistics [stats], such - that [f s n stats] is [true]. - *) - val next_if : t -> length:int -> f:(key -> int -> stats -> bool) -> corpus -> - int -> int option + Finds the next offset greater than [begin] of a string of the given + [length] for which there was an observing of a substring [s] with length + [n] and statistics [stats], such that [f s n stats] is [true]. *) - - (** [fold t ~init ~f] applies [f] to all chunks known to the decider. *) val fold : t -> init:'b -> f:('b -> token list -> stats -> 'b) -> 'b + (** [fold t ~init ~f] applies [f] to all chunks known to the decider. *) end - module Make - (Corpus : Corpus) - (Trie : Trie.V2.S with type key = Corpus.key) : - S with type key = Corpus.key + module Make (Corpus : Corpus) (Trie : Trie.V2.S with type key = Corpus.key) : + S + with type key = Corpus.key and type corpus = Corpus.t and type token = Trie.token end -(** [Make(Corpus)(Trie)] creates a Byteweight procedure, - that works in the [Corpus] domain and uses [Trie] for its - implementation. - - The [Trie] module specifies how substrings are compared. In - particular, it gives an opportunity, to implement normalized - string comparison.*) +(** [Make(Corpus)(Trie)] creates a Byteweight procedure, that works in the + [Corpus] domain and uses [Trie] for its implementation. -module Make - (Corpus : Corpus) - (Trie : Trie.S with type key = Corpus.key) : - S with type key = Corpus.key - and type corpus = Corpus.t + The [Trie] module specifies how substrings are compared. In particular, it + gives an opportunity, to implement normalized string comparison.*) +module Make (Corpus : Corpus) (Trie : Trie.S with type key = Corpus.key) : + S with type key = Corpus.key and type corpus = Corpus.t -(** Default implementation that uses memory chunk as the domain. *) +(** Default implementation that uses memory chunk as the domain. *) module Bytes : sig - include V2.S with type key = mem - and type corpus = mem - and type token := word + include V2.S with type key = mem and type corpus = mem and type token := word val t : t Bap_byteweight_signatures.data - - (** [find mem ~length ~threshold corpus] extract addresses of all - memory chunks of the specified [length], that were classified - positively under given [threshold]. *) val find : t -> length:int -> threshold:float -> corpus -> addr list + (** [find mem ~length ~threshold corpus] extract addresses of all memory + chunks of the specified [length], that were classified positively under + given [threshold]. *) - + val find_if : + t -> length:int -> f:(key -> int -> stats -> bool) -> corpus -> addr list (** [find_if mem ~length ~f corpus] finds all positively classfied chunks. This is a generalization of the [find] function with an arbitrary thresholding function. - It scans the input corpus using the [next_if] function and - collects all positive results. - *) - val find_if : t -> length:int -> f:(key -> int -> stats -> bool) -> corpus -> addr list - + It scans the input corpus using the [next_if] function and collects all + positive results. *) + val find_using_bayes_factor : + t -> min_length:int -> max_length:int -> float -> corpus -> addr list (** [find_using_bayes_factor sigs mem] classify functions starts using the Bayes factor procedure. - Returns a list of addresses in [mem] that have a signature in - [sigs] with length [min_length <= n <= max_length] and the Bayes - factor greater than [threshold]. + Returns a list of addresses in [mem] that have a signature in [sigs] with + length [min_length <= n <= max_length] and the Bayes factor greater than + [threshold]. - The Bayes factor is the ratio between posterior probabilities of - two hypothesis, the [h1] hypothesis that the given sequence of bytes - occurs at the function start, and the dual [h0] hypothesis, + The Bayes factor is the ratio between posterior probabilities of two + hypothesis, the [h1] hypothesis that the given sequence of bytes occurs at + the function start, and the dual [h0] hypothesis, [k = P(h1|s)/P(h0|s) = (P(s|h1)/P(s|h0)) * (P(h1)/P(h0))], where - - [P(hN|s)] is the probability of the hypothesis [P(hN)] - given the sequence of bytes [s] as the evidence, - - [P(s|hN] is the probability of the sequence of bytes [s], - given the hypothesis [hN], + - [P(hN|s)] is the probability of the hypothesis [P(hN)] given the + sequence of bytes [s] as the evidence, + - [P(s|hN] is the probability of the sequence of bytes [s], given the + hypothesis [hN], - [P(hN)] is the prior probability of the hypothesis [hN]. - Given that [m] is the total number of occurences of a sequence - of bytes [s] at the beginning of a function, and [n] is the total - number of occurences of [s] in a middle of a function, we compute - [P(s|h1)] and [P(s|h0)] as + Given that [m] is the total number of occurences of a sequence of bytes + [s] at the beginning of a function, and [n] is the total number of + occurences of [s] in a middle of a function, we compute [P(s|h1)] and + [P(s|h0)] as - [P(s|h1) = m / (m+n)], - [P(s|h0) = 1 - P(s|h1) = n / (m+n)]. Given that [q] is the total number of substrings in [sigs] of length - [min_length <= l <= max_length] and [p] is the total number of - substrings of the length [l] that start functions, we compute prior - probabilities as, + [min_length <= l <= max_length] and [p] is the total number of substrings + of the length [l] that start functions, we compute prior probabilities as, - [P(h1) = p / q], - [P(h0) = 1 - P(h1)]. - - The resulting factor is a value [0 < k < infinity] that - quantify the strength of the evidence that a given substring - gives in support of the hypothesis [h1]. Levels below [1] - support hypothesis [h0], levels above [1] give some support of - [h1], with the following interpretations (Kass and Raftery - (1995)), + The resulting factor is a value [0 < k < infinity] that quantify the + strength of the evidence that a given substring gives in support of the + hypothesis [h1]. Levels below [1] support hypothesis [h0], levels above + [1] give some support of [h1], with the following interpretations (Kass + and Raftery (1995)), {v Bayes Factor Strength @@ -226,67 +193,48 @@ module Bytes : sig 3.2 to 10 Substantial 10 to 100 Strong 100 and greater Decisive - v} - *) - val find_using_bayes_factor : t -> - min_length:int -> - max_length:int -> - float -> - corpus -> addr list + v} *) + val find_using_threshold : + t -> min_length:int -> max_length:int -> float -> corpus -> addr list + (** [find_using_threshold sigs mem] classify function starts using a simple + thresholding procedure. - (** [find_using_threshold sigs mem] classify function starts using - a simple thresholding procedure. - - Returns a list of addresses in [mem] that have a signature [s] in - [sigs] with length [min_length <= n <= max_length] and the - sample probability [P1(s)] of starting a function greater than - [threshold], + Returns a list of addresses in [mem] that have a signature [s] in [sigs] + with length [min_length <= n <= max_length] and the sample probability + [P1(s)] of starting a function greater than [threshold], [P1(s) = m / (m+n)], where - - m - the total number of occurences of [s] at the begining - of a function in [sigs]; - - n - the total number of occurences of [s] not at the begining - of a function in [sigs]. - *) - val find_using_threshold : t -> - min_length:int -> - max_length:int -> - float -> - corpus -> addr list + - m - the total number of occurences of [s] at the begining of a function + in [sigs]; + - n - the total number of occurences of [s] not at the begining of a + function in [sigs]. *) end - module Stats : sig type t = stats - - (** [trial stats] is the total number of trials. - - This is the total number of occurences of the given substring in - all tests, it is equal to [h0 stats + h1 stats]. - *) val trials : t -> int + (** [trial stats] is the total number of trials. + This is the total number of occurences of the given substring in all + tests, it is equal to [h0 stats + h1 stats]. *) + val h0 : t -> int (** [h0 stats] is how many times the null-hypothesis being accepted. - This statistics tells us exactly how many times the label - function returned false for this substring. - - In terms of the function starts, this is how many times the - substring was classified as not a function start. - *) - val h0 : t -> int + This statistics tells us exactly how many times the label function + returned false for this substring. + In terms of the function starts, this is how many times the substring was + classified as not a function start. *) + val h1 : t -> int (** [h1 stats] is how many times the null hypothesis was rejected. - This statistic tells us exactly how many times the label - function returned true for this substring. + This statistic tells us exactly how many times the label function returned + true for this substring. - In terms of the function starts, this is how many times the - substring was classified as a function start. - *) - val h1 : t -> int + In terms of the function starts, this is how many times the substring was + classified as a function start. *) end diff --git a/lib/bap_byteweight/bap_byteweight_signatures.ml b/lib/bap_byteweight/bap_byteweight_signatures.ml index 0ecc78596..8b4d76e7e 100644 --- a/lib/bap_byteweight/bap_byteweight_signatures.ml +++ b/lib/bap_byteweight/bap_byteweight_signatures.ml @@ -1,46 +1,37 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap_core_theory open Bap.Std -module Sys = Caml.Sys - +module Sys = Stdlib.Sys module Config = Bap_main.Extension.Configuration -let (/) = Filename.concat +let ( / ) = Filename.concat -type error = [ - | `Corrupted of string +type error = + [ `Corrupted of string | `No_signatures | `No_entry of string - | `Sys_error of string -] - + | `Sys_error of string ] -type 'a data = { - name : string; - load : (bytes -> 'a); - save : ('a -> bytes); -} +type 'a data = { name : string; load : bytes -> 'a; save : 'a -> bytes } exception Failed of error let fail error = raise (Failed error) let corrupted entry err = `Corrupted (sprintf "%s: %s" entry err) let zip_error entry err = fail (corrupted entry err) - -let compiler_name = - Option.value_map ~default:"default" ~f: Theory.Compiler.name +let compiler_name = Option.value_map ~default:"default" ~f:Theory.Compiler.name let matches_modulo_bits t name = match Theory.Target.matching t name with | None -> false | Some t' -> Theory.Target.bits t = Theory.Target.bits t' -let matching_entry ?compiler target data {Zip.filename} = +let matching_entry ?compiler target data { Zip.filename } = match String.split filename ~on:'/' with - | [p1; p2; p3] -> - matches_modulo_bits target p1 && - String.equal (compiler_name compiler) p2 && - String.equal data.name p3 + | [ p1; p2; p3 ] -> + matches_modulo_bits target p1 + && String.equal (compiler_name compiler) p2 + && String.equal data.name p3 | _ -> fail (`Corrupted ("invalid entry name: " ^ filename)) let with_input file k = @@ -53,18 +44,17 @@ let with_output file k = let read_entry ?compiler target data file = with_input file @@ fun zip -> - Zip.entries zip |> - List.find ~f:(matching_entry ?compiler target data) |> function + Zip.entries zip |> List.find ~f:(matching_entry ?compiler target data) + |> function | None -> None - | Some entry -> - Some (data.load (Bytes.of_string (Zip.read_entry zip entry))) + | Some entry -> Some (data.load (Bytes.of_string (Zip.read_entry zip entry))) let read_entries file = if Fn.non Sys.file_exists file then [] - else with_input file @@ fun zip -> - Zip.entries zip |> - List.map ~f:(fun entry -> - entry,Zip.read_entry zip entry) + else + with_input file @@ fun zip -> + Zip.entries zip + |> List.map ~f:(fun entry -> (entry, Zip.read_entry zip entry)) let target_name = Fn.compose KB.Name.unqualified Theory.Target.name @@ -72,24 +62,24 @@ let make_entry ?compiler target data = target_name target / compiler_name compiler / data.name let make_path root = root / "signatures" / "byteweight.zip" - let system_path = make_path Config.sysdatadir -let default_path = match Sys.getenv_opt "BAP_SIGFILE" with +let default_path = + match Sys.getenv_opt "BAP_SIGFILE" with | Some path -> path | None -> make_path Config.datadir -let default_paths = [default_path; system_path] +let default_paths = [ default_path; system_path ] -let try_lookup ?(paths=[]) ?compiler target data = - paths @ default_paths |> List.find_map ~f:(fun path -> - if Sys.file_exists path - then read_entry ?compiler target data path - else None) +let try_lookup ?(paths = []) ?compiler target data = + paths @ default_paths + |> List.find_map ~f:(fun path -> + if Sys.file_exists path then read_entry ?compiler target data path + else None) let of_exn = function | Sys_error msg -> Error (`Sys_error msg) - | Zip.Error (_,ent,err) -> Error (corrupted ent err) + | Zip.Error (_, ent, err) -> Error (corrupted ent err) | Failed er -> Error er | other -> raise other @@ -99,17 +89,20 @@ let lookup ?paths ?compiler target data = | None -> Error (`No_entry (target_name target)) | Some data -> Ok data - let update_or_fail ?compiler target data payload path = let entries = - read_entries path |> - List.filter ~f:(fun (entry,_) -> - not (matching_entry ?compiler target data entry)) in + read_entries path + |> List.filter ~f:(fun (entry, _) -> + not (matching_entry ?compiler target data entry)) + in with_output path @@ fun zip -> let path = make_entry ?compiler target data in - let data = Bytes.unsafe_to_string (data.save payload) in + let data = + Bytes.unsafe_to_string + ~no_mutation_while_string_reachable:(data.save payload) + in Zip.add_entry data zip path; - List.iter entries ~f:(fun ({Zip.filename; comment; mtime; _},data) -> + List.iter entries ~f:(fun ({ Zip.filename; comment; mtime; _ }, data) -> Zip.add_entry data zip filename ~comment ~mtime) let copy input output = @@ -118,11 +111,12 @@ let copy input output = let rec loop () = let read = In_channel.input input ~buf ~pos:0 ~len in Out_channel.output output ~buf ~pos:0 ~len:read; - if read = len then loop () in + if read = len then loop () + in loop () let temporary_copy file = - let tmp,output = Caml.Filename.open_temp_file "byteweight" "copy" in + let tmp, output = Stdlib.Filename.open_temp_file "byteweight" "copy" in In_channel.with_file file ~f:(fun input -> copy input output); Out_channel.close output; tmp @@ -141,12 +135,13 @@ module Data = struct let registry = Hash_set.create (module String) let declare ~load ~save name = - if Hash_set.mem registry name - then failwithf "The byteweight data type named %S is \ - already registered, please pick another name" + if Hash_set.mem registry name then + failwithf + "The byteweight data type named %S is already registered, please pick \ + another name" name (); Hash_set.add registry name; - {load; save; name} + { load; save; name } end (* the old deprecated implementation *) @@ -158,61 +153,67 @@ let resolve_path user = | None -> fail `No_signatures | Some path -> path -let entry ?(comp="default") ~mode arch = - Arch.to_string arch / comp / mode +let entry ?(comp = "default") ~mode arch = Arch.to_string arch / comp / mode let load_exn ?comp ?path ~mode arch = let path = resolve_path path in - let zip = try Zip.open_in path with + let zip = + try Zip.open_in path with | Sys_error msg -> fail (`Sys_error msg) - | Zip.Error (_,ent,err) -> zip_error ent err in + | Zip.Error (_, ent, err) -> zip_error ent err + in let entry_path = entry ?comp ~mode arch in - let r = try + let r = + try let entry = Zip.find_entry zip entry_path in - Ok (Zip.read_entry zip entry |> Caml.Bytes.unsafe_of_string) - with Caml.Not_found -> fail (`No_entry entry_path) - | Zip.Error (_,ent,err) -> zip_error ent err in + Ok (Zip.read_entry zip entry |> Stdlib.Bytes.unsafe_of_string) + with + | Stdlib.Not_found -> fail (`No_entry entry_path) + | Zip.Error (_, ent, err) -> zip_error ent err + in Zip.close_in zip; r let load ?comp ?path ~mode arch = - try load_exn ?comp ?path ~mode arch with - | Failed err -> Error err + try load_exn ?comp ?path ~mode arch with Failed err -> Error err (* for some reason Zip truncates the output file, and doesn't provide us an option to append anything to for it. *) let save_exn ?comp ~mode ~path arch data = let data = Bytes.to_string data in - let old = try - if Sys.file_exists path then + let old = + try + if Sys.file_exists path then ( let zip = Zip.open_in path in let ins = - Zip.entries zip |> List.map ~f:(fun e -> - e, Zip.read_entry zip e) in - Zip.close_in zip; ins + Zip.entries zip |> List.map ~f:(fun e -> (e, Zip.read_entry zip e)) + in + Zip.close_in zip; + ins) else [] - with Sys_error msg -> fail (`Sys_error msg) - | Zip.Error (_,ent,err) -> zip_error ent err in - let zip = try Zip.open_out path with - Sys_error msg -> fail (`Sys_error msg) in + with + | Sys_error msg -> fail (`Sys_error msg) + | Zip.Error (_, ent, err) -> zip_error ent err + in + let zip = + try Zip.open_out path with Sys_error msg -> fail (`Sys_error msg) + in try let dst = entry ?comp ~mode arch in - List.iter old ~f:(fun (entry,data) -> + List.iter old ~f:(fun (entry, data) -> let file = Zip.(entry.filename) in if String.(file <> dst) then Zip.add_entry data zip file); Zip.add_entry data zip dst; Zip.close_out zip - with Sys_error msg -> fail (`Sys_error msg) - | Zip.Error (_,ent,err) -> zip_error ent err - + with + | Sys_error msg -> fail (`Sys_error msg) + | Zip.Error (_, ent, err) -> zip_error ent err let save ?comp ~mode ~path arch data = - try Ok (save_exn ?comp ~mode ~path arch data) - with Failed err -> Error err - + try Ok (save_exn ?comp ~mode ~path arch data) with Failed err -> Error err let string_of_error = function | `Corrupted msg -> sprintf "signature database is corrupted: %s" msg | `No_signatures -> "signature database doesn't exist" | `Sys_error msg -> sprintf "system error: %s" msg - | `No_entry msg -> sprintf "can't access given entry: %s" msg + | `No_entry msg -> sprintf "can't access given entry: %s" msg diff --git a/lib/bap_byteweight/bap_byteweight_signatures.mli b/lib/bap_byteweight/bap_byteweight_signatures.mli index 397e09be8..3faf7ae42 100644 --- a/lib/bap_byteweight/bap_byteweight_signatures.mli +++ b/lib/bap_byteweight/bap_byteweight_signatures.mli @@ -1,124 +1,114 @@ (** Interface to the unified storage of signatures. - The signatures a key-value pairs (entries) located in one or more - archives. Keys are target/compiler descriptions and values are - arbitrary data. + The signatures a key-value pairs (entries) located in one or more archives. + Keys are target/compiler descriptions and values are arbitrary data. - The data types of the signature are described with the [Data] - module. This library doesn't specify any data types of signature - values and they are commonly provided by the libraries that define - those data types, e.g., [Bap_byteweight.Bytes]. -*) + The data types of the signature are described with the [Data] module. This + library doesn't specify any data types of signature values and they are + commonly provided by the libraries that define those data types, e.g., + [Bap_byteweight.Bytes]. *) -open Core_kernel[@@warning "-D"] +open Core open Bap_core_theory open Bap.Std -(** Error conditions *) -type error = [ - | `Corrupted of string (** Signature file is corrupted *) - | `No_signatures (** Signature file is not found *) - | `No_entry of string (** Corresponding entry not found *) - | `Sys_error of string (** System error has occurred *) -] +type error = + [ `Corrupted of string (** Signature file is corrupted *) + | `No_signatures (** Signature file is not found *) + | `No_entry of string (** Corresponding entry not found *) + | `Sys_error of string (** System error has occurred *) ] +(** Error conditions *) -(** the descriptor of the data type stored in the signature entry. - - @since 2.5.0 -*) type 'a data +(** the descriptor of the data type stored in the signature entry. + @since 2.5.0 *) -(** [lookup t f] looks up for the matching entry in the signature database. - - The search is performed over the [paths] list that is a list of - filenames. The first matching entry is selected. If a file in the - [paths] list doesn't exist then it is skipped. If it exists but - unreadable an error is returned. - - The paths list is always appended by [[default_path; system_path]], - in that specific order. - - If [compiler] is specified, then only entries that list matching - compiler will be selected. - - The target matches are performed with the [Theory.Target.matches] - function. - - @since 2.5.0 -*) val lookup : ?paths:string list -> ?compiler:Theory.compiler -> - Theory.Target.t -> 'a data -> ('a, error) Result.t + Theory.Target.t -> + 'a data -> + ('a, error) Result.t +(** [lookup t f] looks up for the matching entry in the signature database. + The search is performed over the [paths] list that is a list of filenames. + The first matching entry is selected. If a file in the [paths] list doesn't + exist then it is skipped. If it exists but unreadable an error is returned. -(** [update t f x path] updates or creates an entry in the signature database. + The paths list is always appended by [[default_path; system_path]], in that + specific order. + + If [compiler] is specified, then only entries that list matching compiler + will be selected. + + The target matches are performed with the [Theory.Target.matches] function. - Removes all entries that match with the specified compiler, - target, and data type and adds a new entry with the provided - data. All unmatching entries are preserved. + @since 2.5.0 *) - @since 2.5.0 -*) val update : ?compiler:Theory.compiler -> - Theory.Target.t -> 'a data -> 'a -> string -> (unit,error) Result.t + Theory.Target.t -> + 'a data -> + 'a -> + string -> + (unit, error) Result.t +(** [update t f x path] updates or creates an entry in the signature database. + + Removes all entries that match with the specified compiler, target, and data + type and adds a new entry with the provided data. All unmatching entries are + preserved. + @since 2.5.0 *) (** Interface for declaring signature database data types. *) module Data : sig - + val declare : load:(bytes -> 'a) -> save:('a -> bytes) -> string -> 'a data (** [declare ~load ~save name] declares a new mode. - The [load] and [save] functions are used to store the mode - information in the signatures database. + The [load] and [save] functions are used to store the mode information in + the signatures database. - Raises an exception if the mode name is not unique. - *) - val declare : - load:(bytes -> 'a) -> - save:('a -> bytes) -> - string -> 'a data + Raises an exception if the mode name is not unique. *) end -(** [save ?comp ~mode ~path arch data] store signatures data in the - database of signatures specified by the [path] parameter. - - - The triple [arch-comp-mode] defines a key for the created entry. If an - entry with the same name existed, then it would be overwritten - with the new data. If the database, doesn't exist, then it will be - created and the specified destination.*) -val save : ?comp:string -> mode:string -> path:string -> arch -> bytes -> - (unit,error) Result.t +val save : + ?comp:string -> + mode:string -> + path:string -> + arch -> + bytes -> + (unit, error) Result.t [@@deprecated "since 2022-02 use [lookup]"] +(** [save ?comp ~mode ~path arch data] store signatures data in the database of + signatures specified by the [path] parameter. + + The triple [arch-comp-mode] defines a key for the created entry. If an entry + with the same name existed, then it would be overwritten with the new data. + If the database, doesn't exist, then it will be created and the specified + destination.*) +val load : + ?comp:string -> ?path:string -> mode:string -> arch -> (bytes, error) Result.t +[@@deprecated "since 2022-02 use [update]"] (** [load ?comp ?path ~mode arch] finds a signature for the specified [arch-comp-path] triple. If [path] is not set, the the signatures are looked up first in [default_path] and, if not found, in [system_path]. - Since 2.3.0 the path search has changed to look into two - locations. -*) -val load : ?comp:string -> ?path:string -> mode:string -> arch -> - (bytes,error) Result.t -[@@deprecated "since 2022-02 use [update]"] - + Since 2.3.0 the path search has changed to look into two locations. *) +val default_path : string (** default path for the user's signatures database. - Since 2.3.0 it is pointed to the user-specific location, not - to the system-wide. See also {!system_path}. -*) -val default_path : string + Since 2.3.0 it is pointed to the user-specific location, not to the + system-wide. See also {!system_path}. *) +val system_path : string (** the path to the system-wide location of signatures. @since 2.3.0*) -val system_path : string -(** a human readable representation of an error. *) val string_of_error : error -> string +(** a human readable representation of an error. *) diff --git a/lib/bap_byteweight/dune b/lib/bap_byteweight/dune index 65d1da90e..5dd18acd8 100644 --- a/lib/bap_byteweight/dune +++ b/lib/bap_byteweight/dune @@ -1,7 +1,7 @@ (library (name bap_byteweight) (public_name bap-byteweight) - (preprocess (pps ppx_bap)) + (preprocess + (pps ppx_bap)) (wrapped false) - (libraries bap bap-main bap-core-theory bap-knowledge - core_kernel uri camlzip)) + (libraries bap bap-main bap-core-theory bap-knowledge core uri camlzip)) diff --git a/lib/bap_c/bap_c.ml b/lib/bap_c/bap_c.ml index 4a8e9e65f..c4ac748b9 100644 --- a/lib/bap_c/bap_c.ml +++ b/lib/bap_c/bap_c.ml @@ -1,28 +1,30 @@ (** C language support library. - This library brings support for C Abstract Machine. In particular - it adds a fairly complete support for C type system, data model, - and ABI. + This library brings support for C Abstract Machine. In particular it adds a + fairly complete support for C type system, data model, and ABI. - [open Bap_c.Std] module to use this library, it only defines one - module [C] that includes the rest of the library. It also defines - an interface for parser, that maybe provided by a third party. -*) + [open Bap_c.Std] module to use this library, it only defines one module [C] + that includes the rest of the library. It also defines an interface for + parser, that maybe provided by a third party. *) module Std = struct module C = struct - module Abi = Bap_c_abi + module Abi = Bap_c_abi module Attr = Bap_c_attr module Data = Bap_c_data module Size = Bap_c_size + module Type = struct include Bap_c_type + module Mapper = struct include Bap_c_type_mapper_intf include Bap_c_type_mapper end + include Bap_c_type_printer end + module Parser = Bap_c_parser include Bap_c_term_attributes end diff --git a/lib/bap_c/bap_c_abi.ml b/lib/bap_c/bap_c_abi.ml index c9740b134..e23fdf059 100644 --- a/lib/bap_c/bap_c_abi.ml +++ b/lib/bap_c/bap_c_abi.ml @@ -1,183 +1,192 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap_core_theory open Bap.Std open Bap_c_type open Monads.Std - -include Self() - +include Self () module Attrs = Bap_c_term_attributes module Data = Bap_c_data + type ctype = t let is_const p = p.Spec.qualifier.Qualifier.const let is_mutable p = not (is_const p) - -let rec lvalue (t : ctype) = match t with +let rec lvalue (t : ctype) = + match t with | `Void -> true | `Basic t -> is_mutable t - | `Pointer ({Spec.t} as p) -> is_mutable p || lvalue t - | `Array ({Spec.t={Array.element=t}} as p) -> is_mutable p || lvalue t + | `Pointer ({ Spec.t } as p) -> is_mutable p || lvalue t + | `Array ({ Spec.t = { Array.element = t } } as p) -> is_mutable p || lvalue t | `Function _ -> false - | `Structure {Spec.t={Compound.fields}} - | `Union {Spec.t={Compound.fields}} -> - List.exists fields ~f:(fun (_,t) -> lvalue t) + | `Structure { Spec.t = { Compound.fields } } + | `Union { Spec.t = { Compound.fields } } -> + List.exists fields ~f:(fun (_, t) -> lvalue t) let arg_intent : ctype -> intent = function | `Void -> In | `Basic _ -> In - | `Pointer {Spec.t} when lvalue t -> Both - | `Array {Spec.t={Array.element=e}} when lvalue e -> Both + | `Pointer { Spec.t } when lvalue t -> Both + | `Array { Spec.t = { Array.element = e } } when lvalue e -> Both | `Pointer _ | `Array _ -> In | `Function _ -> In - | `Union _ - | `Structure _ -> In - + | `Union _ | `Structure _ -> In -type error = [ - | `Unknown_interface of string - | `Parser_error of string * Error.t -] [@@deriving sexp_of] +type error = [ `Unknown_interface of string | `Parser_error of string * Error.t ] +[@@deriving sexp_of] let sexp_of_exp exp = Sexp.Atom (Exp.to_string exp) + type param = Data.t * exp [@@deriving sexp] type args = { return : param option; hidden : (Bap_c_type.t * param) list; params : param list; -} [@@deriving sexp] +} +[@@deriving sexp] type t = { insert_args : sub term -> attr list -> proto -> args option; apply_attrs : attr list -> sub term -> sub term; } - exception Failed of error [@@deriving sexp_of] + let fail x = raise (Failed x) let data (size : #Bap_c_size.base) (t : Bap_c_type.t) = let open Data in - let sizeof t = match size#bits t with - | None -> Size.in_bits size#pointer - | Some s -> s in + let sizeof t = + match size#bits t with None -> Size.in_bits size#pointer | Some s -> s + in let padding pad : Data.t = match Size.of_int_opt pad with - | Some pad -> Imm (pad,Set []) + | Some pad -> Imm (pad, Set []) | None -> - let data : Data.t = Imm (`r8,Set []) in - Seq (List.init (pad/8) ~f:(Fn.const data)) in + let data : Data.t = Imm (`r8, Set []) in + Seq (List.init (pad / 8) ~f:(Fn.const data)) + in let rec data = function | `Void -> Seq [] - | `Basic {Spec.t} -> Imm (size#basic t, Top) - | `Pointer {Spec.t} -> Ptr (data t) - | `Array {Spec.t={Array.element=t; size=None}} -> Ptr (data t) - | `Array {Spec.t={Array.element=t; size=Some n}} -> - Ptr (Seq (List.init n ~f:(Fn.const (data t)))) - | `Structure {Spec.t={Compound.fields=fs}} -> - List.fold fs ~init:(0,0,[]) ~f:(fun (off,total,seq) (_,t) -> - let fsize = sizeof t in - let pad = Bap_c_size.padding (size#alignment t) off in - off + fsize + pad, total + fsize + pad, match pad with - | 0 -> data t :: seq - | _ -> data t :: padding pad :: seq) |> fun (_,total,ss) -> - let fullsize = sizeof t in - let pad = max 0 (fullsize - total) in - let ss = if pad = 0 then ss else padding (fullsize-total) :: ss in - Seq (List.rev ss) + | `Basic { Spec.t } -> Imm (size#basic t, Top) + | `Pointer { Spec.t } -> Ptr (data t) + | `Array { Spec.t = { Array.element = t; size = None } } -> Ptr (data t) + | `Array { Spec.t = { Array.element = t; size = Some n } } -> + Ptr (Seq (List.init n ~f:(Fn.const (data t)))) + | `Structure { Spec.t = { Compound.fields = fs } } -> + List.fold fs ~init:(0, 0, []) ~f:(fun (off, total, seq) (_, t) -> + let fsize = sizeof t in + let pad = Bap_c_size.padding (size#alignment t) off in + ( off + fsize + pad, + total + fsize + pad, + match pad with + | 0 -> data t :: seq + | _ -> data t :: padding pad :: seq )) + |> fun (_, total, ss) -> + let fullsize = sizeof t in + let pad = max 0 (fullsize - total) in + let ss = if pad = 0 then ss else padding (fullsize - total) :: ss in + Seq (List.rev ss) | `Union _ -> - let sz = sizeof t in - Seq (List.init (sz/8) ~f:(fun _ -> Imm (`r8,Top))) - | `Function _ -> Ptr (Imm ((size#pointer :> size),Top)) in + let sz = sizeof t in + Seq (List.init (sz / 8) ~f:(fun _ -> Imm (`r8, Top))) + | `Function _ -> Ptr (Imm ((size#pointer :> size), Top)) + in data t let layout (size : #Bap_c_size.base) (t : Bap_c_type.t) = let open Data in - let sizeof t = match size#bits t with - | None -> Size.in_bits size#pointer - | Some s -> s in - let imm size obj : Data.layout = {layout=Imm(size,obj)} - and ptr {layout=data} : Data.layout = {layout=Ptr data} - and seq layouts : Data.layout = { - layout = Seq (List.map layouts ~f:(fun {layout} -> layout)) - } in + let sizeof t = + match size#bits t with None -> Size.in_bits size#pointer | Some s -> s + in + let imm size obj : Data.layout = { layout = Imm (size, obj) } + and ptr { layout = data } : Data.layout = { layout = Ptr data } + and seq layouts : Data.layout = + { layout = Seq (List.map layouts ~f:(fun { layout } -> layout)) } + in let padding pad : Data.layout = imm pad Undef in - let rec layout t : Data.layout = match t with + let rec layout t : Data.layout = + match t with | `Void -> imm 8 Undef - | `Basic {Spec.t} -> imm (Size.in_bits (size#basic t)) (Basic t) - | `Pointer {Spec.t} -> ptr (layout t) - | `Array {Spec.t={Array.element=t; size=None}} -> ptr (layout t) - | `Array {Spec.t={Array.element=t; size=Some n}} -> - ptr (seq (List.init n ~f:(Fn.const (layout t)))) - | `Structure {Spec.t={Compound.fields=fs}} -> - List.fold fs ~init:(0,0,[]) ~f:(fun (off,total,seq) (name,t) -> - let fsize = sizeof t in - let pad = Bap_c_size.padding (size#alignment t) off in - off + fsize + pad, total + fsize + pad, - imm fsize (Field (name,layout t)) :: - match pad with - | 0 -> seq - | _ -> padding pad :: seq) |> fun (_,total,ss) -> - let fullsize = sizeof t in - let pad = max 0 (fullsize - total) in - let ss = if pad = 0 then ss else padding (fullsize-total) :: ss in - seq (List.rev ss) - | `Union {Spec.t={Compound.fields=fs}} -> - let total = sizeof t in - let variants = List.map fs ~f:(fun (name,t) -> - let fsize = sizeof t in - let pad = max 0 (total - fsize) in - let field = imm fsize @@ Field (name, layout t) in - match pad with - | 0 -> field - | _ -> seq [field; padding pad]) in - imm total (Union variants) - | `Function _ -> ptr (imm (Size.in_bits (size#pointer)) Undef) in + | `Basic { Spec.t } -> imm (Size.in_bits (size#basic t)) (Basic t) + | `Pointer { Spec.t } -> ptr (layout t) + | `Array { Spec.t = { Array.element = t; size = None } } -> ptr (layout t) + | `Array { Spec.t = { Array.element = t; size = Some n } } -> + ptr (seq (List.init n ~f:(Fn.const (layout t)))) + | `Structure { Spec.t = { Compound.fields = fs } } -> + List.fold fs ~init:(0, 0, []) ~f:(fun (off, total, seq) (name, t) -> + let fsize = sizeof t in + let pad = Bap_c_size.padding (size#alignment t) off in + ( off + fsize + pad, + total + fsize + pad, + imm fsize (Field (name, layout t)) + :: (match pad with 0 -> seq | _ -> padding pad :: seq) )) + |> fun (_, total, ss) -> + let fullsize = sizeof t in + let pad = max 0 (fullsize - total) in + let ss = if pad = 0 then ss else padding (fullsize - total) :: ss in + seq (List.rev ss) + | `Union { Spec.t = { Compound.fields = fs } } -> + let total = sizeof t in + let variants = + List.map fs ~f:(fun (name, t) -> + let fsize = sizeof t in + let pad = max 0 (total - fsize) in + let field = imm fsize @@ Field (name, layout t) in + match pad with 0 -> field | _ -> seq [ field; padding pad ]) + in + imm total (Union variants) + | `Function _ -> ptr (imm (Size.in_bits size#pointer) Undef) + in layout t let rec size_of_data size : Data.t -> int = function - | Imm (size,_) -> Size.in_bits size + | Imm (size, _) -> Size.in_bits size | Seq xs -> List.sum (module Int) ~f:(size_of_data size) xs - | Ptr _ -> Size.in_bits (size#pointer) + | Ptr _ -> Size.in_bits size#pointer let rec size_of_layout size : Data.layout -> int = - fun {layout} -> size_of_datum size layout + fun { layout } -> size_of_datum size layout + and size_of_datum size : _ Data.datum -> int = function - | Imm (size,_) -> size + | Imm (size, _) -> size | Seq xs -> List.sum (module Int) ~f:(size_of_datum size) xs - | Ptr _ -> Size.in_bits (size#pointer) + | Ptr _ -> Size.in_bits size#pointer let array_to_pointer (t : ctype) : ctype = match t with - | `Array ({t={element}} as s) -> `Pointer {s with t = element} + | `Array ({ t = { element } } as s) -> `Pointer { s with t = element } | t -> t -let decay_arrays : proto -> proto = fun proto -> { +let decay_arrays : proto -> proto = + fun proto -> + { proto with return = array_to_pointer proto.return; args = List.Assoc.map ~f:array_to_pointer proto.args; } -let coerce ltyp rtyp exp = match ltyp,rtyp with - | Type.Mem _,_| _,Type.Mem _ - | Type.Unk,_ | _, Type.Unk -> exp - | Imm m, Imm n -> match Int.compare m n with - | 0 -> exp - | 1 -> Bil.(cast signed m exp) - | _ -> Bil.(cast low m exp) - - -let create_arg size i intent name t (data,exp) sub = - let layout = match data with +let coerce ltyp rtyp exp = + match (ltyp, rtyp) with + | Type.Mem _, _ | _, Type.Mem _ | Type.Unk, _ | _, Type.Unk -> exp + | Imm m, Imm n -> ( + match Int.compare m n with + | 0 -> exp + | 1 -> Bil.(cast signed m exp) + | _ -> Bil.(cast low m exp)) + +let create_arg size i intent name t (data, exp) sub = + let layout = + match data with | Data.Ptr _ -> - if Bap_c_type.is_pointer t then layout size t - else layout size (Bap_c_type.pointer t) - | _ -> layout size t in + if Bap_c_type.is_pointer t then layout size t + else layout size (Bap_c_type.pointer t) + | _ -> layout size t + in let ltyp = Type.imm (size_of_layout size layout) in let rtyp = Type.infer_exn exp in - let name = if String.is_empty name then sprintf "arg%d" (i+1) else name in + let name = if String.is_empty name then sprintf "arg%d" (i + 1) else name in let var = Var.create (Sub.name sub ^ "_" ^ name) ltyp in let arg = Arg.create ~intent var @@ coerce ltyp rtyp exp in let arg = Term.set_attr arg Attrs.data data in @@ -185,35 +194,42 @@ let create_arg size i intent name t (data,exp) sub = let arg = Term.set_attr arg Attrs.layout layout in arg - - let models = Hashtbl.create (module Theory.Target) let register_model target model = - if Hashtbl.mem models target - then invalid_argf "A data model for target %s is already set" - (Theory.Target.to_string target) (); - Hashtbl.add_exn models target (model :> Bap_c_size.base) - -let model target = match Hashtbl.find models target with + if Hashtbl.mem models target then + invalid_argf "A data model for target %s is already set" + (Theory.Target.to_string target) + (); + Hashtbl.add_exn models ~key:target ~data:(model :> Bap_c_size.base) + +let model target = + match Hashtbl.find models target with | Some m -> m - | None -> if Theory.Target.bits target = 32 - then new Bap_c_size.base `LP32 - else new Bap_c_size.base `LP64 + | None -> + if Theory.Target.bits target = 32 then new Bap_c_size.base `LP32 + else new Bap_c_size.base `LP64 let registry = Hashtbl.create (module Theory.Target) let register name abi = - let target = match Theory.Target.lookup ~package:"bap" name with + let target = + match Theory.Target.lookup ~package:"bap" name with | Some t -> t - | None -> invalid_argf - "The name of the abi should be a valid name. Got %s. \ - See `bap list targets` for the list valid names" name () in + | None -> + invalid_argf + "The name of the abi should be a valid name. Got %s. See `bap list \ + targets` for the list valid names" + name () + in Hashtbl.add registry ~key:target ~data:abi |> function | `Ok -> () | `Duplicate -> - invalid_argf "The processor for ABI %s is already registered. \ - Please pick a unique name" name () + invalid_argf + "The processor for ABI %s is already registered. Please pick a unique \ + name" + name () + let register_abi = register let get_processor name = @@ -223,56 +239,61 @@ let get_processor name = let lookup = Hashtbl.find registry - -let get_prototype gamma name = match gamma name with +let get_prototype gamma name = + match gamma name with | Some (`Function proto) -> proto | _ -> - let open Bap_c_type in - Spec.{ - qualifier = `no_qualifier; - attrs = []; - t = Proto.{ - args = []; - variadic = false; - return = `Basic { - qualifier = Qualifier.{ - const = false; - volatile = false; - restrict = (); - }; - attrs = []; - t = `sint; - } + let open Bap_c_type in + Spec. + { + qualifier = `no_qualifier; + attrs = []; + t = + Proto. + { + args = []; + variadic = false; + return = + `Basic + { + qualifier = + Qualifier. + { const = false; volatile = false; restrict = () }; + attrs = []; + t = `sint; + }; + }; } - } - let apply_args abi size attrs t sub = let t = decay_arrays t in match abi.insert_args sub attrs t with | None -> sub - | Some {return; hidden; params} -> - let params = List.mapi params ~f:(fun i a -> i,a) in - List.map2 params t.Bap_c_type.Proto.args ~f:(fun (i,a) (n,t) -> - create_arg size i (arg_intent t) n t a sub) |> - function - | Unequal_lengths -> - error "The ABI processor generated an incorrect number of \ - argument terms for the subroutine %s: %d <> %d" - (Sub.name sub) - (List.length params) - (List.length t.args); - sub - | Ok args -> - let ret = match return with - | None -> [] - | Some ret -> - let t = t.Bap_c_type.Proto.return in - [create_arg size 0 Out "result" t ret sub] in - let hid = List.mapi hidden ~f:(fun i (t,a) -> - let n = "hidden" ^ if i = 0 then "" else Int.to_string i in - create_arg size 0 Both n t a sub) in - List.fold (args@hid@ret) ~init:sub ~f:(Term.append arg_t) + | Some { return; hidden; params } -> ( + let params = List.mapi params ~f:(fun i a -> (i, a)) in + List.map2 params t.Bap_c_type.Proto.args ~f:(fun (i, a) (n, t) -> + create_arg size i (arg_intent t) n t a sub) + |> function + | Unequal_lengths -> + error + "The ABI processor generated an incorrect number of argument terms \ + for the subroutine %s: %d <> %d" + (Sub.name sub) (List.length params) (List.length t.args); + sub + | Ok args -> + let ret = + match return with + | None -> [] + | Some ret -> + let t = t.Bap_c_type.Proto.return in + [ create_arg size 0 Out "result" t ret sub ] + in + let hid = + List.mapi hidden ~f:(fun i (t, a) -> + let n = "hidden" ^ if i = 0 then "" else Int.to_string i in + create_arg size 0 Both n t a sub) + in + List.fold (args @ hid @ ret) ~init:sub ~f:(Term.append arg_t)) let apply abi size attrs t sub = let sub = apply_args abi size attrs t sub in @@ -281,44 +302,46 @@ let apply abi size attrs t sub = abi.apply_attrs attrs sub let create_api_processor size abi : Bap_api.t = - let stage1 gamma = object(self) - inherit Term.mapper - method! map_sub sub = - if Term.has_attr sub Attrs.proto then sub - else self#apply_proto sub - - method private apply_proto sub = - if Term.has_attr sub Sub.intrinsic - then sub - else - let name = Sub.name sub in - let {Bap_c_type.Spec.t; attrs} = get_prototype gamma name in - apply abi size attrs t sub - end in + let stage1 gamma = + object (self) + inherit Term.mapper + + method! map_sub sub = + if Term.has_attr sub Attrs.proto then sub else self#apply_proto sub + + method private apply_proto sub = + if Term.has_attr sub Sub.intrinsic then sub + else + let name = Sub.name sub in + let { Bap_c_type.Spec.t; attrs } = get_prototype gamma name in + apply abi size attrs t sub + end + in let module Api = struct let language = "c" + type t = Term.mapper + let parse_exn get_api intfs : t = let gamma = String.Table.create () in List.iter intfs ~f:(fun api -> match get_api api with | None -> fail (`Unknown_interface api) - | Some file -> - match Bap_c_parser.run (size :> Bap_c_size.base) file with - | Error e -> fail (`Parser_error (api,e)) - | Ok api -> - List.iter api ~f:(fun (key,t) -> - Hashtbl.set gamma ~key ~data:t)); + | Some file -> ( + match Bap_c_parser.run (size :> Bap_c_size.base) file with + | Error e -> fail (`Parser_error (api, e)) + | Ok api -> + List.iter api ~f:(fun (key, t) -> + Hashtbl.set gamma ~key ~data:t))); stage1 (Hashtbl.find gamma) let parse get ifs = Or_error.try_with (fun () -> parse_exn get ifs) - let mapper = Fn.id end in (module Api) module Stack = struct - let create ?(growsup=false) arch off = + let create ?(growsup = false) arch off = let module Target = (val target_of_arch arch) in let sz = (Arch.addr_size arch :> Size.t) in let width = Size.in_bits sz in @@ -326,17 +349,16 @@ module Stack = struct let mem = Bil.var Target.CPU.mem in let sp = Target.CPU.sp in let off = Word.of_int ~width (off * Size.in_bytes sz) in - let addr = if Word.is_zero off - then Bil.(var sp) - else if growsup - then Bil.(var sp - int off) - else Bil.(var sp + int off) in + let addr = + if Word.is_zero off then Bil.(var sp) + else if growsup then Bil.(var sp - int off) + else Bil.(var sp + int off) + in Bil.load ~mem ~addr endian sz end - module Arg = struct - open Core_kernel[@@warning "-D"] + open Core open Bap_core_theory open Bap.Std open Monads.Std @@ -351,145 +373,141 @@ module Arg = struct type t val create : #C.Size.base -> Theory.Target.t -> t option - val base : t -> var - - (** [slots] returns a list of [(offset,datum,size_in_bits)] - slots, where offset is properly aligned. - *) val slots : t -> (int * C.Data.t * int) list + (** [slots] returns a list of [(offset,datum,size_in_bits)] slots, where + offset is properly aligned. *) - (** [add datum bits] adds [bits] representing [datum] to the - next available stack slot. The stack is growing downwards. *) val add : C.Type.t -> C.Data.t -> int -> t -> t + (** [add datum bits] adds [bits] representing [datum] to the next available + stack slot. The stack is growing downwards. *) - (** [push datum bits] prepends [bits] representing [datume] to - the beginning of the descending stack. *) val push : C.Type.t -> C.Data.t -> int -> t -> t + (** [push datum bits] prepends [bits] representing [datume] to the beginning + of the descending stack. *) - (** [skip bits] skips the specified number of bits. *) val skip : int -> t -> t + (** [skip bits] skips the specified number of bits. *) - (** [is_empty] is true if no stack slots were allocated. *) val is_empty : t -> bool + (** [is_empty] is true if no stack slots were allocated. *) end = struct - type info = { - datum : C.Data.t; - ctype : C.Type.t; - } + type info = { datum : C.Data.t; ctype : C.Type.t } + type t = { base : Var.t; - data : (info option * int) Map.M(Int).t; - align : (info option -> int -> int); + data : (info option * int) Map.M(Int).t; + align : info option -> int -> int; } - let bytes bits = (bits - 1) / 8 + 1 + let bytes bits = ((bits - 1) / 8) + 1 let create (ruler : #C.Size.base) target = - let min_alignment = max + let min_alignment = + max (Theory.Target.data_alignment target / 8) - (Theory.Target.data_addr_size target / 8) in + (Theory.Target.data_addr_size target / 8) + in let align = function - | None -> - C.Size.next_multitude_of ~n:min_alignment - | Some {ctype} -> - let m = Size.in_bytes (ruler#alignment ctype) in - C.Size.next_multitude_of ~n:(max min_alignment m) in + | None -> C.Size.next_multitude_of ~n:min_alignment + | Some { ctype } -> + let m = Size.in_bytes (ruler#alignment ctype) in + C.Size.next_multitude_of ~n:(max min_alignment m) + in match Theory.Target.reg target Theory.Role.Register.stack_pointer with | None -> None - | Some sp -> Some { - base = Var.reify sp; - data = Map.empty (module Int); - align; - } + | Some sp -> + Some { base = Var.reify sp; data = Map.empty (module Int); align } let base stack = stack.base let slots stack = - Map.to_alist stack.data |> - List.filter_map ~f:(fun (off,(info,bits)) -> - match info with - | None -> None - | Some {datum} -> Some (off,datum,bits)) - + Map.to_alist stack.data + |> List.filter_map ~f:(fun (off, (info, bits)) -> + match info with + | None -> None + | Some { datum } -> Some (off, datum, bits)) let singleton entry stack = - {stack with data = Map.singleton (module Int) 0 entry} + { stack with data = Map.singleton (module Int) 0 entry } - let append (info,size) stack = match Map.max_elt stack.data with - | None -> singleton (info,size) stack - | Some (k,(_,s)) -> - let k' = stack.align info (k + bytes s) in - {stack with data = Map.add_exn stack.data k' (info,size)} + let append (info, size) stack = + match Map.max_elt stack.data with + | None -> singleton (info, size) stack + | Some (k, (_, s)) -> + let k' = stack.align info (k + bytes s) in + { + stack with + data = Map.add_exn stack.data ~key:k' ~data:(info, size); + } - let add ctype datum size = append (Some {ctype; datum},size) - let skip size = append (None,size) + let add ctype datum size = append (Some { ctype; datum }, size) + let skip size = append (None, size) let push ctype datum size stack = - Map.fold stack.data ~f:(fun ~key:_ ~data stack -> - append data stack) - ~init:(singleton (Some {ctype; datum},size) stack) + Map.fold stack.data + ~f:(fun ~key:_ ~data stack -> append data stack) + ~init:(singleton (Some { ctype; datum }, size) stack) let is_empty stack = Map.is_empty stack.data end module File = struct - type t = { - args : exp Map.M(Int).t; - bits : int; - } + type t = { args : exp Map.M(Int).t; bits : int } let bits self = self.bits + let deplet self = { self with args = Map.empty (module Int) } - let deplet self = {self with args = Map.empty (module Int)} - - let pop self = match Map.min_elt self.args with + let pop self = + match Map.min_elt self.args with | None -> None - | Some (k,x) -> - Some ({self with args = Map.remove self.args k},x) + | Some (k, x) -> Some ({ self with args = Map.remove self.args k }, x) - let popn n self = match Map.min_elt self.args with + let popn n self = + match Map.min_elt self.args with | None -> None - | Some (k,_) -> match Map.split self.args (k+n-1) with - | _,None,_ -> None - | lt,Some (_,x),rt -> - Some ({self with args = rt}, Map.data lt @ [x]) - - let align n self = match Map.min_elt self.args with + | Some (k, _) -> ( + match Map.split self.args (k + n - 1) with + | _, None, _ -> None + | lt, Some (_, x), rt -> + Some ({ self with args = rt }, Map.data lt @ [ x ])) + + let align n self = + match Map.min_elt self.args with | None -> None - | Some (k,_) -> - let k' = C.Size.next_multitude_of ~n k in - if k = k' then Some (self,()) - else match Map.split self.args k' with - | _,None,_ -> None - | _,_,rt when Map.is_empty rt -> None - | _,Some (k',x),rt -> - Some ({self with args = Map.add_exn rt k' x},()) - - let available {args} = Map.length args + | Some (k, _) -> ( + let k' = C.Size.next_multitude_of ~n k in + if k = k' then Some (self, ()) + else + match Map.split self.args k' with + | _, None, _ -> None + | _, _, rt when Map.is_empty rt -> None + | _, Some (k', x), rt -> + Some ({ self with args = Map.add_exn rt ~key:k' ~data:x }, ())) + + let available { args } = Map.length args let of_list = - List.foldi ~f:(fun pos regs reg -> Map.add_exn regs pos reg) + List.foldi + ~f:(fun pos regs reg -> Map.add_exn regs ~key:pos ~data:reg) ~init:Int.Map.empty - - let of_exps args = { - args = of_list args; - bits = match args with - | [] -> -1 - | r::_ -> match Type.infer_exn r with - | Imm x -> x - | _ -> -1 - } + let of_exps args = + { + args = of_list args; + bits = + (match args with + | [] -> -1 + | r :: _ -> ( match Type.infer_exn r with Imm x -> x | _ -> -1)); + } let create regs = of_exps @@ List.map ~f:Bil.var regs - let of_roles roles t = + let of_roles roles t = let regs = - Theory.Target.regs t ~roles |> - Set.to_list |> - List.map ~f:Var.reify in + Theory.Target.regs t ~roles |> Set.to_list |> List.map ~f:Var.reify + in create regs end @@ -497,26 +515,31 @@ module Arg = struct files : File.t Map.M(Int).t; stack : Stack.t option; ruler : C.Size.base; - where : [`Return | `Inputs | `Hidden]; + where : [ `Return | `Inputs | `Hidden ]; return : (C.Data.t * Bil.exp) option; inputs : (C.Data.t * Bil.exp) list; - hidden :(C.Type.t * (C.Data.t * Bil.exp)) list; + hidden : (C.Type.t * (C.Data.t * Bil.exp)) list; target : Theory.Target.t; } module Arg = struct - module State = struct type t = state end - include Monad.State.Make(State)(Monad.Option) - include Monad.State.T1(State)(Monad.Option) + module State = struct + type t = state + end + + include Monad.State.Make (State) (Monad.Option) + include Monad.State.T1 (State) (Monad.Option) + let reject : unit -> 'a t = fun () -> lift None + let catch : 'a t -> (unit -> 'a t) -> 'a t = - fun x err -> + fun x err -> let* s = get () in match run x s with | None -> err () - | Some (x,s) -> - let+ () = put s in - x + | Some (x, s) -> + let+ () = put s in + x end type semantics = unit @@ -529,58 +552,43 @@ module Arg = struct module Arena = struct let add file = let* s = Arg.get () in - let s = { - s with files = match Map.max_elt s.files with - | None -> Map.singleton (module Int) 0 file - | Some (k,_) -> - Map.add_exn s.files (k+1) file - } in + let s = + { + s with + files = + (match Map.max_elt s.files with + | None -> Map.singleton (module Int) 0 file + | Some (k, _) -> Map.add_exn s.files ~key:(k + 1) ~data:file); + } + in let+ () = Arg.put s in fst (Map.max_elt_exn s.files) let create regs = add (File.create (List.map ~f:Var.reify regs)) let of_exps xs = add (File.of_exps xs) let of_roles roles t = add (File.of_roles roles t) - - let iargs = of_roles Theory.Role.Register.[ - function_argument; - integer; - ] - - let irets = of_roles Theory.Role.Register.[ - function_return; - integer; - ] - - let fargs = of_roles Theory.Role.Register.[ - function_argument; - floating; - ] - - let frets = of_roles Theory.Role.Register.[ - function_return; - floating; - ] - - - let get {files} n = Map.find_exn files n - - let update s n f = match f (get s n) with + let iargs = of_roles Theory.Role.Register.[ function_argument; integer ] + let irets = of_roles Theory.Role.Register.[ function_return; integer ] + let fargs = of_roles Theory.Role.Register.[ function_argument; floating ] + let frets = of_roles Theory.Role.Register.[ function_return; floating ] + let get { files } n = Map.find_exn files n + + let update s n f = + match f (get s n) with | None -> Arg.reject () - | Some (arena,res) -> - Arg.put {s with files = Map.set s.files n arena} >>= fun () -> - Arg.return res + | Some (arena, res) -> + Arg.put { s with files = Map.set s.files ~key:n ~data:arena } + >>= fun () -> Arg.return res + let pop s n = update s n File.pop let popn ~n s a = update s a (File.popn n) let align ~n s a = update s a (File.align n) - let deplet s n = update s n @@ fun s -> Some (File.deplet s,()) + let deplet s n = update s n @@ fun s -> Some (File.deplet s, ()) end let size t = let* s = Arg.get () in - match s.ruler#bits t with - | None -> Arg.reject () - | Some x -> Arg.return x + match s.ruler#bits t with None -> Arg.reject () | Some x -> Arg.return x let alignment t = let+ s = Arg.get () in @@ -588,17 +596,12 @@ module Arg = struct let require cnd = if cnd then Arg.return () else Arg.reject () - let push_arg t exp = Arg.update @@ fun s -> + let push_arg t exp = + Arg.update @@ fun s -> match s.where with - | `Return -> { - s with return = Some (data s.ruler t,exp) - } - | `Hidden -> { - s with hidden = (t, (data s.ruler t, exp)) :: s.hidden - } - | `Inputs -> { - s with inputs = (data s.ruler t, exp) :: s.inputs - } + | `Return -> { s with return = Some (data s.ruler t, exp) } + | `Hidden -> { s with hidden = (t, (data s.ruler t, exp)) :: s.hidden } + | `Inputs -> { s with inputs = (data s.ruler t, exp) :: s.inputs } let register file t = let* s = Arg.get () in @@ -608,16 +611,15 @@ module Arg = struct let* arg = Arena.pop s file in push_arg t arg - let discard ?(n=1) file = - Arg.get () >>= fun s -> Arena.popn n s file >>| fun _ -> () + let discard ?(n = 1) file = + Arg.get () >>= fun s -> + Arena.popn ~n s file >>| fun _ -> () let registers_for_bits file bits = let+ s = Arg.get () in let regs = Arena.get s file in let abits = File.bits regs in - if abits > 0 - then Some ((bits - 1) / abits + 1) - else None + if abits > 0 then Some (((bits - 1) / abits) + 1) else None let count file t = let* s = Arg.get () in @@ -625,14 +627,12 @@ module Arg = struct | None -> Arg.return None | Some bits -> registers_for_bits file bits - let needs_some f = f >>= function - | None -> Arg.reject () - | Some x -> Arg.return x + let needs_some f = + f >>= function None -> Arg.reject () | Some x -> Arg.return x - let registers_needed file bits = - needs_some @@ registers_for_bits file bits + let registers_needed file bits = needs_some @@ registers_for_bits file bits - let concat ?(rev=false) xs = + let concat ?(rev = false) xs = List.reduce_exn ~f:Bil.concat (if rev then List.rev xs else xs) let registers ?rev ?limit file t = @@ -652,29 +652,25 @@ module Arg = struct let* s = Arg.get () in Arena.deplet s file - let switch where = Arg.update @@ fun s -> {s with where} + let switch where = Arg.update @@ fun s -> { s with where } let where = Arg.gets @@ fun s -> s.where let with_hidden f = let* was = where in switch `Hidden >>= fun () -> let* x = f () in - switch was >>| fun () -> - x + switch was >>| fun () -> x let reference file t = - with_hidden @@ fun () -> - register file (C.Type.pointer t) + with_hidden @@ fun () -> register file (C.Type.pointer t) - let pointer file t = - register file (C.Type.pointer t) + let pointer file t = register file (C.Type.pointer t) let update_stack f = let* s = Arg.get () in match s.stack with | None -> Arg.reject () - | Some stack -> - Arg.put {s with stack = Some (f stack)} + | Some stack -> Arg.put { s with stack = Some (f stack) } let push t = let* s = Arg.get () in @@ -686,14 +682,11 @@ module Arg = struct let* bits = size t in update_stack @@ Stack.add t (data s.ruler t) bits - let hidden t = - with_hidden @@ fun () -> - memory (C.Type.pointer t) - + let hidden t = with_hidden @@ fun () -> memory (C.Type.pointer t) let skip_memory bits = update_stack @@ Stack.skip bits let rebase slots = - let* {target} = Arg.get () in + let* { target } = Arg.get () in skip_memory (slots * Theory.Target.data_addr_size target) let load t bits sp base = @@ -701,20 +694,27 @@ module Arg = struct let width = Theory.Target.data_addr_size t in let addr = function | 0 -> Bil.(var sp) - | off -> Bil.(var sp + int (Word.of_int ~width off)) in + | off -> Bil.(var sp + int (Word.of_int ~width off)) + in let endianness = - if Theory.Endianness.(equal le (Theory.Target.endianness t)) - then LittleEndian else BigEndian in + if Theory.Endianness.(equal le (Theory.Target.endianness t)) then + LittleEndian + else BigEndian + in let load_byte off = - Bil.(load ~mem:(var mem) ~addr:(addr off) endianness `r8) in + Bil.(load ~mem:(var mem) ~addr:(addr off) endianness `r8) + in let rec load_bytes bytes loaded off = if loaded < bits then - load_bytes (load_byte off :: bytes) (loaded+8) (off+1) + load_bytes (load_byte off :: bytes) (loaded + 8) (off + 1) else - let bytes = match endianness with + let bytes = + match endianness with | LittleEndian -> bytes - | BigEndian -> List.rev bytes in - Bil.(cast low) bits (concat bytes) in + | BigEndian -> List.rev bytes + in + Bil.(cast low) bits (concat bytes) + in match Size.of_int_opt bits with | Some r -> Bil.(load ~mem:(var mem) ~addr:(addr base) endianness r) | None -> load_bytes [] 0 base @@ -723,9 +723,7 @@ module Arg = struct let stack = let* s = Arg.get () in - match s.stack with - | None -> Arg.reject () - | Some stack -> Arg.return stack + match s.stack with None -> Arg.reject () | Some stack -> Arg.return stack let split_with_memory ?rev ?limit file typ = let* s = Arg.get () in @@ -734,19 +732,18 @@ module Arg = struct let regs = Arena.get s file in let limit = Option.value limit ~default:(File.available regs) in let available = min limit (File.available regs) in - if available >= needed - then + if available >= needed then let* args = Arena.popn ~n:needed s file in - push_arg typ @@ concat ?rev args + push_arg typ @@ concat ?rev args else let* stk = stack in let* t = target in let base = Stack.base stk in - let mbits = bits - available * File.bits regs in + let mbits = bits - (available * File.bits regs) in require (Stack.is_empty stk && available > 0) >>= fun () -> let* regs = Arena.popn ~n:available s file in skip_memory mbits >>= fun () -> - push_arg typ @@ concat ?rev (regs@[load t mbits base 0]) + push_arg typ @@ concat ?rev (regs @ [ load t mbits base 0 ]) let popn_bits arena bits = let* s = Arg.get () in @@ -757,54 +754,53 @@ module Arg = struct let split f1 f2 typ = let* bits = size typ in - let* regs1 = popn_bits f1 (bits/2) in - let* regs2 = popn_bits f2 (bits/2) in - require (is_even bits) >>= fun () -> - push_arg typ @@ concat (regs1@regs2) + let* regs1 = popn_bits f1 (bits / 2) in + let* regs2 = popn_bits f2 (bits / 2) in + require (is_even bits) >>= fun () -> push_arg typ @@ concat (regs1 @ regs2) let either op1 op2 = Arg.catch op1 (fun _ -> op2) - let (<|>) = either + let ( <|> ) = either let choice options = match List.reduce ~f:either options with | Some option -> option | None -> Arg.reject () - - let define ?(return=Arg.return ()) inputs = Arg.sequence [ - switch `Return; - return; - switch `Inputs; - inputs; - ] + let define ?(return = Arg.return ()) inputs = + Arg.sequence [ switch `Return; return; switch `Inputs; inputs ] let reify target ruler grammar : args option = let ruler = (ruler :> Bap_c_size.base) in - let init = { - stack = Stack.create ruler target; - ruler; - files = Map.empty (module Int); - where = `Inputs; - return = None; - inputs = []; - hidden = []; - target; - } in + let init = + { + stack = Stack.create ruler target; + ruler; + files = Map.empty (module Int); + where = `Inputs; + return = None; + inputs = []; + hidden = []; + target; + } + in match Arg.run grammar init with | None -> None - | Some ((),{stack;return;inputs;hidden}) -> - let memory = match stack with - | None -> [] - | Some stack -> - let base = Stack.base stack in - Stack.slots stack |> - List.map ~f:(fun (off,data,bits) -> - data,load target bits base off) in - Some { - return; - params = List.rev_append inputs memory; - hidden = List.rev hidden; - } + | Some ((), { stack; return; inputs; hidden }) -> + let memory = + match stack with + | None -> [] + | Some stack -> + let base = Stack.base stack in + Stack.slots stack + |> List.map ~f:(fun (off, data, bits) -> + (data, load target bits base off)) + in + Some + { + return; + params = List.rev_append inputs memory; + hidden = List.rev hidden; + } let unless cnd prog = if not cnd then prog else Arg.return () let on cnd prog = if cnd then prog else Arg.return () @@ -816,19 +812,19 @@ module Arg = struct let install target ruler pass = let open Bap_core_theory in let abi_name = KB.Name.unqualified (Theory.Target.name target) in - let abi_processor = { - apply_attrs = (fun _ x -> x); - insert_args = fun _ attrs proto -> - reify target ruler (pass attrs proto) - } in + let abi_processor = + { + apply_attrs = (fun _ x -> x); + insert_args = + (fun _ attrs proto -> reify target ruler (pass attrs proto)); + } + in register_abi abi_name abi_processor; register_model target ruler; Bap_abi.register_pass @@ fun proj -> - if Theory.Target.equal (Project.target proj) target - then begin + if Theory.Target.equal (Project.target proj) target then ( Bap_api.process (create_api_processor ruler abi_processor); - Project.set proj Bap_abi.name abi_name - end + Project.set proj Bap_abi.name abi_name) else proj module Language = struct @@ -838,23 +834,28 @@ module Arg = struct type statements = statement list type command = predicate * statement type commands = command list - type 'a cls = [>] as 'a + type 'a cls = [> ] as 'a module type V1 = sig - val install : Theory.Target.t -> #Bap_c_size.base -> - ((?finish:(unit Arg.t) -> - return:(alignment:int -> int -> statement) -> - (alignment:int -> int -> statement) -> unit Arg.t) -> + val install : + Theory.Target.t -> + #Bap_c_size.base -> + ((?finish:unit Arg.t -> + return:(alignment:int -> int -> statement) -> + (alignment:int -> int -> statement) -> unit Arg.t) -> + unit Arg.t) -> unit val sequence : commands -> statement val select : commands -> statement - val case : (ctype -> 'a cls Arg.t) -> ('a cls * statement) list -> statement + + val case : + (ctype -> 'a cls Arg.t) -> ('a cls * statement) list -> statement + val any : predicates -> predicate val all : predicates -> predicate val neither : predicates -> predicate - val is : bool -> predicate val otherwise : predicate val always : predicate @@ -864,74 +865,69 @@ module Arg = struct include Monad.Syntax.S with type 'a t := 'a Arg.t include Monad.Syntax.Let.S with type 'a t := 'a Arg.t - end - module V1 : V1 = struct + module V1 : V1 = struct let sequence cmds arg = - Arg.List.iter cmds ~f:(fun (cnd,action) -> + Arg.List.iter cmds ~f:(fun (cnd, action) -> if cnd arg then action arg else Arg.return ()) let select options arg = - List.find_map options ~f:(fun (cnd,action) -> - if cnd arg then Some (action arg) else None) |> function + List.find_map options ~f:(fun (cnd, action) -> + if cnd arg then Some (action arg) else None) + |> function | Some action -> action | None -> Arg.reject () - - let case - : (ctype -> 'a cls Arg.t) -> ('a cls * statement) list -> statement - = fun classify cmds arg -> - let* cls = classify arg in - List.find_map cmds ~f:(fun (case,cmd) -> - Option.some_if (Poly.equal cls case) cmd) |> function - | None -> Arg.reject () - | Some cmd -> cmd arg + let case : + (ctype -> 'a cls Arg.t) -> ('a cls * statement) list -> statement = + fun classify cmds arg -> + let* cls = classify arg in + List.find_map cmds ~f:(fun (case, cmd) -> + Option.some_if (Poly.equal cls case) cmd) + |> function + | None -> Arg.reject () + | Some cmd -> cmd arg let is cnd = const cnd let otherwise = is true let any ps x = List.exists ps ~f:(fun p -> p x) let all ps x = List.for_all ps ~f:(fun p -> p x) let neither ps x = List.for_all ps ~f:(fun p -> not (p x)) - - let choose options arg = - choice (List.map options ~f:(fun f -> f arg)) - + let choose options arg = choice (List.map options ~f:(fun f -> f arg)) let otherwise = Fn.const true let always = Fn.const true let never = Fn.const false - let combine xs arg = Arg.List.iter xs ~f:(fun x -> x arg) - - let install - : Theory.Target.t -> #Bap_c_size.base -> + let install : + Theory.Target.t -> + #Bap_c_size.base -> ((?finish:unit Arg.t -> - return:(alignment:int -> int -> statement) -> - (alignment:int -> int -> statement) -> unit Arg.t) -> + return:(alignment:int -> int -> statement) -> + (alignment:int -> int -> statement) -> unit Arg.t) -> + unit Arg.t) -> unit = - fun target data k -> - install target data @@ fun _ {Bap_c_type.Proto.return=r; args=xs} -> - k @@ fun ?(finish=Arg.return ()) ~return args -> - let return = match r with + fun target data k -> + install target data + @@ fun _ { Bap_c_type.Proto.return = r; args = xs } -> + k @@ fun ?(finish = Arg.return ()) ~return args -> + let return = + match r with | `Void -> Arg.return () | _ -> - let* size = size r in - let* alignment = alignment r in - return ~alignment size r in - let inputs = Arg.List.iter xs ~f:(fun (_,t) -> - let* size = size t in - let* alignment = alignment t in - args ~alignment size t) in - Arg.sequence [ - switch `Return; - return; - switch `Inputs; - inputs; - finish; - ] - + let* size = size r in + let* alignment = alignment r in + return ~alignment size r + in + let inputs = + Arg.List.iter xs ~f:(fun (_, t) -> + let* size = size t in + let* alignment = alignment t in + args ~alignment size t) + in + Arg.sequence [ switch `Return; return; switch `Inputs; inputs; finish ] include Arg.Syntax include Arg.Let @@ -939,7 +935,9 @@ module Arg = struct include V1 end + include Arg + let reject = Arg.reject end diff --git a/lib/bap_c/bap_c_abi.mli b/lib/bap_c/bap_c_abi.mli index 39037b170..b3ac83624 100644 --- a/lib/bap_c/bap_c_abi.mli +++ b/lib/bap_c/bap_c_abi.mli @@ -1,212 +1,183 @@ (** C language ABI. - This module provides a common interface for building ABI support - modules for C language. -*) + This module provides a common interface for building ABI support modules for + C language. *) -open Core_kernel[@@warning "-D"] +open Core open Bap_core_theory open Bap.Std open Monads.Std open Bap_c_type - -(** Function formal parameter is represented as a pair of - an abstraction of data, that is passed via the parameter, - and a BIL expression, that denotes the parameter.*) type param = Bap_c_data.t * exp +(** Function formal parameter is represented as a pair of an abstraction of + data, that is passed via the parameter, and a BIL expression, that denotes + the parameter.*) -(** subroutine argument list is split into three parts: - [return] is the return arguments, that is optional; - [params] are regular positional parameters, the length - of the [params] list must equal to the amount of the - formals in the function prototype; - [hidden] are hidden parameters, that are inserted by abi - to pass special arguments, like [this] pointer or a pointer - to a structural value, for example. - - The api processor, created by this module, will insert arg terms - into sub in the following way: - - - nth positional argument corresponds to nth arg term (counting - from 0). - - the last arg term corresponds to the return argument, if any; - - all hidden arguments are put between the last positional and the - return argument.*) type args = { return : param option; hidden : (Bap_c_type.t * param) list; params : param list; } +(** subroutine argument list is split into three parts: [return] is the return + arguments, that is optional; [params] are regular positional parameters, the + length of the [params] list must equal to the amount of the formals in the + function prototype; [hidden] are hidden parameters, that are inserted by abi + to pass special arguments, like [this] pointer or a pointer to a structural + value, for example. -(** an abi processor. - Each architecture registers its own abi processor, that is - responsible for dispatching the processed subroutine between - architecture specific abi processors.*) -type t = { - (** [insert_args sub attrs proto] infer a list of arguments that - should be inserted for a subroutine [sub] annotated with the - attribute list [attrs] *) - insert_args : sub term -> attr list -> proto -> args option; + The api processor, created by this module, will insert arg terms into sub in + the following way: - (** [apply_attrs attrs sub] transform a subroutine based on the - semantics of the list of attributes, attached to it. See also, - C.Attr.register + - nth positional argument corresponds to nth arg term (counting from 0). + - the last arg term corresponds to the return argument, if any; + - all hidden arguments are put between the last positional and the return + argument.*) - *) +type t = { + insert_args : sub term -> attr list -> proto -> args option; + (** [insert_args sub attrs proto] infer a list of arguments that should be + inserted for a subroutine [sub] annotated with the attribute list + [attrs] *) apply_attrs : attr list -> sub term -> sub term; + (** [apply_attrs attrs sub] transform a subroutine based on the semantics + of the list of attributes, attached to it. See also, C.Attr.register + *) } +(** an abi processor. Each architecture registers its own abi processor, that is + responsible for dispatching the processed subroutine between architecture + specific abi processors.*) -(** [create_api_processor size t] packs an api processor. The - processor will insert arg terms into each recognized subroutine, - propagate some known C attributes into corresponding BIR - attributes, annotate each inserted arg term with its corresponding - C type and datum model, and annotate each regognized subroutine - with its C prototype. - - The api processor relies on an availability of a front end parser - for C language.*) val create_api_processor : #Bap_c_size.base -> t -> Bap_api.t +(** [create_api_processor size t] packs an api processor. The processor will + insert arg terms into each recognized subroutine, propagate some known C + attributes into corresponding BIR attributes, annotate each inserted arg + term with its corresponding C type and datum model, and annotate each + regognized subroutine with its C prototype. + The api processor relies on an availability of a front end parser for C + language.*) -(** [data size t] creates an abstraction of data that is represented - by type [t]. The [size] parameter defines a data model, e.g., - sizes of primitive types, padding and alignment restrictions, etc. - - The abstraction includes inner and trailing paddings, when - necessary. *) val data : #Bap_c_size.base -> Bap_c_type.t -> Bap_c_data.t +(** [data size t] creates an abstraction of data that is represented by type + [t]. The [size] parameter defines a data model, e.g., sizes of primitive + types, padding and alignment restrictions, etc. + The abstraction includes inner and trailing paddings, when necessary. *) +val layout : #Bap_c_size.base -> Bap_c_type.t -> Bap_c_data.layout (** [layout size t] computes the c data type layout. @since 2.5.0 *) -val layout : #Bap_c_size.base -> Bap_c_type.t -> Bap_c_data.layout - -(** [model target] returns the data model for the given target. - - @since 2.5.0 *) val model : Theory.Target.t -> Bap_c_size.base +(** [model target] returns the data model for the given target. + @since 2.5.0 *) +val apply : t -> #Bap_c_size.base -> attr list -> proto -> sub term -> sub term (** [apply processor attrs proto sub] applies the abi processor to the subroutine [sub]. - The function inserts arguments and attaches appropriate arguments - to the function and its subterms, such as strores the type of each - argument, the provided C attributes, stores the prototype, computes - and attaches data layouts, etc. + The function inserts arguments and attaches appropriate arguments to the + function and its subterms, such as strores the type of each argument, the + provided C attributes, stores the prototype, computes and attaches data + layouts, etc. @since 2.5.0 *) -val apply : t -> #Bap_c_size.base -> attr list -> proto -> sub term -> sub term - -(** [arg_intent t] infers argument intention based on its C type. If - an argument is passed by value, i.e., it is a c basic type, then - it is an input argument. If an argument is a reference, but not a - function, then it is input/output if any value, referenced by the - argument is non-const. A reference to function always has the - input intent. If an argyment is a structure or union, then it is - input/output if any of its fields is input/output. -*) val arg_intent : Bap_c_type.t -> intent +(** [arg_intent t] infers argument intention based on its C type. If an argument + is passed by value, i.e., it is a c basic type, then it is an input + argument. If an argument is a reference, but not a function, then it is + input/output if any value, referenced by the argument is non-const. A + reference to function always has the input intent. If an argyment is a + structure or union, then it is input/output if any of its fields is + input/output. *) -(** [register name t] registers an abi processor [t] named [name] that - may be used by subroutines in this project. - - @after 2.5.0 fails if there is already a processor for the given [name]. - @after 2.5.0 the abi name should be a valid target name. -*) val register : string -> t -> unit [@@deprecated "[since 2022-07] use the Arg module"] +(** [register name t] registers an abi processor [t] named [name] that may be + used by subroutines in this project. + + after 2.5.0 fails if there is already a processor for the given [name]. + after 2.5.0 the abi name should be a valid target name. *) -(** [get_processor name] is used to access an abi processor with its - name.*) val get_processor : string -> t option [@@deprecated "[since 2022-07] use [lookup]"] +(** [get_processor name] is used to access an abi processor with its name.*) - -(** [lookup t] the abi processor associated with the target [t]. - - @since 2.5.0 -*) val lookup : Theory.Target.t -> t option +(** [lookup t] the abi processor associated with the target [t]. + @since 2.5.0 *) -(** An abstraction of a stack, commonly used in C compilers. *) +(** An abstraction of a stack, commonly used in C compilers. *) module Stack : sig - (** [stack = create ?growsup arch] is a function that returns - [n]'th stack slot *) val create : ?growsup:Bool.t -> arch -> int -> exp + (** [stack = create ?growsup arch] is a function that returns [n]'th stack + slot *) end - (** A monadic eDSL for argument passing semantics specification. @since 2.5.0 see also the [Language] module for a higher-level + eDSL, built on top of the primitives described below. - This DSL helps in defining the abi processor's [insert_args] - function. The DSL describes the semantics of argument passing that - is then reified to the [args] structure. The [DSL] is a choice - monad that enables describing the argument passing grammar using - backtracking when the chosen strategy doesn't fit. The [reject ()] - operator will reject the current computation up until the nearest - choice prompt, e.g., in the following example, computations [e1], - [e2], and [e3] are rejected and any side-effects that they might - had are ignored and, instead the [option2] computation is tried - as if the previous sequence had never happend. + This DSL helps in defining the abi processor's [insert_args] function. The + DSL describes the semantics of argument passing that is then reified to the + [args] structure. The [DSL] is a choice monad that enables describing the + argument passing grammar using backtracking when the chosen strategy doesn't + fit. The [reject ()] operator will reject the current computation up until + the nearest choice prompt, e.g., in the following example, computations + [e1], [e2], and [e3] are rejected and any side-effects that they might had + are ignored and, instead the [option2] computation is tried as if the + previous sequence had never happend. {[ - choice [ - sequence [e1; e2; e3; reject ()]; - option2; - ] + choice [ sequence [ e1; e2; e3; reject () ]; option2 ] ]} - Since the purpose of this DSL is to describe how the passed - arguments are read in terms of BIL expressions, the generated - specification could be seen as a grammar and the DSL itself as - a parser combinator, specialized for describing ABI. + Since the purpose of this DSL is to describe how the passed arguments are + read in terms of BIL expressions, the generated specification could be seen + as a grammar and the DSL itself as a parser combinator, specialized for + describing ABI. {2 Example} - Below we define the semantics of [riscv32] and [riscv64] targets. - Both targets have fully specified register files with properly - assigned roles and the register order matches with register names - ordering, so we can use the simplified Arena creating functions. - We have four independent arenas, two for passing in and out integer - arguments, and two corresponding arenas for floating-point - arguments. - - We start with defining the integer calling convention by first - determining how many register are needed to pass an argument. - If the size of the argument couldn't be determined we reject the - computation. Otherwise, if it fits into one register we try to - pass it via a register fallback to memory if there are no - registers available. If it requires two registers we first try to - pass it as an aligned register pair (with the first part going - through the nearest available even register). If we don't have - enough aligned registers, we then split it in two parts and pass - the first part in a register and the second part in the memory. - Finally, if the size is greater than two words we pass it as an - implicit reference. - - The floating-point calling convention assumes the presence of the - hardware floating-point registers but the specification is general - enough to handle the soft floats convention, as any attempt to - pass an argument via the hardware floating-point registers will be - rejected since the corresponding arena will be empty. - - The convention tries to pass a floating-point argument via the - corresponding register file if it fits into a register otherwise - it falls back to the integer registers. When an argument fits into - the floating-point register we first try passing it through the - floating-point file and if it is out of registers we use available - integer registers (in riscv with hardware floating-point registers - it is possible to pass 16 floating-point arguments all in - registers) and finally use the last resort option of using the memory. + Below we define the semantics of [riscv32] and [riscv64] targets. Both + targets have fully specified register files with properly assigned roles and + the register order matches with register names ordering, so we can use the + simplified Arena creating functions. We have four independent arenas, two + for passing in and out integer arguments, and two corresponding arenas for + floating-point arguments. + + We start with defining the integer calling convention by first determining + how many register are needed to pass an argument. If the size of the + argument couldn't be determined we reject the computation. Otherwise, if it + fits into one register we try to pass it via a register fallback to memory + if there are no registers available. If it requires two registers we first + try to pass it as an aligned register pair (with the first part going + through the nearest available even register). If we don't have enough + aligned registers, we then split it in two parts and pass the first part in + a register and the second part in the memory. Finally, if the size is + greater than two words we pass it as an implicit reference. + + The floating-point calling convention assumes the presence of the hardware + floating-point registers but the specification is general enough to handle + the soft floats convention, as any attempt to pass an argument via the + hardware floating-point registers will be rejected since the corresponding + arena will be empty. + + The convention tries to pass a floating-point argument via the corresponding + register file if it fits into a register otherwise it falls back to the + integer registers. When an argument fits into the floating-point register we + first try passing it through the floating-point file and if it is out of + registers we use available integer registers (in riscv with hardware + floating-point registers it is possible to pass 16 floating-point arguments + all in registers) and finally use the last resort option of using the + memory. {[ module Arg = C.Abi.Arg @@ -271,277 +242,246 @@ end let () = List.iter ~f:define Bap_risv_target.[riscv32; riscv64] ]} - @since 2.3.0 -*) + @since 2.3.0 *) module Arg : sig type 'a t - (** an ordered expendable collection of registers *) type arena + (** an ordered expendable collection of registers *) type semantics - type ctype = Bap_c_type.t - - (** [define ?return args] the toplevel function for defining - argument passing semantics. - - The function has two entries, the optional [return] entry - describes the semantics of passing of the return value, - and the second section describes the semantics of passing the - list of arguments. - - The semantics is defined as a sequence of these two rules, - with the return rule evaluated first. Therefore, if [return] - is rejected the whole semantics will be rejected. - *) val define : ?return:unit t -> unit t -> semantics t + (** [define ?return args] the toplevel function for defining argument passing + semantics. - (** [register arena t] passes the argument of type [t] using - the next available register in [arena]. - - The computation is rejected if no registers are available; - if [t] doesn't fit into a register in [arena]; or if size - of [t] can't be determined. - *) - val register : arena -> ctype -> unit t + The function has two entries, the optional [return] entry describes the + semantics of passing of the return value, and the second section describes + the semantics of passing the list of arguments. + The semantics is defined as a sequence of these two rules, with the return + rule evaluated first. Therefore, if [return] is rejected the whole + semantics will be rejected. *) - (** [registers arena t] passes the argument in consecutive - registers from [arena]. - - Rejects the computation if [arena] doesn't have the necessary - number of registers; the number of required registers is greater - than [limit]; or if the size of [t] is unknown. + val register : arena -> ctype -> unit t + (** [register arena t] passes the argument of type [t] using the next + available register in [arena]. - If [rev] is true, then the allocated registers will be used in - the reversed order. + The computation is rejected if no registers are available; if [t] doesn't + fit into a register in [arena]; or if size of [t] can't be determined. *) - @since 2.5.0 accepts the optional [rev] parameter. - *) val registers : ?rev:bool -> ?limit:int -> arena -> ctype -> unit t + (** [registers arena t] passes the argument in consecutive registers from + [arena]. + Rejects the computation if [arena] doesn't have the necessary number of + registers; the number of required registers is greater than [limit]; or if + the size of [t] is unknown. - (** [align_even arena] ensures that the first available register in - [arena] has even number. + If [rev] is true, then the allocated registers will be used in the + reversed order. - Registers in an arena are enumerated from zero in the order of - their appearence in the arena specification. This function - removes, when necessary, a register form the arena, so that the - next available register has an even number. + @since 2.5.0 accepts the optional [rev] parameter. *) - The computation is rejected if there are no more even registers - in [arena]. - *) val align_even : arena -> unit t + (** [align_even arena] ensures that the first available register in [arena] + has even number. - (** [deplet arena] unconditionally consumes all registers in arena. + Registers in an arena are enumerated from zero in the order of their + appearence in the arena specification. This function removes, when + necessary, a register form the arena, so that the next available register + has an even number. + + The computation is rejected if there are no more even registers in + [arena]. *) - The computation is never rejected. - *) val deplet : arena -> unit t + (** [deplet arena] unconditionally consumes all registers in arena. + The computation is never rejected. *) + val discard : ?n:int -> arena -> unit t (** [discard arena] discards one location from [arena]. @param n if specified discards [n] registers instead of one. - The computation is never rejected, if the arena is empty nothing - is changed. - - @since 2.5.0 *) - val discard : ?n:int -> arena -> unit t + The computation is never rejected, if the arena is empty nothing is + changed. - (** [reference arena t] passes a hidden pointer to [t] via - the first available register in [arena]. - - Rejects the computation if there are no available registers in - [arena]. The size of [t] is not required. + @since 2.5.0 *) - Note, that [reference] and [hidden] are increasing the number of - hidden arguments of a subroutine, but do not add the actual - arguments. *) val reference : arena -> ctype -> unit t + (** [reference arena t] passes a hidden pointer to [t] via the first available + register in [arena]. + Rejects the computation if there are no available registers in [arena]. + The size of [t] is not required. - (** [pointer arena t] passes argument [t] as a pointer. - - Rejects the computation if [arena] is empty. The size of [t] is - not required. + Note, that [reference] and [hidden] are increasing the number of hidden + arguments of a subroutine, but do not add the actual arguments. *) - @since 2.5.0 *) val pointer : arena -> ctype -> unit t + (** [pointer arena t] passes argument [t] as a pointer. + Rejects the computation if [arena] is empty. The size of [t] is not + required. - (** [hidden t] inserts a hidden pointer to [t] into the next - available stack slot. - - The computation is rejected if the target doesn't have a stack. + @since 2.5.0 *) - @since 2.5.0 *) val hidden : ctype -> unit t + (** [hidden t] inserts a hidden pointer to [t] into the next available stack + slot. - (** [memory t] passes the argument of type [t] in the next - available stack slot. - - Rejects the computation if the size of [t] is not known or - if the target doesn't have a register with the stack pointer - role. - - The address of the slot is aligned corresponding to the - alignment requirements of [t] but no less than the - minimal data alignment requirements of the architecture or - the natural alignment of the stack pointer. + The computation is rejected if the target doesn't have a stack. - Note, passing a number arguments via a descending stack using - [memory] will pass the arguments in the right-to-left (RTL aka - C) order, i.e., the first passed argument will end up at the - bottom (will have the minimal address). Use [push] if you want - the left-to-right order. + @since 2.5.0 *) - *) val memory : ctype -> unit t + (** [memory t] passes the argument of type [t] in the next available stack + slot. - (** [rebase off] rebases the stack position by [n] words. + Rejects the computation if the size of [t] is not known or if the target + doesn't have a register with the stack pointer role. - @since 2.5.0 - *) - val rebase : int -> unit t + The address of the slot is aligned corresponding to the alignment + requirements of [t] but no less than the minimal data alignment + requirements of the architecture or the natural alignment of the stack + pointer. - (** [split a1 a2 t] passes the lower half of the value - via registers in the arena [a1] and the higher via the registers - in the arena [a2]. + Note, passing a number arguments via a descending stack using [memory] + will pass the arguments in the right-to-left (RTL aka C) order, i.e., the + first passed argument will end up at the bottom (will have the minimal + address). Use [push] if you want the left-to-right order. *) - The compuation is rejected if either arean doesn't have enough - registers to pass its half, or if the size of [t] is odd or unknown. + val rebase : int -> unit t + (** [rebase off] rebases the stack position by [n] words. @since 2.5.0 *) + val split : arena -> arena -> ctype -> unit t + (** [split a1 a2 t] passes the lower half of the value via registers in the + arena [a1] and the higher via the registers in the arena [a2]. + The compuation is rejected if either arean doesn't have enough registers + to pass its half, or if the size of [t] is odd or unknown. - (** [split_with_memory arena t] passes the low order part of the - value in a registers (if available) and the rest in the memory. + @since 2.5.0 *) - The size of the part that is passed via the registers is equal - to the number of avaliable registers, but not greater than the - [limit] (if specified). The part that is passed via the - stack is aligned to the stack boundary. + val split_with_memory : ?rev:bool -> ?limit:int -> arena -> ctype -> unit t + (** [split_with_memory arena t] passes the low order part of the value in a + registers (if available) and the rest in the memory. - If [rev] is [true] then pass the object in the reversed order. + The size of the part that is passed via the registers is equal to the + number of avaliable registers, but not greater than the [limit] (if + specified). The part that is passed via the stack is aligned to the stack + boundary. - Rejects the computation if the size of [t] is not known; if - [arena] is empty; or if some other argument is already passed - via memory. + If [rev] is [true] then pass the object in the reversed order. - @after 2.5.0 accepts the [rev] parameter. - @after 2.5.0 accepts the [limit] parameter. + Rejects the computation if the size of [t] is not known; if [arena] is + empty; or if some other argument is already passed via memory. - @after 2.5.0 passes as much as possible (up to the limit) of the - object via registers. + after 2.5.0 accepts the [rev] parameter. after 2.5.0 accepts the [limit] + parameter. - @before 2.5.0 was passing at most one word via registers. - *) - val split_with_memory : ?rev:bool -> ?limit:int -> arena -> ctype -> unit t + after 2.5.0 passes as much as possible (up to the limit) of the object via + registers. + @before 2.5.0 was passing at most one word via registers. *) + val push : ctype -> unit t (** [push t] pushes the argument of type [t] via stack. Rejects the computation if the size of [t] is not known. - The address of the slot is aligned corresponding to the - alignment requirements of [t] but no less than the - minimal data alignment requirements of the architecture or - the natural alignment of the stack pointer. - - When passing a number of arguments via a descending stack, the - last pushed argument will be at the bottom of the stack, i.e., - will have the minimal address. This corresponds to the LTR aka - Pascal ordering. - *) - val push : ctype -> unit t + The address of the slot is aligned corresponding to the alignment + requirements of [t] but no less than the minimal data alignment + requirements of the architecture or the natural alignment of the stack + pointer. - (** [count arena t] counts the number of registers need to pass a - value of type [t]. + When passing a number of arguments via a descending stack, the last pushed + argument will be at the bottom of the stack, i.e., will have the minimal + address. This corresponds to the LTR aka Pascal ordering. *) - Returns [None] if the size of [t] is not known or if the [arena] - size is empty. - *) val count : arena -> ctype -> int option t + (** [count arena t] counts the number of registers need to pass a value of + type [t]. + Returns [None] if the size of [t] is not known or if the [arena] size is + empty. *) + val size : ctype -> int t (** [size t] is the size in bits of an object of type [t]. - The computation is rejected if the size is unknown, i.e., the - type is incomplete. + The computation is rejected if the size is unknown, i.e., the type is + incomplete. @since 2.5.0 *) - val size : ctype -> int t - (** [require cnd] rejects the computation if [cnd] doesn't hold. - - @since 2.5.0 *) val require : bool -> unit t + (** [require cnd] rejects the computation if [cnd] doesn't hold. - (** [either option1 option2] tries to pass using [option1] and - if it is rejected uses [option2]. + @since 2.5.0 *) - For example, [either (register x) (memory x)] tries to pass - [x] via a register and if it is not possible (either because - [x] doesn't fit into a register or there are no registers - available) tries to pass it via memory. - *) val either : 'a t -> 'a t -> 'a t + (** [either option1 option2] tries to pass using [option1] and if it is + rejected uses [option2]. + For example, [either (register x) (memory x)] tries to pass [x] via a + register and if it is not possible (either because [x] doesn't fit into a + register or there are no registers available) tries to pass it via memory. + *) - (** [choice [o1 o2 ... oN]] tries options in order until the first - one that is not rejected. *) val choice : 'a t list -> 'a t + (** [choice [o1 o2 ... oN]] tries options in order until the first one that is + not rejected. *) + + (** A high-level ABI-specification language. + + This module makes it easier to describe various calling conventions using + high-level combinators, built on top of the lower level primitives of the + [Arg] language. + + The idea is that you can open [Arg.Language] and have all combinators + available in the scope. To enable language extension, we also provide the + versioned modules, [Arg.Language.V1], and so on. Every extension of the + language, i.e., an addition of a new operator or combinator, will go into + a separate module, so that if you are using [Arg.Language.Vx] it is + guaranteed that the language changes will not break anything. + + In [Arg.Language] we describe calling conventions declaratively using a + list of commands, where each command is a guarded statement, i.e., a pair + of a predicate and the statement. The statement is using the [Arg] + operators to describe argument passing routine, and the predicate checks + if this is applicable to the given arguement. Various combinators combine + commands and predicates, into a final statement that fully describes the + argument passing procedure for the given subroutine. - (** A high-level ABI-specification language. - - This module makes it easier to describe various calling - conventions using high-level combinators, built on top - of the lower level primitives of the [Arg] language. - - The idea is that you can open [Arg.Language] and have all - combinators available in the scope. To enable language - extension, we also provide the versioned modules, - [Arg.Language.V1], and so on. Every extension of the language, - i.e., an addition of a new operator or combinator, will - go into a separate module, so that if you are using - [Arg.Language.Vx] it is guaranteed that the language changes - will not break anything. - - - In [Arg.Language] we describe calling conventions declaratively - using a list of commands, where each command is a guarded - statement, i.e., a pair of a predicate and the statement. The - statement is using the [Arg] operators to describe argument - passing routine, and the predicate checks if this is applicable - to the given arguement. Various combinators combine commands - and predicates, into a final statement that fully describes the - argument passing procedure for the given subroutine. - - @since 2.5.0 - *) + @since 2.5.0 *) module Language : sig - type predicate = ctype -> bool type statement = ctype -> unit t type predicates = predicate list type statements = statement list type command = predicate * statement type commands = command list - type 'a cls = [>] as 'a + type 'a cls = [> ] as 'a module type V1 = sig - + val install : + Theory.Target.t -> + #Bap_c_size.base -> + ((?finish:unit t -> + return:(alignment:int -> int -> statement) -> + (alignment:int -> int -> statement) -> + unit t) -> + unit t) -> + unit (** [install target data_model specification] - The toplevel function that is used to install the calling - convention for the given target. The general syntax follows - this structure, + The toplevel function that is used to install the calling convention + for the given target. The general syntax follows this structure, {[ describe t data @@ fun declare -> @@ -554,8 +494,8 @@ module Arg : sig declare ~finish ~return arg ]} - For example, the following specification describes ARM - calling convention AAPCS32, + For example, the following specification describes ARM calling + convention AAPCS32, {[ let define t = @@ -563,42 +503,39 @@ module Arg : sig let* iargs = Arg.Arena.iargs t in let* irets = Arg.Arena.irets t in let rev = Theory.Endianness.(Theory.Target.endianness t = le) in - let return ~alignment:_ size = select [ - C.Type.is_basic, select [ - is (size <= 32 * 2), Arg.registers ~rev irets; - otherwise, Arg.reference iargs; - ]; - is (size <= 32), Arg.register irets; - otherwise, Arg.reference iargs; - ] in + let return ~alignment:_ size = + select + [ + ( C.Type.is_basic, + select + [ + (is (size <= 32 * 2), Arg.registers ~rev irets); + (otherwise, Arg.reference iargs); + ] ); + (is (size <= 32), Arg.register irets); + (otherwise, Arg.reference iargs); + ] + in describe ~return @@ fun ~alignment _ -> - sequence [ - is (alignment = 64), const (Arg.align_even iargs); - always, choose [ - Arg.split_with_memory ~rev iargs; - Arg.memory - ]; - ] - ]} - *) - val install : Theory.Target.t -> #Bap_c_size.base -> - ((?finish:(unit t) -> - return:(alignment:int -> int -> statement) -> - (alignment:int -> int -> statement) -> unit t) -> - unit t) -> - unit - + sequence + [ + (is (alignment = 64), const (Arg.align_even iargs)); + ( always, + choose [ Arg.split_with_memory ~rev iargs; Arg.memory ] ); + ] + ]} *) + val sequence : commands -> statement (** [sequence cmd] executes a sequence of predicated statements. - Executes all commands in the specified order. A command's - statement is executed if the commands predicate is evaluates - to [true]. Otherwise the statement is skipped. + Executes all commands in the specified order. A command's statement is + executed if the commands predicate is evaluates to [true]. Otherwise + the statement is skipped. - If an evaluated statement rejects a computation then the - whole sequence will be rejected. *) - val sequence : commands -> statement + If an evaluated statement rejects a computation then the whole + sequence will be rejected. *) + val select : commands -> statement (** [select t spec] selects the first applicable option to pass [t]. The form, @@ -611,192 +548,170 @@ module Arg : sig ] ]} - Tries [pred1 arg], [pred2 arg], ..., [predN arg] in order until - the first one that returns [true] and uses the corresponding - option to pass the argument. + Tries [pred1 arg], [pred2 arg], ..., [predN arg] in order until the + first one that returns [true] and uses the corresponding option to + pass the argument. - The computation is rejected if either the selected option - rejects the computation or none options were selected. *) - val select : commands -> statement + The computation is rejected if either the selected option rejects the + computation or none options were selected. *) + val case : (ctype -> 'a cls t) -> ('a cls * statement) list -> statement (** [case classifer commands] case analysis. - The case combinator first classfies the argument using the - [classfier] function, which shall return a polymoprhic - variant, and then selects a command that matches the - selected class. + The case combinator first classfies the argument using the [classfier] + function, which shall return a polymoprhic variant, and then selects a + command that matches the selected class. - Rejects the computation if there's no matching class. - *) - val case : (ctype -> 'a cls t) -> ('a cls * statement) list -> statement + Rejects the computation if there's no matching class. *) - (** [any ps] holds if any of [ps] holds. *) val any : predicates -> predicate + (** [any ps] holds if any of [ps] holds. *) - (** [all ps] holds if all of [ps] hold.*) val all : predicates -> predicate + (** [all ps] holds if all of [ps] hold.*) - (** [neither ps] holds if neither of [ps] hold. *) val neither : predicates -> predicate + (** [neither ps] holds if neither of [ps] hold. *) - + val is : bool -> predicate (** [is cnd] holds if [cnd] holds. - Example, {[[ - is (size > 64), memory; - otherwise, registers; - - ]]} - *) - val is : bool -> predicate + Example, + {[ + [ (is (size > 64), memory); (otherwise, registers) ] + ]} *) + val otherwise : predicate (** [otherwise] is a predicate that is always [true]. I.e., it is [is true]. - This predicate is supposed to be used with the [select] - combinator as the last, catch-all, predicate, e.g., + This predicate is supposed to be used with the [select] combinator as + the last, catch-all, predicate, e.g., {[ - select arg [ - is_fundamental, pass_fundamental; - is_floating, pass_floating; - otherwise, pass_memory; - ] + select arg + [ + (is_fundamental, pass_fundamental); + (is_floating, pass_floating); + (otherwise, pass_memory); + ] ]} *) - val otherwise : predicate - (** [always] is a predicate that is always [true], i.e., [is true]. *) val always : predicate + (** [always] is a predicate that is always [true], i.e., [is true]. *) - (** [never] is a predicate that never holds, i.e., [is false]. *) val never : predicate + (** [never] is a predicate that never holds, i.e., [is false]. *) - (** [choose options] tries options in order until the first on that - is not rejected. + val choose : statements -> statement + (** [choose options] tries options in order until the first on that is not + rejected. - This combinator is like [choice] but is supposed to be used - inside [select], e.g., + This combinator is like [choice] but is supposed to be used inside + [select], e.g., {[ - select arg [ - p1, choose [o1, o2]; - either [p2; p3], choose [o3,o4]; - ] + select arg + [ + (p1, choose [ (o1, o2) ]); + (either [ p2; p3 ], choose [ (o3, o4) ]); + ] ]} *) - val choose : statements -> statement + val combine : statements -> statement (** [combine xs] combines statements into a signle statement. - Evaluates statements in the order they are specified. If any - of the statements rejects then the whole statement will be - rejected. *) - val combine : statements -> statement + Evaluates statements in the order they are specified. If any of the + statements rejects then the whole statement will be rejected. *) include Monad.Syntax.S with type 'a t := 'a t include Monad.Syntax.Let.S with type 'a t := 'a t - end + include V1 end - - (** [reify t size args] compiles the argument passing specification. - - If the spec is not rejected the returned structure will contain - the reification of the argument passing semantics. - - *) val reify : Theory.Target.t -> #Bap_c_size.base -> semantics t -> args option + (** [reify t size args] compiles the argument passing specification. - + If the spec is not rejected the returned structure will contain the + reification of the argument passing semantics. *) include Monad.S with type 'a t := 'a t include Monad.Choice.S with type 'a t := 'a t - (** An ordered collection of locations. - Arena is an expendable collection of data locations (usually - registers) that are used to pass arguments. Passing an argument - via an arena location consumes it so it is no longer available in the - same computation. + Arena is an expendable collection of data locations (usually registers) + that are used to pass arguments. Passing an argument via an arena location + consumes it so it is no longer available in the same computation. If a computation that used an arena location is later rejected then the - register is available again (the same as with any other - side-effects of a rejected compuation). - - The order of locations in arena, as well as their numbering - according to that order, usually matters. Many targets have - registers with the alphabetic orders of registers matching their - arena orders (with notable exception of x86) that enables the - direct usage of the [Theory.Target.regs] function to create - arenas. *) + register is available again (the same as with any other side-effects of a + rejected compuation). + + The order of locations in arena, as well as their numbering according to + that order, usually matters. Many targets have registers with the + alphabetic orders of registers matching their arena orders (with notable + exception of x86) that enables the direct usage of the + [Theory.Target.regs] function to create arenas. *) module Arena : sig - + val create : _ Theory.Var.t list -> arena t (** [create regs] creates an arena from the ordered list of registers. - All registers must have the same size and the list could be - empty. The registers will be used in the order of their - appereance in the [regs] list. *) - val create : _ Theory.Var.t list -> arena t + All registers must have the same size and the list could be empty. The + registers will be used in the order of their appereance in the [regs] + list. *) + val of_exps : exp list -> arena t (** [of_exps xs] creates an arena from the ordered list of expressions. - The expressions must have the same type and the list could be - empty. The locations will be used in the order in which they - were specified. + The expressions must have the same type and the list could be empty. The + locations will be used in the order in which they were specified. @since 2.5.0 *) - val of_exps : exp list -> arena t - - (** [of_roles t roles] creates an arena from registers of the - specified roles. - The registers are ordered in the alphabetic order. The - returned arena might be empty. *) val of_roles : Theory.role list -> Theory.Target.t -> arena t + (** [of_roles t roles] creates an arena from registers of the specified + roles. + The registers are ordered in the alphabetic order. The returned arena + might be empty. *) - (** [iargs t] the integer argument arena. - - An alias to [of_roles [function_argument; integer]] - *) val iargs : Theory.Target.t -> arena t + (** [iargs t] the integer argument arena. + An alias to [of_roles [function_argument; integer]] *) - (** [irets t] the integer return values arena. - - An alias to [of_roles [function_return; integer]] - *) val irets : Theory.Target.t -> arena t + (** [irets t] the integer return values arena. - (** [fargs t] the floating-point argument arena. + An alias to [of_roles [function_return; integer]] *) - An alias to [of_roles [function_argument; floating]] - *) val fargs : Theory.Target.t -> arena t + (** [fargs t] the floating-point argument arena. + An alias to [of_roles [function_argument; floating]] *) + val frets : Theory.Target.t -> arena t (** [frets t] the floating-point return values arena. - An alias to [of_roles [function_return; floatin]] - *) - val frets : Theory.Target.t -> arena t + An alias to [of_roles [function_return; floatin]] *) end end - +val define : + Theory.Target.t -> + #Bap_c_size.base -> + (attr list -> proto -> Arg.semantics Arg.t) -> + unit (** [define target pass] the high-level ABI specification function. - The function creates an abi processor and registers it using the - name obtained from the [target]. The [pass] function is used to - define the [insert_args] method (with the [sub] argument - ignored). - - The function also registers an ABI pass that checks the project - target and if it matches with the passed [target] the function - creates and registers the C API processor. -*) -val define : Theory.Target.t -> #Bap_c_size.base -> - (attr list -> proto -> Arg.semantics Arg.t) -> unit + The function creates an abi processor and registers it using the name + obtained from the [target]. The [pass] function is used to define the + [insert_args] method (with the [sub] argument ignored). + + The function also registers an ABI pass that checks the project target and + if it matches with the passed [target] the function creates and registers + the C API processor. *) diff --git a/lib/bap_c/bap_c_attr.ml b/lib/bap_c/bap_c_attr.ml index aa00a919c..11c74ca1b 100644 --- a/lib/bap_c/bap_c_attr.ml +++ b/lib/bap_c/bap_c_attr.ml @@ -1,63 +1,58 @@ - -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Bap_c_type +include Self () -include Self() - -module Registry(T : T) = struct +module Registry (T : T) = struct let registry : T.t list ref = ref [] let register x = registry := x :: !registry end type 'a pass = attr -> 'a term -> 'a term -include Registry(struct type t = sub pass end) -let apply attr sub = - List.fold !registry ~init:sub ~f:(fun sub f -> f attr sub) +include Registry (struct + type t = sub pass +end) + +let apply attr sub = List.fold !registry ~init:sub ~f:(fun sub f -> f attr sub) module Gnu = struct let register_attr n f = - let pass {Attr.name; args} sub = - if String.equal n name then f args sub else sub in + let pass { Attr.name; args } sub = + if String.equal n name then f args sub else sub + in register pass - exception Attr_type of string * string - exception Attr_arity of string + exception Attr_type of string * string + exception Attr_arity of string - let int n = - try Int.of_string n with exn -> raise (Attr_type ("",n)) - - let set attr v arg sub = - Term.set_attr arg attr v |> - Term.update arg_t sub + let int n = try Int.of_string n with exn -> raise (Attr_type ("", n)) + let set attr v arg sub = Term.set_attr arg attr v |> Term.update arg_t sub let mark_arg attr v sub i = match Term.nth arg_t sub (int i - 1) with | None -> - warning "failed to apply attribute %s to sub: %s" - (Value.Tag.name attr) (Sub.name sub); - sub + warning "failed to apply attribute %s to sub: %s" (Value.Tag.name attr) + (Sub.name sub); + sub | Some arg -> set attr v arg sub - - let mark_args attr args sub = - List.fold args ~init:sub ~f:(mark_arg attr ()) - + let mark_args attr args sub = List.fold args ~init:sub ~f:(mark_arg attr ()) let alloc_size = mark_args Arg.alloc_size - let format args sub = match args with - | [l;i;_] -> mark_arg Arg.format l sub i + let format args sub = + match args with + | [ l; i; _ ] -> mark_arg Arg.format l sub i | _ -> raise (Attr_arity "3") let nonnull = mark_args Arg.nonnull - let wur args sub = match Term.last arg_t sub with + let wur args sub = + match Term.last arg_t sub with | None -> sub | Some arg -> set Arg.warn_unused () arg sub - let set attr args sub = - Term.set_attr sub attr () + let set attr args sub = Term.set_attr sub attr () let () = register_attr "alloc_size" alloc_size; diff --git a/lib/bap_c/bap_c_attr.mli b/lib/bap_c/bap_c_attr.mli index 9c392874f..1136b91a3 100644 --- a/lib/bap_c/bap_c_attr.mli +++ b/lib/bap_c/bap_c_attr.mli @@ -1,17 +1,17 @@ +open Bap.Std (** Attribute processing. - This module allows to attach a semantic action for C attributes. - Each action is a term transformation, that should do nothing, if - an attribute is not known to him. -*) -open Bap.Std + This module allows to attach a semantic action for C attributes. Each action + is a term transformation, that should do nothing, if an attribute is not + known to him. *) + open Bap_c_type -(** a type of action *) type 'a pass = attr -> 'a term -> 'a term +(** a type of action *) -(** register an action *) val register : sub pass -> unit +(** register an action *) -(** apply all registered actions *) val apply : sub pass +(** apply all registered actions *) diff --git a/lib/bap_c/bap_c_data.ml b/lib/bap_c/bap_c_data.ml index 5151ad7c1..6c9ffc599 100644 --- a/lib/bap_c/bap_c_data.ml +++ b/lib/bap_c/bap_c_data.ml @@ -1,23 +1,11 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Format -type model32 = [ - | `LP32 - | `ILP32 -] - -type model64 = [ - | `ILP64 - | `LLP64 - | `LP64 -] - -type model = [model32 | model64] -type value = - | Top - | Set of word list -[@@deriving bin_io, compare, sexp] +type model32 = [ `LP32 | `ILP32 ] +type model64 = [ `ILP64 | `LLP64 | `LP64 ] +type model = [ model32 | model64 ] +type value = Top | Set of word list [@@deriving bin_io, compare, sexp] type 'd obj = | Basic of Bap_c_type.basic @@ -26,48 +14,42 @@ type 'd obj = | Union of 'd list [@@deriving bin_io, compare, sexp] -type ('d,'s) datum = +type ('d, 's) datum = | Imm of 's * 'd - | Seq of ('d,'s) datum list - | Ptr of ('d,'s) datum + | Seq of ('d, 's) datum list + | Ptr of ('d, 's) datum [@@deriving bin_io, compare, sexp] -type layout = {layout : (layout obj,int) datum} +type layout = { layout : (layout obj, int) datum } [@@deriving bin_io, compare, sexp] -type t = (value,Size.t) datum -[@@deriving bin_io, compare, sexp] +type t = (value, Size.t) datum [@@deriving bin_io, compare, sexp] let pp_value ppf = function | Top -> fprintf ppf "Top" | Set xs -> fprintf ppf "%a" (Seq.pp Word.pp) (Seq.of_list xs) + let rec pp ppf = function - | Imm (sz,v) -> fprintf ppf "%a:%a" pp_value v Size.pp sz + | Imm (sz, v) -> fprintf ppf "%a:%a" pp_value v Size.pp sz | Seq ts -> fprintf ppf "%a" (Seq.pp pp) (Seq.of_list ts) - | Ptr t -> fprintf ppf "%a ptr" pp t + | Ptr t -> fprintf ppf "%a ptr" pp t +let rec pp_layout ppf : layout -> unit = + fun { layout = datum } -> pp_datum ppf datum -let rec pp_layout ppf : layout -> unit = fun {layout=datum} -> - pp_datum ppf datum and pp_datum ppf : (layout obj, int) datum -> unit = function - | Imm (sz,v) -> - fprintf ppf "@[<2>[%a : %d]@]" pp_obj v sz + | Imm (sz, v) -> fprintf ppf "@[<2>[%a : %d]@]" pp_obj v sz | Seq objs -> - fprintf ppf "@[@[{@ "; - pp_print_list ~pp_sep:(fun ppf () -> - fprintf ppf ",@ ") - pp_datum ppf objs; - fprintf ppf "@]@;}@]" - | Ptr t -> - fprintf ppf "*%a" pp_datum t + fprintf ppf "@[@[{@ "; + pp_print_list ~pp_sep:(fun ppf () -> fprintf ppf ",@ ") pp_datum ppf objs; + fprintf ppf "@]@;}@]" + | Ptr t -> fprintf ppf "*%a" pp_datum t + and pp_obj ppf : layout obj -> unit = function | Basic t -> Bap_c_type.(pp ppf (basic t)) - | Field (name,layout) -> - fprintf ppf "@[<2><%s : %a>@]" name pp_layout layout - | Undef -> - fprintf ppf "" + | Field (name, layout) -> fprintf ppf "@[<2><%s : %a>@]" name pp_layout layout + | Undef -> fprintf ppf "" | Union xs -> - fprintf ppf "@["; - pp_print_list ~pp_sep:(fun ppf () -> fprintf ppf "@;| ") - pp_layout ppf xs; - fprintf ppf "@]" + fprintf ppf "@["; + pp_print_list ~pp_sep:(fun ppf () -> fprintf ppf "@;| ") pp_layout ppf xs; + fprintf ppf "@]" diff --git a/lib/bap_c/bap_c_data.mli b/lib/bap_c/bap_c_data.mli index 14b9e3886..926562d9a 100644 --- a/lib/bap_c/bap_c_data.mli +++ b/lib/bap_c/bap_c_data.mli @@ -1,92 +1,74 @@ +open Core (** C Data model. This module defines abstractions for C values. - A value is backed by a datum - a sequence of bits that represents - the value. This module also defines models for integer - representation. -*) -open Core_kernel[@@warning "-D"] -open Bap.Std + A value is backed by a datum - a sequence of bits that represents the value. + This module also defines models for integer representation. *) -(** models for 32 bit systems *) -type model32 = [ - | `LP32 - | `ILP32 -] +open Bap.Std -(** models for 64 bit systems *) -type model64 = [ - | `ILP64 - | `LLP64 - | `LP64 -] +type model32 = [ `LP32 | `ILP32 ] +(** models for 32 bit systems *) +type model64 = [ `ILP64 | `LLP64 | `LP64 ] +(** models for 64 bit systems *) -(** The following table summarize all models of integer - representation. +type model = [ model32 | model64 ] +(** The following table summarize all models of integer representation. {v - LP32 ILP32 ILP64 LLP64 LP64 - char 8 8 8 8 8 - short 16 16 16 16 16 - int 16 32 64 32 32 - long 32 32 64 32 64 - addr 32 32 64 64 64 - v} -*) -type model = [model32 | model64] + LP32 ILP32 ILP64 LLP64 LP64 + char 8 8 8 8 8 + short 16 16 16 16 16 + int 16 32 64 32 32 + long 32 32 64 32 64 + addr 32 32 64 64 64 + v} *) (** A value lattice.*) type value = - | Top (** any possible value *) - | Set of word list (** one of the specified, [Set []] is bot *) + | Top (** any possible value *) + | Set of word list (** one of the specified, [Set []] is bot *) [@@deriving bin_io, compare, sexp] - (** A C Object representation. - The type is parameterized with the object layout representation to - enable the recursive definition of the generalized layout type. + The type is parameterized with the object layout representation to enable + the recursive definition of the generalized layout type. @since 2.5.0 *) type 'd obj = - | Basic of Bap_c_type.basic (** A value of a basic type *) - | Field of (string * 'd) (** A struct or union field *) - | Undef (** Undefined data (padding or code) *) - | Union of 'd list (** Union of values *) + | Basic of Bap_c_type.basic (** A value of a basic type *) + | Field of (string * 'd) (** A struct or union field *) + | Undef (** Undefined data (padding or code) *) + | Union of 'd list (** Union of values *) [@@deriving bin_io, compare, sexp] (** abstraction of a С datum. - The datum is a sequence of bits that represents a C object. We - abstract datum as either an immediate value of the given size, - or a sequence of data, or a pointer to a datum. + The datum is a sequence of bits that represents a C object. We abstract + datum as either an immediate value of the given size, or a sequence of data, + or a pointer to a datum. - @since 2.5.0 -*) -type ('d,'s) datum = - | Imm of 's * 'd (** [Imm (size, value)] *) - | Seq of ('d,'s) datum list (** [Seq [t1; ... ;tN]] *) - | Ptr of ('d,'s) datum (** [Ptr datum] *) + @since 2.5.0 *) +type ('d, 's) datum = + | Imm of 's * 'd (** [Imm (size, value)] *) + | Seq of ('d, 's) datum list (** [Seq [t1; ... ;tN]] *) + | Ptr of ('d, 's) datum (** [Ptr datum] *) [@@deriving bin_io, compare, sexp] - -(** Describes C object's layout. *) -type layout = {layout : (layout obj,int) datum} +type layout = { layout : (layout obj, int) datum } [@@deriving bin_io, compare, sexp] +(** Describes C object's layout. *) - +type t = (value, Size.t) datum [@@deriving bin_io, compare, sexp] (** The datum that uses value lattice for object representation. *) -type t = (value,Size.t) datum -[@@deriving bin_io, compare, sexp] - +val pp : Format.formatter -> t -> unit (** [pp ppf datum] prints the datum in a human-readable form. @since 2.5.0 *) -val pp : Format.formatter -> t -> unit - +val pp_layout : Format.formatter -> layout -> unit (** [pp_layout ppf layout] outputs layout in a human-readable form. @since 2.5.0 *) -val pp_layout : Format.formatter -> layout -> unit diff --git a/lib/bap_c/bap_c_parser.ml b/lib/bap_c/bap_c_parser.ml index b4f273394..770ac0599 100644 --- a/lib/bap_c/bap_c_parser.ml +++ b/lib/bap_c/bap_c_parser.ml @@ -1,4 +1,4 @@ -open Core_kernel[@@warning "-D"] +open Core type decls = (string * Bap_c_type.t) list type parser = Bap_c_size.base -> string -> decls Or_error.t @@ -6,6 +6,7 @@ type parser = Bap_c_size.base -> string -> decls Or_error.t let parser = ref None let provide p = parser := Some p -let run size file = match !parser with +let run size file = + match !parser with | None -> Or_error.error_string "C parser is not available" | Some parse -> parse size file diff --git a/lib/bap_c/bap_c_parser.mli b/lib/bap_c/bap_c_parser.mli index f307ef193..290232fda 100644 --- a/lib/bap_c/bap_c_parser.mli +++ b/lib/bap_c/bap_c_parser.mli @@ -1,16 +1,15 @@ +open Core (** A parser interface. - The module doesn't provide any parsers by itself, but allows it to - be provided by a third party module. -*) -open Core_kernel[@@warning "-D"] + The module doesn't provide any parsers by itself, but allows it to be + provided by a third party module. *) type decls = (string * Bap_c_type.t) list type parser = Bap_c_size.base -> string -> decls Or_error.t -(** [run filename] parses file and returns a mapping from identifier - to its type.*) val run : parser +(** [run filename] parses file and returns a mapping from identifier to its + type.*) -(** called by a plugin that provides a parser. *) val provide : parser -> unit +(** called by a plugin that provides a parser. *) diff --git a/lib/bap_c/bap_c_size.ml b/lib/bap_c/bap_c_size.ml index 73d6d6f9f..66204e56d 100644 --- a/lib/bap_c/bap_c_size.ml +++ b/lib/bap_c/bap_c_size.ml @@ -1,127 +1,132 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Bap_c_data open Bap_c_type -type real_size = [`r32 | `r64 | `r128] +type real_size = [ `r32 | `r64 | `r128 ] type 'a unqualified = (no_qualifier, 'a) spec - type bits = Int.t -let next_multitude_of ~n x = (x + (n-1)) land (lnot (n-1)) - +let next_multitude_of ~n x = (x + (n - 1)) land lnot (n - 1) let padding alignment offset = let align = Size.in_bits alignment in - (align - offset mod align) mod align - + (align - (offset mod align)) mod align +[@@deprecated "since [2021-05] this method is ignored"] let max_enum_elt xs = - List.fold xs ~init:None ~f:(fun m (_,x) -> - match m,x with - | None,None -> Some 0L + List.fold xs ~init:None ~f:(fun m (_, x) -> + match (m, x) with + | None, None -> Some 0L | Some m, None -> Some Int64.(succ m) | Some m, Some x -> Some Int64.(max m x) - | None,x -> x) |> function + | None, x -> x) + |> function | None -> 0L | Some x -> x -class base (m : model) = object(self) - method integer (t : integer) : size = - match m,t with - | _,(`bool|#char) -> `r8 - | _,#short -> `r16 - | `LP32,#cint -> `r16 - | (`ILP32|`LLP64|`LP64),#cint -> `r32 - | `ILP64,#cint -> `r64 - | (`LP32|`ILP32|`LLP64),#long -> `r32 - | (`ILP64|`LP64), #long -> `r64 - | _,#long_long -> `r64 - | _,`enum fields -> self#enum fields - - method pointer : addr_size = match m with - | #model32 -> `r32 - | #model64 -> `r64 - - method enum _ = self#integer `uint (* approximation *) - - method real (v : real) : real_size = match v with - | `float -> `r32 - | `double -> `r64 - | `long_double -> `r128 - - method private double_size : real_size -> size = function - | `r32 -> `r64 - | `r64 -> `r128 - | `r128 -> `r256 - - method complex : complex -> size = function - | `cfloat -> self#double_size (self#real `float) - | `cdouble -> self#double_size (self#real `double) - | `clong_double -> self#double_size (self#real `long_double) - - method floating : floating -> size = function - | #real as t -> (self#real t :> size) - | #complex as t -> (self#complex t :> size) - - method basic : basic -> size = function - | #integer as t -> self#integer t - | #floating as t -> self#floating t - - method scalar : scalar -> size = function - | `Basic {Spec.t} -> self#basic t - | `Pointer _ -> (self#pointer :> size) - - method padding t offset : size option = - match Size.of_int @@ padding (self#alignment t) offset with - | Error _ -> None - | Ok s -> Some s - - method alignment (t : Bap_c_type.t) : size = - let byte = `r8 in - match t with - | `Void -> byte - | `Array {Spec.t={Array.element}} -> self#alignment element - | `Structure {Spec.t={Compound.fields}} - | `Union {Spec.t={Compound.fields}} -> - List.fold fields ~init:byte ~f:(fun align (_,t) -> - Size.max align (self#alignment t)) - | `Function _ -> (self#pointer :> size) - | #scalar as t -> self#scalar t - - method bits : t -> Int.t option = fun t -> - let size = match t with - | `Void -> None - | #scalar as t -> Some (Size.in_bits (self#scalar t)) - | `Function _ -> None - | `Union s -> self#union s - | `Array s -> self#array s - | `Structure s -> self#structure s in - Option.map size ~f:(fun size -> - let alignment = self#alignment t in - next_multitude_of ~n:(Size.in_bits alignment) size) - - method array : _ -> Int.t option = - fun {Spec.t={Array.element=t; size}} -> match size with - | None -> None - | Some n -> match self#bits t with +class base (m : model) = + object (self) + method integer (t : integer) : size = + match (m, t) with + | _, (`bool | #char) -> `r8 + | _, #short -> `r16 + | `LP32, #cint -> `r16 + | (`ILP32 | `LLP64 | `LP64), #cint -> `r32 + | `ILP64, #cint -> `r64 + | (`LP32 | `ILP32 | `LLP64), #long -> `r32 + | (`ILP64 | `LP64), #long -> `r64 + | _, #long_long -> `r64 + | _, `enum fields -> self#enum fields + + method pointer : addr_size = + match m with #model32 -> `r32 | #model64 -> `r64 + + method enum _ = self#integer `uint (* approximation *) + + method real (v : real) : real_size = + match v with `float -> `r32 | `double -> `r64 | `long_double -> `r128 + + method private double_size : real_size -> size = + function `r32 -> `r64 | `r64 -> `r128 | `r128 -> `r256 + + method complex : complex -> size = + function + | `cfloat -> self#double_size (self#real `float) + | `cdouble -> self#double_size (self#real `double) + | `clong_double -> self#double_size (self#real `long_double) + + method floating : floating -> size = + function + | #real as t -> (self#real t :> size) + | #complex as t -> (self#complex t :> size) + + method basic : basic -> size = + function + | #integer as t -> self#integer t + | #floating as t -> self#floating t + + method scalar : scalar -> size = + function + | `Basic { Spec.t } -> self#basic t + | `Pointer _ -> (self#pointer :> size) + + method padding t offset : size option = + match Size.of_int @@ padding (self#alignment t) offset with + | Error _ -> None + | Ok s -> Some s + + method alignment (t : Bap_c_type.t) : size = + let byte = `r8 in + match t with + | `Void -> byte + | `Array { Spec.t = { Array.element } } -> self#alignment element + | `Structure { Spec.t = { Compound.fields } } + | `Union { Spec.t = { Compound.fields } } -> + List.fold fields ~init:byte ~f:(fun align (_, t) -> + Size.max align (self#alignment t)) + | `Function _ -> (self#pointer :> size) + | #scalar as t -> self#scalar t + + method bits : t -> Int.t option = + fun t -> + let size = + match t with + | `Void -> None + | #scalar as t -> Some (Size.in_bits (self#scalar t)) + | `Function _ -> None + | `Union s -> self#union s + | `Array s -> self#array s + | `Structure s -> self#structure s + in + Option.map size ~f:(fun size -> + let alignment = self#alignment t in + next_multitude_of ~n:(Size.in_bits alignment) size) + + method array : _ -> Int.t option = + fun { Spec.t = { Array.element = t; size } } -> + match size with | None -> None - | Some x -> Some (n * x) - - method union : compound unqualified -> Int.t option = - fun {Spec.t={Compound.fields}} -> - List.map fields ~f:(fun (_,t) -> self#bits t) |> Option.all |> function - | None -> None - | Some ss -> List.max_elt ~compare:Int.compare ss |> function - | None -> None - | Some s -> Some s - - method structure : compound unqualified -> Int.t option = - fun {Spec.t={Compound.fields}} -> - List.fold fields ~init:(Some 0) ~f:(fun sz (_,field) -> match sz with + | Some n -> ( + match self#bits t with None -> None | Some x -> Some (n * x)) + + method union : compound unqualified -> Int.t option = + fun { Spec.t = { Compound.fields } } -> + List.map fields ~f:(fun (_, t) -> self#bits t) |> Option.all |> function | None -> None - | Some sz -> match self#bits field with - | None -> None - | Some sz' -> - Some (sz + sz' + padding (self#alignment field) sz)) -end + | Some ss -> ( + List.max_elt ~compare:Int.compare ss |> function + | None -> None + | Some s -> Some s) + + method structure : compound unqualified -> Int.t option = + fun { Spec.t = { Compound.fields } } -> + List.fold fields ~init:(Some 0) ~f:(fun sz (_, field) -> + match sz with + | None -> None + | Some sz -> ( + match self#bits field with + | None -> None + | Some sz' -> Some (sz + sz' + padding (self#alignment field) sz) + )) + end diff --git a/lib/bap_c/bap_c_size.mli b/lib/bap_c/bap_c_size.mli index 2d60bcd14..15cbb88d0 100644 --- a/lib/bap_c/bap_c_size.mli +++ b/lib/bap_c/bap_c_size.mli @@ -1,44 +1,38 @@ +open Core (** An abstraction of sizeof operator.*) -open Core_kernel[@@warning "-D"] + open Bap.Std open Bap_c_data open Bap_c_type - type bits = Int.t - -(** [next_multitude_of ~n x] returns [y >= x] so that [y] - is a multitude of [n], i.e., [y = n * k]. - - @since 2.5.0 *) val next_multitude_of : n:int -> int -> int - - -(** [padding alignment offset] computes the required padding at - [offset] to ensure the [alignment]. +(** [next_multitude_of ~n x] returns [y >= x] so that [y] is a multitude of [n], + i.e., [y = n * k]. @since 2.5.0 *) -val padding : Size.t -> int -> int - +val padding : Size.t -> int -> int +(** [padding alignment offset] computes the required padding at [offset] to + ensure the [alignment]. -(** [max_enum_elt enum] returns the maximum element in the enum - specification. + @since 2.5.0 *) - @since 2.5.0 -*) val max_enum_elt : (string * int64 option) list -> int64 +(** [max_enum_elt enum] returns the maximum element in the enum specification. + + @since 2.5.0 *) (** The base class for computing sizes and aligments of C data types. - The algorithm is implemented as a class to allow - a particular implementation to fine tune the calculation. + The algorithm is implemented as a class to allow a particular implementation + to fine tune the calculation. - The [model] argument defines the default sizes for integral data - types. If no suitable model is available for your architecture - then use the closest model and override the specific methods to - fine-tune the data model of your target. + The [model] argument defines the default sizes for integral data types. If + no suitable model is available for your architecture then use the closest + model and override the specific methods to fine-tune the data model of your + target. The entry methods are [bits] and [aligment]. @@ -54,10 +48,9 @@ val max_enum_elt : (string * int64 option) list -> int64 }; v} - Using the LP64 data model, in which integers are 32 bit long and - char is 8 bit. The size of the structure is 12 bytes, due to - the 3 bytes of padding before [v2] and six bytes of trailing - padding. + Using the LP64 data model, in which integers are 32 bit long and char is 8 + bit. The size of the structure is 12 bytes, due to the 3 bytes of padding + before [v2] and six bytes of trailing padding. {[ # let size = new C.Size.base `LP64;; @@ -67,82 +60,69 @@ val max_enum_elt : (string * int64 option) list -> int64 "v3", basic `char ]);; - : C.Size.bits option = Some 96 - ]} -*) -class base : model -> object - - - (** returns a size of the data type representation in bits. - - For incomplete types returns [None]. The size is always a - multitude of the data type alignment and includes the - paddings necessary for preserving the alignment restrictions. - - @since 2.5.0 the size is a multitude of the alignment. - *) - method bits : t -> bits option - - (** [alignment t] the alignment of data type [t]. - - The alignment of - - void or an incomplete type is 8; - - a scalar is [sizeof(t)]; - - an array is the alignment its element; - - a function pointer is [sizeof] the pointer; - - a structure or a union is the largest of the element's alignments. - - *) - method alignment : t -> size - - (** DEPRECATED. Use the [padding] function if you need to compute - padding. *) - method padding : t -> bits -> size option - [@@deprecated "since [2021-05] this method is ignored"] - (* this method was deprecated as + ]} *) +class base : model -> object + method bits : t -> bits option + (** returns a size of the data type representation in bits. + + For incomplete types returns [None]. The size is always a multitude of the + data type alignment and includes the paddings necessary for preserving the + alignment restrictions. + + @since 2.5.0 the size is a multitude of the alignment. *) + + method alignment : t -> size + (** [alignment t] the alignment of data type [t]. + + The alignment of + - void or an incomplete type is 8; + - a scalar is [sizeof(t)]; + - an array is the alignment its element; + - a function pointer is [sizeof] the pointer; + - a structure or a union is the largest of the element's alignments. *) + + method padding : t -> bits -> size option + (** DEPRECATED. Use the [padding] function if you need to compute padding. *) + (* this method was deprecated as 1) it has an incorrect type (padding can have any number of bits) 2) padding is fully defined by the alignemnt and there is no need to parameterize it. *) + method array : (cvr qualifier, array) spec -> bits option + (** [array spec] if array [spec] is complete, i.e., the number of elements is + known, then returns a product of the array size and the array's element + type in bits, otherwise returns [None] *) - (** [array spec] if array [spec] is complete, i.e., the number of - elements is known, then returns a product of the - array size and the array's element type in bits, - otherwise returns [None] - *) - method array : (cvr qualifier, array) spec -> bits option - - (** if spec is complete then returns a size in bits of the biggest - element, including the padding between fields, but excludeing - the trailing padding. *) - method union : (no_qualifier, compound) spec -> bits option - - (** if spec is complete then returns a total size of the - structure, including the padding between fields, but excluding - the trailing padding. *) - method structure : (no_qualifier, compound) spec -> bits option + method union : (no_qualifier, compound) spec -> bits option + (** if spec is complete then returns a size in bits of the biggest element, + including the padding between fields, but excludeing the trailing padding. + *) + method structure : (no_qualifier, compound) spec -> bits option + (** if spec is complete then returns a total size of the structure, including + the padding between fields, but excluding the trailing padding. *) - (** the size of intergral types. *) - method integer : integer -> size + method integer : integer -> size + (** the size of intergral types. *) - (** the size of a pointer. *) - method pointer : addr_size + method pointer : addr_size + (** the size of a pointer. *) - (** the size of the enumeration. *) - method enum : (string * int64 option) list -> size + method enum : (string * int64 option) list -> size + (** the size of the enumeration. *) - (** the size of a real floating-point data type. *) - method real : real -> [`r32 | `r64 | `r128] + method real : real -> [ `r32 | `r64 | `r128 ] + (** the size of a real floating-point data type. *) - (** the size of a complex floating-point data type. *) - method complex : complex -> size + method complex : complex -> size + (** the size of a complex floating-point data type. *) - (** the size of a floating-point data type. *) - method floating : floating -> size + method floating : floating -> size + (** the size of a floating-point data type. *) - (** the size of a basic data type. *) - method basic : basic -> size + method basic : basic -> size + (** the size of a basic data type. *) - (** the size of a scalar data type. *) - method scalar : scalar -> size - end + method scalar : scalar -> size + (** the size of a scalar data type. *) +end diff --git a/lib/bap_c/bap_c_term_attributes.ml b/lib/bap_c/bap_c_term_attributes.ml index f8ffb6e75..84005637a 100644 --- a/lib/bap_c/bap_c_term_attributes.ml +++ b/lib/bap_c/bap_c_term_attributes.ml @@ -1,39 +1,44 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Format - - module Data = Bap_c_data - module Type = struct include Bap_c_type + let pp = Bap_c_type_printer.pp end module Layout = struct open Bap_c_data + type t = layout [@@deriving bin_io, compare, sexp] + let pp = pp_layout end module Proto = struct type t = Bap_c_type.proto [@@deriving bin_io, compare, sexp] + let pp = Bap_c_type_printer.pp_proto end -let data = Value.Tag.register (module Data) - ~name:"c.data" - ~uuid:"e857a310-2cf0-487f-a879-ef5d8e38b3c9" - -let layout = Value.Tag.register (module Layout) - ~name:"c.layout" - ~uuid:"e26dbba0-c912-45fb-ac4c-b4a1c242a4f3" - -let t = Value.Tag.register (module Type) - ~name:"c.type" - ~uuid:"f668d2ac-874c-4369-acb3-138c041c98c7" - -let proto = Value.Tag.register (module Proto) - ~name:"c.proto" - ~uuid:"23efab19-4293-4bb7-9c34-0afc63986c2e" +let data = + Value.Tag.register + (module Data) + ~name:"c.data" ~uuid:"e857a310-2cf0-487f-a879-ef5d8e38b3c9" + +let layout = + Value.Tag.register + (module Layout) + ~name:"c.layout" ~uuid:"e26dbba0-c912-45fb-ac4c-b4a1c242a4f3" + +let t = + Value.Tag.register + (module Type) + ~name:"c.type" ~uuid:"f668d2ac-874c-4369-acb3-138c041c98c7" + +let proto = + Value.Tag.register + (module Proto) + ~name:"c.proto" ~uuid:"23efab19-4293-4bb7-9c34-0afc63986c2e" diff --git a/lib/bap_c/bap_c_term_attributes.mli b/lib/bap_c/bap_c_term_attributes.mli index d02078b47..cb4696093 100644 --- a/lib/bap_c/bap_c_term_attributes.mli +++ b/lib/bap_c/bap_c_term_attributes.mli @@ -1,24 +1,21 @@ -(** BIR attributes. *) open Bap.Std -open Bap_c_type +(** BIR attributes. *) +open Bap_c_type -(** Abstraction of a data representation of C value. This - attribute is attached to each inserted arg term, but can be - further propagated by other passes *) val data : Bap_c_data.t tag +(** Abstraction of a data representation of C value. This attribute is attached + to each inserted arg term, but can be further propagated by other passes *) -(** Function prototype. This attribute is inserted into each - annotated function. *) val proto : proto tag +(** Function prototype. This attribute is inserted into each annotated function. +*) - +val layout : Bap_c_data.layout tag (** [layout] describes the layout of a C object. - @since 2.5.0 *) -val layout : Bap_c_data.layout tag + @since 2.5.0 *) -(** A c type associated with a term. This attribute is attached to - each inserted arg term, but maybe propagated by further by other - passes. *) val t : t tag +(** A c type associated with a term. This attribute is attached to each inserted + arg term, but maybe propagated by further by other passes. *) diff --git a/lib/bap_c/bap_c_type.ml b/lib/bap_c/bap_c_type.ml index 64ae41362..b7fa78c92 100644 --- a/lib/bap_c/bap_c_type.ml +++ b/lib/bap_c/bap_c_type.ml @@ -1,303 +1,235 @@ +open Core (** C Type System. - We represent a C type structurally, i.e., the type representation - is self-containted and doesn't require any typing requirement. + We represent a C type structurally, i.e., the type representation is + self-containted and doesn't require any typing requirement. - Polymorphic variants are used to represent C type constructors and - type groups. + Polymorphic variants are used to represent C type constructors and type + groups. - The type system is extended with attributes, i.e., it is possible - to attach attributes of the form [attr(args)] to C type - declarations. -*) -open Core_kernel[@@warning "-D"] + The type system is extended with attributes, i.e., it is possible to attach + attributes of the form [attr(args)] to C type declarations. *) -type char = - [ `schar | `char | `uchar] -[@@deriving bin_io,compare,sexp,enumerate] +type char = [ `schar | `char | `uchar ] +[@@deriving bin_io, compare, sexp, enumerate] -type short = - [`sshort | `ushort] -[@@deriving bin_io,compare,sexp,enumerate] +type short = [ `sshort | `ushort ] [@@deriving bin_io, compare, sexp, enumerate] +type cint = [ `uint | `sint ] [@@deriving bin_io, compare, sexp, enumerate] +type long = [ `slong | `ulong ] [@@deriving bin_io, compare, sexp, enumerate] -type cint = - [`uint | `sint] -[@@deriving bin_io,compare,sexp,enumerate] +type long_long = [ `slong_long | `ulong_long ] +[@@deriving bin_io, compare, sexp, enumerate] -type long = - [`slong | `ulong] -[@@deriving bin_io,compare,sexp,enumerate] +type signed = [ `schar | `sshort | `sint | `slong | `slong_long ] +[@@deriving bin_io, compare, sexp, enumerate] -type long_long = - [`slong_long | `ulong_long] -[@@deriving bin_io,compare,sexp,enumerate] +type unsigned = [ `bool | `uchar | `ushort | `uint | `ulong | `ulong_long ] +[@@deriving bin_io, compare, sexp, enumerate] -type signed = - [`schar | `sshort | `sint | `slong | `slong_long] -[@@deriving bin_io,compare,sexp,enumerate] - -type unsigned = - [`bool | `uchar | `ushort | `uint | `ulong | `ulong_long] -[@@deriving bin_io,compare,sexp,enumerate] - -type enum = - [`enum of (string * int64 option) list] -[@@deriving bin_io,compare,sexp] - -type integer = - [char | signed | unsigned | enum] -[@@deriving bin_io,compare,sexp] - -type real = - [`float | `double | `long_double] -[@@deriving bin_io,compare,sexp,enumerate] - -type complex = - [`cfloat | `cdouble | `clong_double] -[@@deriving bin_io,compare,sexp,enumerate] +type enum = [ `enum of (string * int64 option) list ] +[@@deriving bin_io, compare, sexp] -type floating = [real | complex] -[@@deriving bin_io,compare,sexp,enumerate] +type integer = [ char | signed | unsigned | enum ] +[@@deriving bin_io, compare, sexp] -type basic = [integer | floating] -[@@deriving bin_io,compare,sexp] +type real = [ `float | `double | `long_double ] +[@@deriving bin_io, compare, sexp, enumerate] +type complex = [ `cfloat | `cdouble | `clong_double ] +[@@deriving bin_io, compare, sexp, enumerate] -type cv = unit [@@deriving bin_io,compare,sexp] -type cvr = Bool.t [@@deriving bin_io,compare,sexp] +type floating = [ real | complex ] [@@deriving bin_io, compare, sexp, enumerate] +type basic = [ integer | floating ] [@@deriving bin_io, compare, sexp] +type cv = unit [@@deriving bin_io, compare, sexp] +type cvr = Bool.t [@@deriving bin_io, compare, sexp] module Qualifier = struct - type 'a t = { - const : Bool.t; - volatile : Bool.t; - restrict : 'a; - } [@@deriving bin_io,compare,sexp] + type 'a t = { const : Bool.t; volatile : Bool.t; restrict : 'a } + [@@deriving bin_io, compare, sexp] end type 'a qualifier = 'a Qualifier.t [@@deriving bin_io, compare, sexp] module Attr = struct - type t = { - name : string; - args : string list [@sexp.list]; - } [@@deriving bin_io, compare, sexp] + type t = { name : string; args : string list [@sexp.list] } + [@@deriving bin_io, compare, sexp] end -type attr = Attr.t -[@@deriving bin_io, compare, sexp] +type attr = Attr.t [@@deriving bin_io, compare, sexp] module Spec = struct - type ('a,'b) t = { - qualifier : 'a; - t : 'b; - attrs : attr list [@sexp.list]; - } [@@deriving bin_io, compare, sexp] - + type ('a, 'b) t = { qualifier : 'a; t : 'b; attrs : attr list [@sexp.list] } + [@@deriving bin_io, compare, sexp] end -type ('a,'b) spec = ('a,'b) Spec.t -[@@deriving bin_io, compare, sexp] - -type no_qualifier = [`no_qualifier] -[@@deriving bin_io, compare, sexp] +type ('a, 'b) spec = ('a, 'b) Spec.t [@@deriving bin_io, compare, sexp] +type no_qualifier = [ `no_qualifier ] [@@deriving bin_io, compare, sexp] module Proto = struct - type 'a t = { - return : 'a; - args : (string * 'a) list; - variadic : Bool.t; - } [@@deriving bin_io, compare, sexp] + type 'a t = { return : 'a; args : (string * 'a) list; variadic : Bool.t } + [@@deriving bin_io, compare, sexp] end module Compound = struct - type 'a t = { - name : string; - fields : (string * 'a) list; - } [@@deriving bin_io, compare, sexp] + type 'a t = { name : string; fields : (string * 'a) list } + [@@deriving bin_io, compare, sexp] end module Array = struct - type 'a t = { - element : 'a; - size : Int.t option - } [@@deriving bin_io, compare, sexp] + type 'a t = { element : 'a; size : Int.t option } + [@@deriving bin_io, compare, sexp] end -type t = [ - | `Void - | `Basic of (cv qualifier, basic) spec - | `Pointer of (cvr qualifier, t) spec - | `Array of (cvr qualifier, array) spec - | `Structure of (no_qualifier, compound) spec - | `Union of (no_qualifier, compound) spec - | `Function of (no_qualifier, proto) spec -] [@@deriving bin_io, compare, sexp] -and proto = t Proto.t [@@deriving bin_io, compare, sexp] -and compound = t Compound.t [@@deriving bin_io, compare, sexp] -and array = t Array.t [@@deriving bin_io, compare, sexp] - - -type scalar = [ - | `Basic of (cv qualifier,basic) spec +type t = + [ `Void + | `Basic of (cv qualifier, basic) spec | `Pointer of (cvr qualifier, t) spec -] [@@deriving bin_io, compare, sexp] + | `Array of (cvr qualifier, array) spec + | `Structure of (no_qualifier, compound) spec + | `Union of (no_qualifier, compound) spec + | `Function of (no_qualifier, proto) spec ] +[@@deriving bin_io, compare, sexp] -type aggregate = [ - | `Array of (no_qualifier, t) spec - | `Structure of (no_qualifier, t list) spec -] [@@deriving bin_io, compare, sexp] +and proto = t Proto.t [@@deriving bin_io, compare, sexp] +and compound = t Compound.t [@@deriving bin_io, compare, sexp] +and array = t Array.t [@@deriving bin_io, compare, sexp] + +type scalar = + [ `Basic of (cv qualifier, basic) spec | `Pointer of (cvr qualifier, t) spec ] +[@@deriving bin_io, compare, sexp] + +type aggregate = + [ `Array of (no_qualifier, t) spec + | `Structure of (no_qualifier, t list) spec ] +[@@deriving bin_io, compare, sexp] let attrs : t -> attr list = function | `Void -> [] - | `Basic {attrs} - | `Pointer {attrs} - | `Array {attrs} - | `Structure {attrs} - | `Union {attrs} - | `Function {attrs} -> attrs + | `Basic { attrs } + | `Pointer { attrs } + | `Array { attrs } + | `Structure { attrs } + | `Union { attrs } + | `Function { attrs } -> + attrs let is_const : t -> Bool.t = function | `Void | `Union _ | `Structure _ | `Function _ -> false - | `Basic {qualifier={const}} - | `Array {qualifier={const}} - | `Pointer {qualifier={const}} -> const + | `Basic { qualifier = { const } } + | `Array { qualifier = { const } } + | `Pointer { qualifier = { const } } -> + const let is_volatile : t -> Bool.t = function | `Void | `Union _ | `Structure _ | `Function _ -> false - | `Basic {qualifier={volatile}} - | `Array {qualifier={volatile}} - | `Pointer {qualifier={volatile}} -> volatile + | `Basic { qualifier = { volatile } } + | `Array { qualifier = { volatile } } + | `Pointer { qualifier = { volatile } } -> + volatile let is_restrict : t -> Bool.t = function | `Void | `Union _ | `Structure _ | `Function _ | `Basic _ -> false - | `Array {qualifier={restrict}} - | `Pointer {qualifier={restrict}} -> restrict - + | `Array { qualifier = { restrict } } | `Pointer { qualifier = { restrict } } + -> + restrict let is_void = function `Void -> true | _ -> false -let qualifier ?(const=false) ?(volatile=false) restrict = - Qualifier.{const; volatile; restrict} +let qualifier ?(const = false) ?(volatile = false) restrict = + Qualifier.{ const; volatile; restrict } -let basic ?(attrs=[]) ?const ?volatile t : t = - `Basic { - t; - attrs; - qualifier = qualifier ?const ?volatile (); - } +let basic ?(attrs = []) ?const ?volatile t : t = + `Basic { t; attrs; qualifier = qualifier ?const ?volatile () } -let is_basic : t -> Bool.t = - function `Basic _ -> true | _ -> false +let is_basic : t -> Bool.t = function `Basic _ -> true | _ -> false +let is_char : t -> Bool.t = function `Basic { t = #char } -> true | _ -> false -let is_char : t -> Bool.t = - function `Basic {t=#char} -> true | _ -> false +let is_short : t -> Bool.t = function + | `Basic { t = #short } -> true + | _ -> false -let is_short : t -> Bool.t = - function `Basic {t=#short} -> true | _ -> false +let is_cint : t -> Bool.t = function `Basic { t = #cint } -> true | _ -> false -let is_cint : t -> Bool.t = - function `Basic {t=#cint} -> true | _ -> false +let is_signed : t -> Bool.t = function + | `Basic { t = #signed } -> true + | _ -> false -let is_signed : t -> Bool.t = - function `Basic {t=#signed} -> true | _ -> false +let is_unsigned : t -> Bool.t = function + | `Basic { t = #unsigned } -> true + | _ -> false -let is_unsigned : t -> Bool.t = - function `Basic {t=#unsigned} -> true | _ -> false +let is_enum : t -> Bool.t = function `Basic { t = #enum } -> true | _ -> false -let is_enum : t -> Bool.t = - function `Basic {t=#enum} -> true | _ -> false +let is_integer : t -> Bool.t = function + | `Basic { t = #integer } -> true + | _ -> false -let is_integer : t -> Bool.t = - function `Basic {t=#integer} -> true | _ -> false +let is_real : t -> Bool.t = function `Basic { t = #real } -> true | _ -> false -let is_real : t -> Bool.t = - function `Basic {t=#real} -> true | _ -> false +let is_complex : t -> Bool.t = function + | `Basic { t = #complex } -> true + | _ -> false -let is_complex : t -> Bool.t = - function `Basic {t=#complex} -> true | _ -> false +let is_floating : t -> Bool.t = function + | `Basic { t = #floating } -> true + | _ -> false -let is_floating : t -> Bool.t = - function `Basic {t=#floating} -> true | _ -> false - -let pointer ?(attrs=[]) ?const ?volatile ?(restrict=false) t : t = - `Pointer { - t; - attrs; - qualifier = qualifier ?const ?volatile restrict; - } +let pointer ?(attrs = []) ?const ?volatile ?(restrict = false) t : t = + `Pointer { t; attrs; qualifier = qualifier ?const ?volatile restrict } let is_pointer : t -> Bool.t = function `Pointer _ -> true | _ -> false -let array ?(attrs=[]) ?const ?volatile ?(restrict=false) ?size t : t = - `Array { - t = Array.{element=t; size}; - attrs; - qualifier = qualifier ?const ?volatile restrict; - } +let array ?(attrs = []) ?const ?volatile ?(restrict = false) ?size t : t = + `Array + { + t = Array.{ element = t; size }; + attrs; + qualifier = qualifier ?const ?volatile restrict; + } let is_array : t -> Bool.t = function `Array _ -> true | _ -> false -let structure ?(attrs=[]) name fields : t = - `Structure { - t = Compound.{name; fields}; - attrs; - qualifier = `no_qualifier; - } +let structure ?(attrs = []) name fields : t = + `Structure { t = Compound.{ name; fields }; attrs; qualifier = `no_qualifier } let is_structure : t -> Bool.t = function `Structure _ -> true | _ -> false - -let union ?(attrs=[]) name fields : t = - `Union { - t = Compound.{name; fields}; - attrs; - qualifier = `no_qualifier; - } +let union ?(attrs = []) name fields : t = + `Union { t = Compound.{ name; fields }; attrs; qualifier = `no_qualifier } let is_union : t -> Bool.t = function `Union _ -> true | _ -> false -let function_ ?(attrs=[]) ?(variadic=false) ?(return=`Void) args : t = - `Function { - t = Proto.{return; args; variadic}; - attrs; - qualifier = `no_qualifier; - } +let function_ ?(attrs = []) ?(variadic = false) ?(return = `Void) args : t = + `Function + { t = Proto.{ return; args; variadic }; attrs; qualifier = `no_qualifier } let is_function : t -> Bool.t = function `Function _ -> true | _ -> false - - let pp_comma ppf () = Format.fprintf ppf ", " let pp_attr ppf = function - | {Attr.name; args=[]} -> Format.fprintf ppf "%s" name - | {Attr.name; args} -> - Format.fprintf ppf "%s(%a)" name - (Format.pp_print_list ~pp_sep:pp_comma - Format.pp_print_string) args + | { Attr.name; args = [] } -> Format.fprintf ppf "%s" name + | { Attr.name; args } -> + Format.fprintf ppf "%s(%a)" name + (Format.pp_print_list ~pp_sep:pp_comma Format.pp_print_string) + args -let pp_attr_list ppf xs = - Format.pp_print_list ~pp_sep:pp_comma pp_attr ppf xs +let pp_attr_list ppf xs = Format.pp_print_list ~pp_sep:pp_comma pp_attr ppf xs let pp_attrs ppf = function | [] -> () - | attrs -> - Format.fprintf ppf " __attribute__((%a)) " - pp_attr_list attrs + | attrs -> Format.fprintf ppf " __attribute__((%a)) " pp_attr_list attrs let pp_qualifier name ppf = function | false -> () | true -> Format.fprintf ppf " %s " name -let pp_cv ppf {Qualifier.const;volatile} = - Format.fprintf ppf "%a%a" - (pp_qualifier "const") const +let pp_cv ppf { Qualifier.const; volatile } = + Format.fprintf ppf "%a%a" (pp_qualifier "const") const (pp_qualifier "volatile") volatile -let pp_cvr ppf {Qualifier.const;volatile;restrict} = - Format.fprintf ppf "%a%a%a" - (pp_qualifier "const") const - (pp_qualifier "volatile") volatile - (pp_qualifier "restrict") restrict +let pp_cvr ppf { Qualifier.const; volatile; restrict } = + Format.fprintf ppf "%a%a%a" (pp_qualifier "const") const + (pp_qualifier "volatile") volatile (pp_qualifier "restrict") restrict let pp_size ppf = function | None -> () @@ -307,18 +239,18 @@ let pp_enum_value ppf = function | None -> Format.fprintf ppf "," | Some v -> Format.fprintf ppf "= %Ld," v -let pp_enum_field ppf (name,value) = +let pp_enum_field ppf (name, value) = Format.fprintf ppf "@,%s%a," name pp_enum_value value -let pp_enum_fields ppf = - List.iter ~f:(pp_enum_field ppf) +let pp_enum_fields ppf = List.iter ~f:(pp_enum_field ppf) -let string_of_basic t = match (t : basic) with +let string_of_basic t = + match (t : basic) with | `schar -> "signed char" | `cdouble -> "double complex" - | `long_double -> "long double" + | `long_double -> "long double" | `cfloat -> "float complex" - | `float -> "float" + | `float -> "float" | `clong_double -> "long double complex" | `ulong_long -> "unsigned long long" | `uint -> "unsigned" @@ -333,51 +265,49 @@ let string_of_basic t = match (t : basic) with | `ulong -> "unsigned long" | `uchar -> "unsigned char" | `enum fields -> - Format.asprintf "@[@[enum {%a@]@,}@;" pp_enum_fields fields + Format.asprintf "@[@[enum {%a@]@,}@;" pp_enum_fields fields let pp_variadic ppf = function | true -> Format.fprintf ppf ", ..." | false -> () -let pp_basic ppf t = - Format.pp_print_string ppf (string_of_basic t) - -let rec pp ppf t = match (t : t) with - | `Void -> - Format.fprintf ppf "void" - | `Array { qualifier; t={ element; size }; attrs } -> - Format.fprintf ppf "@[%a%a%a[%a]@]" - pp_attrs attrs pp_cvr qualifier pp_incomplete element pp_size size - |`Basic { qualifier; t; attrs } -> - Format.fprintf ppf "@[%a%a%a@]" - pp_attrs attrs pp_cv qualifier pp_basic t - | `Function { t=proto; attrs } -> - Format.fprintf ppf "@[%a%a;@]" - pp_proto proto pp_attrs attrs +let pp_basic ppf t = Format.pp_print_string ppf (string_of_basic t) + +let rec pp ppf t = + match (t : t) with + | `Void -> Format.fprintf ppf "void" + | `Array { qualifier; t = { element; size }; attrs } -> + Format.fprintf ppf "@[%a%a%a[%a]@]" pp_attrs attrs pp_cvr qualifier + pp_incomplete element pp_size size + | `Basic { qualifier; t; attrs } -> + Format.fprintf ppf "@[%a%a%a@]" pp_attrs attrs pp_cv qualifier pp_basic + t + | `Function { t = proto; attrs } -> + Format.fprintf ppf "@[%a%a;@]" pp_proto proto pp_attrs attrs | `Pointer { qualifier; t; attrs } -> - Format.fprintf ppf "@[%a%a%a*@]" - pp_attrs attrs pp_incomplete t pp_cvr qualifier - | `Union { t={name;fields}; attrs } - | `Structure { t={name;fields}; attrs } as t -> - let kind = if is_structure t then "struct" else "union" in - Format.fprintf ppf "@[@[%s %s {@,%a@]@,}@]%a;" - kind name pp_fields fields pp_attrs attrs + Format.fprintf ppf "@[%a%a%a*@]" pp_attrs attrs pp_incomplete t pp_cvr + qualifier + | ( `Union { t = { name; fields }; attrs } + | `Structure { t = { name; fields }; attrs } ) as t -> + let kind = if is_structure t then "struct" else "union" in + Format.fprintf ppf "@[@[%s %s {@,%a@]@,}@]%a;" kind name pp_fields + fields pp_attrs attrs + and pp_proto ppf { return; args; variadic } = - Format.fprintf ppf "%a (*)(%a%a)" - pp_incomplete return pp_args args pp_variadic variadic + Format.fprintf ppf "%a (*)(%a%a)" pp_incomplete return pp_args args + pp_variadic variadic + and pp_args ppf = function | [] -> Format.fprintf ppf "void" - | args -> - Format.pp_print_list ~pp_sep:pp_comma pp_arg ppf args -and pp_arg ppf (name,t) = - Format.fprintf ppf "%a %s" pp_incomplete t name -and pp_fields ppf fields = - Format.pp_print_list pp_field ppf fields -and pp_field ppf (name,t) = - Format.fprintf ppf "%a %s;" pp_incomplete t name -and pp_incomplete ppf t = match (t : t) with - | `Union { t={name} } - | `Structure { t={name} } as t -> - let kind = if is_structure t then "struct" else "union" in - Format.fprintf ppf "%s %s" kind name + | args -> Format.pp_print_list ~pp_sep:pp_comma pp_arg ppf args + +and pp_arg ppf (name, t) = Format.fprintf ppf "%a %s" pp_incomplete t name +and pp_fields ppf fields = Format.pp_print_list pp_field ppf fields +and pp_field ppf (name, t) = Format.fprintf ppf "%a %s;" pp_incomplete t name + +and pp_incomplete ppf t = + match (t : t) with + | (`Union { t = { name } } | `Structure { t = { name } }) as t -> + let kind = if is_structure t then "struct" else "union" in + Format.fprintf ppf "%s %s" kind name | t -> pp ppf t diff --git a/lib/bap_c/bap_c_type.mli b/lib/bap_c/bap_c_type.mli index 84df9b564..77727e270 100644 --- a/lib/bap_c/bap_c_type.mli +++ b/lib/bap_c/bap_c_type.mli @@ -1,179 +1,131 @@ +open Core (** C Type System. - We represent a C type structurally, i.e., the type representation - is self-containted and doesn't require any typing requirement. + We represent a C type structurally, i.e., the type representation is + self-containted and doesn't require any typing requirement. - Polymorphic variants are used to represent C type constructors and - type groups. + Polymorphic variants are used to represent C type constructors and type + groups. - The type system is extended with attributes, i.e., it is possible - to attach attributes of the form [attr(args)] to C type - declarations. -*) -open Core_kernel[@@warning "-D"] + The type system is extended with attributes, i.e., it is possible to attach + attributes of the form [attr(args)] to C type declarations. *) -type char = - [ `schar | `char | `uchar] -[@@deriving bin_io,compare,sexp,enumerate] +type char = [ `schar | `char | `uchar ] +[@@deriving bin_io, compare, sexp, enumerate] -type short = - [`sshort | `ushort] -[@@deriving bin_io,compare,sexp,enumerate] +type short = [ `sshort | `ushort ] [@@deriving bin_io, compare, sexp, enumerate] +type cint = [ `uint | `sint ] [@@deriving bin_io, compare, sexp, enumerate] +type long = [ `slong | `ulong ] [@@deriving bin_io, compare, sexp, enumerate] -type cint = - [`uint | `sint] -[@@deriving bin_io,compare,sexp,enumerate] +type long_long = [ `slong_long | `ulong_long ] +[@@deriving bin_io, compare, sexp, enumerate] -type long = - [`slong | `ulong] -[@@deriving bin_io,compare,sexp,enumerate] +type signed = [ `schar | `sshort | `sint | `slong | `slong_long ] +[@@deriving bin_io, compare, sexp, enumerate] -type long_long = - [`slong_long | `ulong_long] -[@@deriving bin_io,compare,sexp,enumerate] +type unsigned = [ `bool | `uchar | `ushort | `uint | `ulong | `ulong_long ] +[@@deriving bin_io, compare, sexp, enumerate] -type signed = - [`schar | `sshort | `sint | `slong | `slong_long] -[@@deriving bin_io,compare,sexp,enumerate] - -type unsigned = - [`bool | `uchar | `ushort | `uint | `ulong | `ulong_long] -[@@deriving bin_io,compare,sexp,enumerate] - -type enum = - [`enum of (string * int64 option) list] -[@@deriving bin_io,compare,sexp] - -type integer = - [char | signed | unsigned | enum] -[@@deriving bin_io,compare,sexp] - -type real = - [`float | `double | `long_double] -[@@deriving bin_io,compare,sexp,enumerate] - -type complex = - [`cfloat | `cdouble | `clong_double] -[@@deriving bin_io,compare,sexp,enumerate] +type enum = [ `enum of (string * int64 option) list ] +[@@deriving bin_io, compare, sexp] -type floating = [real | complex] -[@@deriving bin_io,compare,sexp,enumerate] +type integer = [ char | signed | unsigned | enum ] +[@@deriving bin_io, compare, sexp] -type basic = [integer | floating] -[@@deriving bin_io,compare,sexp] +type real = [ `float | `double | `long_double ] +[@@deriving bin_io, compare, sexp, enumerate] +type complex = [ `cfloat | `cdouble | `clong_double ] +[@@deriving bin_io, compare, sexp, enumerate] -type cv = unit [@@deriving bin_io,compare,sexp] -type cvr = Bool.t [@@deriving bin_io,compare,sexp] +type floating = [ real | complex ] [@@deriving bin_io, compare, sexp, enumerate] +type basic = [ integer | floating ] [@@deriving bin_io, compare, sexp] +type cv = unit [@@deriving bin_io, compare, sexp] +type cvr = Bool.t [@@deriving bin_io, compare, sexp] module Qualifier : sig - type 'a t = { - const : Bool.t; - volatile : Bool.t; - restrict : 'a; - } [@@deriving bin_io,compare,sexp] + type 'a t = { const : Bool.t; volatile : Bool.t; restrict : 'a } + [@@deriving bin_io, compare, sexp] end type 'a qualifier = 'a Qualifier.t [@@deriving bin_io, compare, sexp] module Attr : sig - type t = { - name : string; - args : string list [@sexp.list]; - } [@@deriving bin_io, compare, sexp] + type t = { name : string; args : string list [@sexp.list] } + [@@deriving bin_io, compare, sexp] end -type attr = Attr.t -[@@deriving bin_io, compare, sexp] +type attr = Attr.t [@@deriving bin_io, compare, sexp] module Spec : sig - type ('a,'b) t = { - qualifier : 'a; - t : 'b; - attrs : attr list [@sexp.list]; - } [@@deriving bin_io, compare, sexp] - + type ('a, 'b) t = { qualifier : 'a; t : 'b; attrs : attr list [@sexp.list] } + [@@deriving bin_io, compare, sexp] end -type ('a,'b) spec = ('a,'b) Spec.t -[@@deriving bin_io, compare, sexp] - -type no_qualifier = [`no_qualifier] -[@@deriving bin_io, compare, sexp] +type ('a, 'b) spec = ('a, 'b) Spec.t [@@deriving bin_io, compare, sexp] +type no_qualifier = [ `no_qualifier ] [@@deriving bin_io, compare, sexp] module Proto : sig - type 'a t = { - return : 'a; - args : (string * 'a) list; - variadic : Bool.t; - } [@@deriving bin_io, compare, sexp] + type 'a t = { return : 'a; args : (string * 'a) list; variadic : Bool.t } + [@@deriving bin_io, compare, sexp] end module Compound : sig - type 'a t = { - name : string; - fields : (string * 'a) list; - } [@@deriving bin_io, compare, sexp] + type 'a t = { name : string; fields : (string * 'a) list } + [@@deriving bin_io, compare, sexp] end module Array : sig - type 'a t = { - element : 'a; - size : Int.t option - } [@@deriving bin_io, compare, sexp] + type 'a t = { element : 'a; size : Int.t option } + [@@deriving bin_io, compare, sexp] end -type t = [ - | `Void - | `Basic of (cv qualifier, basic) spec - | `Pointer of (cvr qualifier, t) spec - | `Array of (cvr qualifier, array) spec - | `Structure of (no_qualifier, compound) spec - | `Union of (no_qualifier, compound) spec - | `Function of (no_qualifier, proto) spec -] [@@deriving bin_io, compare, sexp] -and proto = t Proto.t [@@deriving bin_io, compare, sexp] -and compound = t Compound.t [@@deriving bin_io, compare, sexp] -and array = t Array.t [@@deriving bin_io, compare, sexp] - - -type scalar = [ - | `Basic of (cv qualifier,basic) spec +type t = + [ `Void + | `Basic of (cv qualifier, basic) spec | `Pointer of (cvr qualifier, t) spec -] [@@deriving bin_io, compare, sexp] + | `Array of (cvr qualifier, array) spec + | `Structure of (no_qualifier, compound) spec + | `Union of (no_qualifier, compound) spec + | `Function of (no_qualifier, proto) spec ] +[@@deriving bin_io, compare, sexp] -type aggregate = [ - | `Array of (no_qualifier, t) spec - | `Structure of (no_qualifier, t list) spec -] [@@deriving bin_io, compare, sexp] +and proto = t Proto.t [@@deriving bin_io, compare, sexp] +and compound = t Compound.t [@@deriving bin_io, compare, sexp] +and array = t Array.t [@@deriving bin_io, compare, sexp] +type scalar = + [ `Basic of (cv qualifier, basic) spec | `Pointer of (cvr qualifier, t) spec ] +[@@deriving bin_io, compare, sexp] + +type aggregate = + [ `Array of (no_qualifier, t) spec + | `Structure of (no_qualifier, t list) spec ] +[@@deriving bin_io, compare, sexp] -(** [attrs t] the list of attributes associated with the type [t]. *) val attrs : t -> attr list +(** [attrs t] the list of attributes associated with the type [t]. *) -(** [is_const t] is [true] if type is const-qualified.*) val is_const : t -> Bool.t +(** [is_const t] is [true] if type is const-qualified.*) -(** [is_volatile t] is [true] if type is volatile-qualified.*) val is_volatile : t -> Bool.t +(** [is_volatile t] is [true] if type is volatile-qualified.*) -(** [is_restrict t] is [true] if type is restrict-qualified.*) val is_restrict : t -> Bool.t +(** [is_restrict t] is [true] if type is restrict-qualified.*) - -(** [is_void t] true iff [t] is [`Void] *) val is_void : t -> Bool.t - +(** [is_void t] true iff [t] is [`Void] *) (** {2 Basic Types} *) +val basic : ?attrs:attr list -> ?const:Bool.t -> ?volatile:Bool.t -> basic -> t (** [basic x] constructs a basic type. - Example, [basic `char]. - All parameters default to false or empty. + Example, [basic `char]. All parameters default to false or empty. @since 2.5.0 *) -val basic : ?attrs:attr list -> ?const:Bool.t -> ?volatile:Bool.t -> basic -> t val is_basic : t -> Bool.t val is_char : t -> Bool.t @@ -187,54 +139,50 @@ val is_real : t -> Bool.t val is_complex : t -> Bool.t val is_floating : t -> Bool.t - (** {2 Pointers and Arrays} *) - -(** [pointer t] constructs a pointer to the type [t]. *) val pointer : ?attrs:attr list -> ?const:Bool.t -> ?volatile:Bool.t -> - ?restrict:Bool.t -> t -> t - -(** [array t] constructs an array of type [t] elements. + ?restrict:Bool.t -> + t -> + t +(** [pointer t] constructs a pointer to the type [t]. *) - The [size] is the optional size (the number of elements) of the array. -*) val array : ?attrs:attr list -> ?const:Bool.t -> ?volatile:Bool.t -> ?restrict:Bool.t -> ?size:Int.t -> - t -> t + t -> + t +(** [array t] constructs an array of type [t] elements. -val is_array : t -> Bool.t + The [size] is the optional size (the number of elements) of the array. *) +val is_array : t -> Bool.t val is_pointer : t -> Bool.t -(** {2 Compounds } *) +(** {2 Compounds} *) -(** [structure name fields] constructure a structure type. *) val structure : ?attrs:attr list -> string -> (string * t) list -> t +(** [structure name fields] constructure a structure type. *) val is_structure : t -> Bool.t -(** [union name fields] conunion a union type. *) val union : ?attrs:attr list -> string -> (string * t) list -> t +(** [union name fields] conunion a union type. *) val is_union : t -> Bool.t - +val function_ : + ?attrs:attr list -> ?variadic:Bool.t -> ?return:t -> (string * t) list -> t (** [function args] constructs a function type. - @param return defaults to [`Void]. -*) -val function_ : ?attrs:attr list -> ?variadic:Bool.t -> ?return:t -> - (string * t) list -> t + @param return defaults to [`Void]. *) val is_function : t -> Bool.t - val pp : Format.formatter -> t -> unit val pp_proto : Format.formatter -> proto -> unit diff --git a/lib/bap_c/bap_c_type_mapper.ml b/lib/bap_c/bap_c_type_mapper.ml index b385c4650..95d301ddb 100644 --- a/lib/bap_c/bap_c_type_mapper.ml +++ b/lib/bap_c/bap_c_type_mapper.ml @@ -1,322 +1,282 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Monads.Std open Bap_c_type -module Make(M : Monad.S2) = struct +module Make (M : Monad.S2) = struct open M.Monad_infix - type ('a,'e) m = ('a,'e) M.t - - class ['e] base = object(self) - method private visit : - 'a. ('a -> (unit,'e) m) -> ('a -> ('a,'e) m) -> ('a -> (unit,'e) m) -> - 'a -> ('a,'e) m = - fun enter visit leave t -> - enter t >>= fun () -> - visit t >>= fun t -> - leave t >>= fun () -> - M.return t - - method run = self#map_type - - method enter_type _ = M.return () - method leave_type _ = M.return () - method map_type = - self#visit self#enter_type self#do_map_type self#leave_type - - method enter_void _ = M.return () - method leave_void _ = M.return () - method map_void = - self#visit self#enter_void self#do_map_void self#leave_void - - method enter_array _ = M.return () - method leave_array _ = M.return () - method map_array = - self#visit self#enter_array self#do_map_array self#leave_array - - method enter_basic _ = M.return () - method leave_basic _ = M.return () - method map_basic = - self#visit self#enter_basic self#do_map_basic self#leave_basic - - method enter_pointer _ = M.return () - method leave_pointer _ = M.return () - method map_pointer = - self#visit - self#enter_pointer - self#do_map_pointer - self#leave_pointer - - method enter_function _ = M.return () - method leave_function _ = M.return () - method map_function = - self#visit - self#enter_function - self#do_map_function - self#leave_function - - method enter_union _ = M.return () - method leave_union _ = M.return () - method map_union = - self#visit - self#enter_union - self#do_map_union - self#leave_union - - method enter_structure _ = M.return () - method leave_structure _ = M.return () - method map_structure = - self#visit - self#enter_structure - self#do_map_structure - self#leave_structure - - method enter_cv_qualifier _ = M.return () - method leave_cv_qualifier _ = M.return () - method map_cv_qualifier = - self#visit - self#enter_cv_qualifier - self#do_map_cv_qualifier - self#leave_cv_qualifier - - method enter_cvr_qualifier _ = M.return () - method leave_cvr_qualifier _ = M.return () - method map_cvr_qualifier = - self#visit - self#enter_cvr_qualifier - self#do_map_cvr_qualifier - self#leave_cvr_qualifier - - method enter_integer _ = M.return () - method leave_integer _ = M.return () - method map_integer = - self#visit - self#enter_integer - self#do_map_integer - self#leave_integer - - method enter_floating _ = M.return () - method leave_floating _ = M.return () - method map_floating = - self#visit - self#enter_floating - self#do_map_floating - self#leave_floating - - method enter_char _ = M.return () - method leave_char _ = M.return () - method map_char = - self#visit - self#enter_char - self#do_map_char - self#leave_char - - method enter_signed _ = M.return () - method leave_signed _ = M.return () - method map_signed = - self#visit - self#enter_signed - self#do_map_signed - self#leave_signed - - method enter_unsigned _ = M.return () - method leave_unsigned _ = M.return () - method map_unsigned = - self#visit - self#enter_unsigned - self#do_map_unsigned - self#leave_unsigned - - method enter_enum _ = M.return () - method leave_enum _ = M.return () - method map_enum = - self#visit - self#enter_enum - self#do_map_enum - self#leave_enum - - method enter_real _ = M.return () - method leave_real _ = M.return () - method map_real = - self#visit - self#enter_real - self#do_map_real - self#leave_real - - method enter_complex _ = M.return () - method leave_complex _ = M.return () - method map_complex = - self#visit - self#enter_complex - self#do_map_complex - self#leave_complex - - method enter_name _ = M.return () - method leave_name _ = M.return () - method map_name = - self#visit - self#enter_name - self#do_map_name - self#leave_name - - method enter_attrs _ = M.return () - method leave_attrs _ = M.return () - method map_attrs = - self#visit - self#enter_attrs - self#do_map_attrs - self#leave_attrs - - method enter_attr _ = M.return () - method leave_attr _ = M.return () - method map_attr = - self#visit - self#enter_attr - self#do_map_attr - self#leave_attr - - method private do_map_type : t -> (t,'e) m = function - | `Void -> - self#map_void `Void - | `Array spec -> - self#map_spec - self#map_cvr_qualifier - self#map_array spec >>| fun spec -> - `Array spec - | `Basic spec -> - self#map_spec - self#map_cv_qualifier - self#map_basic spec >>| fun spec -> - `Basic spec - | `Pointer spec -> - self#map_spec - self#map_cvr_qualifier - self#map_pointer spec >>| fun spec -> - `Pointer spec - | `Function spec -> - self#map_spec - self#map_no_qualifier - self#map_function spec >>| fun spec -> - `Function spec - | `Union spec -> - self#map_spec - self#map_no_qualifier - self#map_union spec >>| fun spec -> - `Union spec - | `Structure spec -> - self#map_spec - self#map_no_qualifier - self#map_structure spec >>| fun spec -> - `Structure spec - - method private do_map_void = M.return - - method private map_spec - : 'a 'b. ('a -> ('a,'e) m) -> ('b -> ('b,'e) m) -> - ('a,'b) spec -> (('a,'b) spec, 'e) m = - fun map_qualifier map_t s -> - let open Spec in - map_t s.t >>= fun t -> - map_qualifier s.qualifier >>= fun qualifier -> - self#map_attrs s.attrs >>= fun attrs -> - M.return {t; qualifier; attrs} - - method private do_map_basic = function - | #integer as t -> - self#map_integer t >>| fun x -> (x :> basic) - | #floating as t -> - self#map_floating t >>| fun x -> (x :> basic) - - method private do_map_pointer = self#map_type - method private do_map_union = self#map_compound - method private do_map_structure = self#map_structure - - method private do_map_integer : integer -> (integer,'e) m = function - | #char as t -> self#map_char t >>| fun x -> (x :> integer) - | #signed as t -> self#map_signed t >>| fun x -> (x :> integer) - | #unsigned as t -> self#map_unsigned t >>| fun x -> (x :> integer) - | `enum fields -> - self#map_enum fields >>| fun fields -> - `enum fields - - method private do_map_floating : floating -> (floating,'e) m = function - | #real as t -> self#map_real t >>| fun t -> (t :> floating) - | #complex as t -> self#map_complex t >>| fun t -> (t :> floating) - - method private do_map_char : char -> (char,'e) m = M.return - method private do_map_signed : signed -> (signed,'e) m = M.return - method private do_map_unsigned : unsigned -> (unsigned,'e) m = M.return - method private do_map_enum = M.return - method private do_map_real : real -> (real,'e) m = M.return - method private do_map_complex : complex -> (complex,'e) m = M.return - - method private do_map_cv_qualifier = M.return - method private do_map_cvr_qualifier = M.return - method private map_no_qualifier = M.return - - method private do_map_array {Array.element;size} = - self#map_type element >>| fun element -> - {Array.element; size} - - method private map_compound {Compound.name; fields} = - self#map_fields fields >>= fun fields -> - self#map_name name >>= fun name -> - M.return Compound.{name;fields} - - method private do_map_function {Proto.args; return; variadic} = - self#map_type return >>= fun return -> - self#map_fields args >>= fun args -> - M.return { Proto.args; return; variadic} - - method private map_fields fields = - M.all @@ List.map fields ~f:(fun (n,t) -> - self#map_name n >>= fun n -> - self#map_type t >>= fun t -> - M.return (n,t)) - - method private do_map_name = M.return - method private do_map_attrs attrs = - M.all @@ List.map ~f:self#map_attr attrs - method private do_map_attr = M.return - end + + type ('a, 'e) m = ('a, 'e) M.t + + class ['e] base = + object (self) + method private visit : + 'a. + ('a -> (unit, 'e) m) -> + ('a -> ('a, 'e) m) -> + ('a -> (unit, 'e) m) -> + 'a -> + ('a, 'e) m = + fun enter visit leave t -> + enter t >>= fun () -> + visit t >>= fun t -> + leave t >>= fun () -> M.return t + + method run = self#map_type + method enter_type _ = M.return () + method leave_type _ = M.return () + + method map_type = + self#visit self#enter_type self#do_map_type self#leave_type + + method enter_void _ = M.return () + method leave_void _ = M.return () + + method map_void = + self#visit self#enter_void self#do_map_void self#leave_void + + method enter_array _ = M.return () + method leave_array _ = M.return () + + method map_array = + self#visit self#enter_array self#do_map_array self#leave_array + + method enter_basic _ = M.return () + method leave_basic _ = M.return () + + method map_basic = + self#visit self#enter_basic self#do_map_basic self#leave_basic + + method enter_pointer _ = M.return () + method leave_pointer _ = M.return () + + method map_pointer = + self#visit self#enter_pointer self#do_map_pointer self#leave_pointer + + method enter_function _ = M.return () + method leave_function _ = M.return () + + method map_function = + self#visit self#enter_function self#do_map_function self#leave_function + + method enter_union _ = M.return () + method leave_union _ = M.return () + + method map_union = + self#visit self#enter_union self#do_map_union self#leave_union + + method enter_structure _ = M.return () + method leave_structure _ = M.return () + + method map_structure = + self#visit self#enter_structure self#do_map_structure + self#leave_structure + + method enter_cv_qualifier _ = M.return () + method leave_cv_qualifier _ = M.return () + + method map_cv_qualifier = + self#visit self#enter_cv_qualifier self#do_map_cv_qualifier + self#leave_cv_qualifier + + method enter_cvr_qualifier _ = M.return () + method leave_cvr_qualifier _ = M.return () + + method map_cvr_qualifier = + self#visit self#enter_cvr_qualifier self#do_map_cvr_qualifier + self#leave_cvr_qualifier + + method enter_integer _ = M.return () + method leave_integer _ = M.return () + + method map_integer = + self#visit self#enter_integer self#do_map_integer self#leave_integer + + method enter_floating _ = M.return () + method leave_floating _ = M.return () + + method map_floating = + self#visit self#enter_floating self#do_map_floating self#leave_floating + + method enter_char _ = M.return () + method leave_char _ = M.return () + + method map_char = + self#visit self#enter_char self#do_map_char self#leave_char + + method enter_signed _ = M.return () + method leave_signed _ = M.return () + + method map_signed = + self#visit self#enter_signed self#do_map_signed self#leave_signed + + method enter_unsigned _ = M.return () + method leave_unsigned _ = M.return () + + method map_unsigned = + self#visit self#enter_unsigned self#do_map_unsigned self#leave_unsigned + + method enter_enum _ = M.return () + method leave_enum _ = M.return () + + method map_enum = + self#visit self#enter_enum self#do_map_enum self#leave_enum + + method enter_real _ = M.return () + method leave_real _ = M.return () + + method map_real = + self#visit self#enter_real self#do_map_real self#leave_real + + method enter_complex _ = M.return () + method leave_complex _ = M.return () + + method map_complex = + self#visit self#enter_complex self#do_map_complex self#leave_complex + + method enter_name _ = M.return () + method leave_name _ = M.return () + + method map_name = + self#visit self#enter_name self#do_map_name self#leave_name + + method enter_attrs _ = M.return () + method leave_attrs _ = M.return () + + method map_attrs = + self#visit self#enter_attrs self#do_map_attrs self#leave_attrs + + method enter_attr _ = M.return () + method leave_attr _ = M.return () + + method map_attr = + self#visit self#enter_attr self#do_map_attr self#leave_attr + + method private do_map_type : t -> (t, 'e) m = + function + | `Void -> self#map_void `Void + | `Array spec -> + self#map_spec self#map_cvr_qualifier self#map_array spec + >>| fun spec -> `Array spec + | `Basic spec -> + self#map_spec self#map_cv_qualifier self#map_basic spec + >>| fun spec -> `Basic spec + | `Pointer spec -> + self#map_spec self#map_cvr_qualifier self#map_pointer spec + >>| fun spec -> `Pointer spec + | `Function spec -> + self#map_spec self#map_no_qualifier self#map_function spec + >>| fun spec -> `Function spec + | `Union spec -> + self#map_spec self#map_no_qualifier self#map_union spec + >>| fun spec -> `Union spec + | `Structure spec -> + self#map_spec self#map_no_qualifier self#map_structure spec + >>| fun spec -> `Structure spec + + method private do_map_void = M.return + + method private map_spec : + 'a 'b. + ('a -> ('a, 'e) m) -> + ('b -> ('b, 'e) m) -> + ('a, 'b) spec -> + (('a, 'b) spec, 'e) m = + fun map_qualifier map_t s -> + let open Spec in + map_t s.t >>= fun t -> + map_qualifier s.qualifier >>= fun qualifier -> + self#map_attrs s.attrs >>= fun attrs -> + M.return { t; qualifier; attrs } + + method private do_map_basic = + function + | #integer as t -> self#map_integer t >>| fun x -> (x :> basic) + | #floating as t -> self#map_floating t >>| fun x -> (x :> basic) + + method private do_map_pointer = self#map_type + method private do_map_union = self#map_compound + method private do_map_structure = self#map_structure + + method private do_map_integer : integer -> (integer, 'e) m = + function + | #char as t -> self#map_char t >>| fun x -> (x :> integer) + | #signed as t -> self#map_signed t >>| fun x -> (x :> integer) + | #unsigned as t -> self#map_unsigned t >>| fun x -> (x :> integer) + | `enum fields -> self#map_enum fields >>| fun fields -> `enum fields + + method private do_map_floating : floating -> (floating, 'e) m = + function + | #real as t -> self#map_real t >>| fun t -> (t :> floating) + | #complex as t -> self#map_complex t >>| fun t -> (t :> floating) + + method private do_map_char : char -> (char, 'e) m = M.return + method private do_map_signed : signed -> (signed, 'e) m = M.return + method private do_map_unsigned : unsigned -> (unsigned, 'e) m = M.return + method private do_map_enum = M.return + method private do_map_real : real -> (real, 'e) m = M.return + method private do_map_complex : complex -> (complex, 'e) m = M.return + method private do_map_cv_qualifier = M.return + method private do_map_cvr_qualifier = M.return + method private map_no_qualifier = M.return + + method private do_map_array { Array.element; size } = + self#map_type element >>| fun element -> { Array.element; size } + + method private map_compound { Compound.name; fields } = + self#map_fields fields >>= fun fields -> + self#map_name name >>= fun name -> M.return Compound.{ name; fields } + + method private do_map_function { Proto.args; return; variadic } = + self#map_type return >>= fun return -> + self#map_fields args >>= fun args -> + M.return { Proto.args; return; variadic } + + method private map_fields fields = + M.all + @@ List.map fields ~f:(fun (n, t) -> + self#map_name n >>= fun n -> + self#map_type t >>= fun t -> M.return (n, t)) + + method private do_map_name = M.return + + method private do_map_attrs attrs = + M.all @@ List.map ~f:self#map_attr attrs + + method private do_map_attr = M.return + end end +module Ident2 : Monad.S2 with type ('a, 'e) t = 'a = struct + type ('a, 'e) t = 'a + + include Monad.Make2 (struct + type nonrec ('a, 'e) t = ('a, 'e) t -module Ident2 : Monad.S2 with type ('a,'e) t = 'a = struct - type ('a,'e) t = 'a - include Monad.Make2(struct - type nonrec ('a,'e) t = ('a,'e) t - let return = Fn.id - let bind x f = f x - let map = `Define_using_bind - end) + let return = Fn.id + let bind x f = f x + let map = `Define_using_bind + end) end module Search = struct module SM = Monad.State open SM.Monad_infix - type ('a,'e) t = ('a option, 'e option) SM.t - let finished x : ('a,'e) t = - SM.put (Some x) >>= fun () -> - SM.return None + type ('a, 'e) t = ('a option, 'e option) SM.t + let finished x : ('a, 'e) t = SM.put (Some x) >>= fun () -> SM.return None let result x : 'e option = SM.exec x None - include Monad.Make2(struct - type nonrec ('a,'e) t = ('a,'e) t - let return x = SM.return (Some x) - let bind x f = - x >>= function - | None -> SM.return None - | Some x -> f x - let map = `Define_using_bind - end) + include Monad.Make2 (struct + type nonrec ('a, 'e) t = ('a, 'e) t + let return x = SM.return (Some x) + let bind x f = x >>= function None -> SM.return None | Some x -> f x + let map = `Define_using_bind + end) end - -module State = Make(Monad.State) -module Finder = Make(Search) - -include Make(Ident2) +module State = Make (Monad.State) +module Finder = Make (Search) +include Make (Ident2) diff --git a/lib/bap_c/bap_c_type_mapper.mli b/lib/bap_c/bap_c_type_mapper.mli index f5f138cb8..8048f2704 100644 --- a/lib/bap_c/bap_c_type_mapper.mli +++ b/lib/bap_c/bap_c_type_mapper.mli @@ -1,34 +1,30 @@ -open Core_kernel[@@warning "-D"] +open Core open Bap.Std open Monads.Std open Bap_c_type_mapper_intf - +include S with type ('a, 'e) m = 'a (** include visitor/mapper with the monad stripped away. *) -include S with type ('a,'e) m = 'a (** Search Monad. - A monad for searching with abnormal exit, i.e., the computation - terminates as soon as an item is found. *) + A monad for searching with abnormal exit, i.e., the computation terminates + as soon as an item is found. *) module Search : sig include Monad.S2 + val finished : 'e -> ('a, 'e) t + (** [finished needle] is called when a search is finished, it will terminate + the search with the [needle] as a result. *) - (** [finished needle] is called when a search is finished, it will - terminate the search with the [needle] as a result. *) - val finished : 'e -> ('a,'e) t - - - (** [result s] runs the computation [s] and extracts the result. *) - val result : ('a,'e) t -> 'e option + val result : ('a, 'e) t -> 'e option + (** [result s] runs the computation [s] and extracts the result. *) end +module State : S with type ('a, 'e) m = ('a, 'e) Monad.State.t +(** the mapper lifted into a regular state monad. *) -(** the mapper lifted into a regular state monad. *) -module State : S with type ('a,'e) m = ('a,'e) Monad.State.t - - +module Finder : S with type ('a, 'e) m = ('a, 'e) Search.t (** the visitor lifted into the search monad. For example, the following code will find the first pointer: @@ -36,13 +32,15 @@ module State : S with type ('a,'e) m = ('a,'e) Monad.State.t {[ module Search = C.Type.Mapper.Search - let find_pointer t = Search.result @@ (object - inherit [C.Type.t] C.Type.Mapper.Finder.base - method! enter_pointer = Search.finished - end)#run t + let find_pointer t = + Search.result + @@ (object + inherit [C.Type.t] C.Type.Mapper.Finder.base + method! enter_pointer = Search.finished + end) + #run + t ]} *) -module Finder : S with type ('a,'e) m = ('a,'e) Search.t - -(** [Make(M)] lifts the visitor into monad [M]. *) -module Make( M : Monad.S2) : S with type ('a,'e) m = ('a,'e) M.t +(** [Make(M)] lifts the visitor into monad [M]. *) +module Make (M : Monad.S2) : S with type ('a, 'e) m = ('a, 'e) M.t diff --git a/lib/bap_c/bap_c_type_mapper_intf.ml b/lib/bap_c/bap_c_type_mapper_intf.ml index 92d0b4aee..6d1d7fd77 100644 --- a/lib/bap_c/bap_c_type_mapper_intf.ml +++ b/lib/bap_c/bap_c_type_mapper_intf.ml @@ -2,146 +2,121 @@ open Bap_c_type (** Type mapper, visitor, iterator, finder all in one. - The interface is wrapped into a monad, that allows, by choosing a - proper monad implement all the above morphisms and iterators. If - you're afraid of the monads, then calm down and continue reading, - there is a way to use this interface without any monads. + The interface is wrapped into a monad, that allows, by choosing a proper + monad implement all the above morphisms and iterators. If you're afraid of + the monads, then calm down and continue reading, there is a way to use this + interface without any monads. - Each syntactical element [t:T] of the type system is represented - with three methods: + Each syntactical element [t:T] of the type system is represented with three + methods: - [enter_T t] - [map_T t] - [leave_T t] - The [map_T t] method first calls [enter_T t], the applies a deep - mapping of the [t] to [t'] and finally calls [leave_T t']. + The [map_T t] method first calls [enter_T t], the applies a deep mapping of + the [t] to [t'] and finally calls [leave_T t']. - - Override [enter_T] if an element shouldn't be morphed. The - combination of [enter_t], [leave_T] allows to perform different - visiting strategies. If mapping is needed then a [map_T] method - should be overridden. A usual pattern would be: + Override [enter_T] if an element shouldn't be morphed. The combination of + [enter_t], [leave_T] allows to perform different visiting strategies. If + mapping is needed then a [map_T] method should be overridden. A usual + pattern would be: {[ - class my_mapper = object(self) - inherit base as super - - method map_T t = - super#map_T t >>| self#my_transformation - - method private my_transformation t = t - end + class my_mapper = + object (self) + inherit base as super + method map_T t = super#map_T t >>| self#my_transformation + method private my_transformation t = t + end ]} - - All method calls are bound with monadic operations. This makes it - possible to parametrize visitor with different computation strategies - (depending on a binding strategy of a monad). - - If the monad is a null monad, where bind is a reverse application, - and returns is an identity, then we have a normal execution and - the whole visitor degrade to a regular visitor/mapper without any - monads. Such visitor is instantiated as {!C.Type.Mapper.base}. - - The other two useful monads, are [State] and [Search]. The visitor - in these monads is instantiated as {!C.Type.Mapper.State.base} and - {!C.Type.Mapper.Finder.base} correspondingly. The former is useful - to implement a regular visitor, that will fold an abitrary value - over the type structure, or to implement a mapper, that can also - have a state. The latter, is useful for implementing a search with - a shortcut, i.e., when the searching is terminated as soon as the - target is found. -*) + All method calls are bound with monadic operations. This makes it possible + to parametrize visitor with different computation strategies (depending on a + binding strategy of a monad). + + If the monad is a null monad, where bind is a reverse application, and + returns is an identity, then we have a normal execution and the whole + visitor degrade to a regular visitor/mapper without any monads. Such visitor + is instantiated as {!C.Type.Mapper.base}. + + The other two useful monads, are [State] and [Search]. The visitor in these + monads is instantiated as {!C.Type.Mapper.State.base} and + {!C.Type.Mapper.Finder.base} correspondingly. The former is useful to + implement a regular visitor, that will fold an abitrary value over the type + structure, or to implement a mapper, that can also have a state. The latter, + is useful for implementing a search with a shortcut, i.e., when the + searching is terminated as soon as the target is found. *) module type S = sig - type ('a,'e) m + type ('a, 'e) m class ['e] base : object - method run : t -> (t,'e) m - - method enter_type : t -> (unit,'e) m - method map_type : t -> (t,'e) m - method leave_type : t -> (unit,'e) m - - method enter_void : t -> (unit,'e) m - method map_void : t -> (t,'e) m - method leave_void : t -> (unit,'e) m - - - method enter_array : array -> (unit,'e) m - method map_array : array -> (array,'e) m - method leave_array : array -> (unit,'e) m - - method enter_pointer : t -> (unit,'e) m - method map_pointer : t -> (t,'e) m - method leave_pointer : t -> (unit,'e) m - - method enter_function : proto -> (unit,'e) m - method map_function : proto -> (proto,'e) m - method leave_function : proto -> (unit,'e) m - - method enter_union : compound -> (unit,'e) m - method map_union : compound -> (compound,'e) m - method leave_union : compound -> (unit,'e) m - - method enter_structure : compound -> (unit,'e) m - method map_structure : compound -> (compound,'e) m - method leave_structure : compound -> (unit,'e) m - - method enter_cv_qualifier : cv qualifier -> (unit,'e) m - method map_cv_qualifier : cv qualifier -> (cv qualifier,'e) m - method leave_cv_qualifier : cv qualifier -> (unit,'e) m - - method enter_cvr_qualifier : cvr qualifier -> (unit,'e) m - method map_cvr_qualifier : cvr qualifier -> (cvr qualifier,'e) m - method leave_cvr_qualifier : cvr qualifier -> (unit,'e) m - - method enter_basic : basic -> (unit,'e) m - method map_basic : basic -> (basic,'e) m - method leave_basic : basic -> (unit,'e) m - - method enter_integer : integer -> (unit,'e) m - method map_integer : integer -> (integer,'e) m - method leave_integer : integer -> (unit,'e) m - - method enter_char : char -> (unit,'e) m - method map_char : char -> (char,'e) m - method leave_char : char -> (unit,'e) m - - method enter_signed : signed -> (unit,'e) m - method map_signed : signed -> (signed,'e) m - method leave_signed : signed -> (unit,'e) m - - method enter_unsigned : unsigned -> (unit,'e) m - method map_unsigned : unsigned -> (unsigned,'e) m - method leave_unsigned : unsigned -> (unit,'e) m - - method enter_enum : (string * int64 option) list -> (unit,'e) m - method map_enum : (string * int64 option) list -> ((string * int64 option) list,'e) m - method leave_enum : (string * int64 option) list -> (unit,'e) m - - method enter_floating : floating -> (unit,'e) m - method map_floating : floating -> (floating,'e) m - method leave_floating : floating -> (unit,'e) m - - method enter_real : real -> (unit,'e) m - method map_real : real -> (real,'e) m - method leave_real : real -> (unit,'e) m - - method enter_complex : complex -> (unit,'e) m - method map_complex : complex -> (complex,'e) m - method leave_complex : complex -> (unit,'e) m - - method enter_attrs : attr list -> (unit,'e) m - method map_attrs : attr list -> (attr list,'e) m - method leave_attrs : attr list -> (unit,'e) m - - method enter_attr : attr -> (unit,'e) m - method map_attr : attr -> (attr,'e) m - method leave_attr : attr -> (unit,'e) m - - method enter_name : string -> (unit,'e) m - method map_name : string -> (string,'e) m - method leave_name : string -> (unit,'e) m + method run : t -> (t, 'e) m + method enter_type : t -> (unit, 'e) m + method map_type : t -> (t, 'e) m + method leave_type : t -> (unit, 'e) m + method enter_void : t -> (unit, 'e) m + method map_void : t -> (t, 'e) m + method leave_void : t -> (unit, 'e) m + method enter_array : array -> (unit, 'e) m + method map_array : array -> (array, 'e) m + method leave_array : array -> (unit, 'e) m + method enter_pointer : t -> (unit, 'e) m + method map_pointer : t -> (t, 'e) m + method leave_pointer : t -> (unit, 'e) m + method enter_function : proto -> (unit, 'e) m + method map_function : proto -> (proto, 'e) m + method leave_function : proto -> (unit, 'e) m + method enter_union : compound -> (unit, 'e) m + method map_union : compound -> (compound, 'e) m + method leave_union : compound -> (unit, 'e) m + method enter_structure : compound -> (unit, 'e) m + method map_structure : compound -> (compound, 'e) m + method leave_structure : compound -> (unit, 'e) m + method enter_cv_qualifier : cv qualifier -> (unit, 'e) m + method map_cv_qualifier : cv qualifier -> (cv qualifier, 'e) m + method leave_cv_qualifier : cv qualifier -> (unit, 'e) m + method enter_cvr_qualifier : cvr qualifier -> (unit, 'e) m + method map_cvr_qualifier : cvr qualifier -> (cvr qualifier, 'e) m + method leave_cvr_qualifier : cvr qualifier -> (unit, 'e) m + method enter_basic : basic -> (unit, 'e) m + method map_basic : basic -> (basic, 'e) m + method leave_basic : basic -> (unit, 'e) m + method enter_integer : integer -> (unit, 'e) m + method map_integer : integer -> (integer, 'e) m + method leave_integer : integer -> (unit, 'e) m + method enter_char : char -> (unit, 'e) m + method map_char : char -> (char, 'e) m + method leave_char : char -> (unit, 'e) m + method enter_signed : signed -> (unit, 'e) m + method map_signed : signed -> (signed, 'e) m + method leave_signed : signed -> (unit, 'e) m + method enter_unsigned : unsigned -> (unit, 'e) m + method map_unsigned : unsigned -> (unsigned, 'e) m + method leave_unsigned : unsigned -> (unit, 'e) m + method enter_enum : (string * int64 option) list -> (unit, 'e) m + + method map_enum : + (string * int64 option) list -> ((string * int64 option) list, 'e) m + + method leave_enum : (string * int64 option) list -> (unit, 'e) m + method enter_floating : floating -> (unit, 'e) m + method map_floating : floating -> (floating, 'e) m + method leave_floating : floating -> (unit, 'e) m + method enter_real : real -> (unit, 'e) m + method map_real : real -> (real, 'e) m + method leave_real : real -> (unit, 'e) m + method enter_complex : complex -> (unit, 'e) m + method map_complex : complex -> (complex, 'e) m + method leave_complex : complex -> (unit, 'e) m + method enter_attrs : attr list -> (unit, 'e) m + method map_attrs : attr list -> (attr list, 'e) m + method leave_attrs : attr list -> (unit, 'e) m + method enter_attr : attr -> (unit, 'e) m + method map_attr : attr -> (attr, 'e) m + method leave_attr : attr -> (unit, 'e) m + method enter_name : string -> (unit, 'e) m + method map_name : string -> (string, 'e) m + method leave_name : string -> (unit, 'e) m end end diff --git a/lib/bap_c/bap_c_type_printer.ml b/lib/bap_c/bap_c_type_printer.ml index c8d0c8929..e44ece753 100644 --- a/lib/bap_c/bap_c_type_printer.ml +++ b/lib/bap_c/bap_c_type_printer.ml @@ -1 +1,2 @@ -let pp = Bap_c_type.pp and pp_proto = Bap_c_type.pp_proto +let pp = Bap_c_type.pp +and pp_proto = Bap_c_type.pp_proto diff --git a/lib/bap_c/dune b/lib/bap_c/dune index 3871b2c14..41c19223a 100644 --- a/lib/bap_c/dune +++ b/lib/bap_c/dune @@ -1,13 +1,14 @@ (library - (name bap_c) - (public_name bap-c) - (preprocess (pps ppx_bap)) - (libraries - bap - bap-abi - bap-api - bap-core-theory - bap-knowledge - core_kernel - monads - regular)) \ No newline at end of file + (name bap_c) + (public_name bap-c) + (preprocess + (pps ppx_bap)) + (libraries + bap + bap-abi + bap-api + bap-core-theory + bap-knowledge + core + monads + regular)) diff --git a/lib/bap_core_theory/bap_core_theory.ml b/lib/bap_core_theory/bap_core_theory.ml index 54cc3740f..fb31bca39 100644 --- a/lib/bap_core_theory/bap_core_theory.ml +++ b/lib/bap_core_theory/bap_core_theory.ml @@ -1,5 +1,4 @@ open Bap_knowledge - module KB = Knowledge module Theory = struct @@ -38,34 +37,30 @@ module Theory = struct type compiler = Compiler.t type role = Role.t type alias = Alias.t - type ('a,'k) origin = ('a,'k) Origin.t + type ('a, 'k) origin = ('a, 'k) Origin.t type system = System.t type abi = Abi.t type fabi = Fabi.t type filetype = Filetype.t - type 'a value = 'a Value.t - type 'a effect = 'a Effect.t + type 'a effect_ = 'a Effect.t type 'a pure = 'a value knowledge - type 'a eff = 'a effect knowledge - - + type 'a eff = 'a effect_ knowledge type bool = Bool.t pure type 'a bitv = 'a Bitv.t pure - type ('a,'b) mem = ('a,'b) Mem.t pure + type ('a, 'b) mem = ('a, 'b) Mem.t pure type 'f float = 'f Float.t pure type rmode = Rmode.t pure - type data = Effect.Sort.data type ctrl = Effect.Sort.ctrl - - type ('r,'s) format = ('r,'s) Float.format - + type ('r, 's) format = ('r, 's) Float.format type 'a var = 'a Var.t type word = Bitvec.t type label = program Knowledge.Object.t type cls = Bap_core_theory_definition.theory_cls + let t = Bap_core_theory_definition.theory + type theory = Bap_core_theory_definition.theory type t = theory @@ -82,6 +77,7 @@ module Theory = struct module type Core = Bap_core_theory_definition.Core type core = (module Core) + module Basic = struct module Empty : Basic = Bap_core_theory_empty.Core module Make = Bap_core_theory_basic.Make diff --git a/lib/bap_core_theory/bap_core_theory.mli b/lib/bap_core_theory/bap_core_theory.mli index d0968f920..6a1f7b79d 100644 --- a/lib/bap_core_theory/bap_core_theory.mli +++ b/lib/bap_core_theory/bap_core_theory.mli @@ -2,48 +2,43 @@ {1 A Gentle Introduction to the Core Theory} - The Core Theory is an intermediate language that is designed to - express the semantics of computer programs. It focuses on programs - that are represented in binary machine code and is capable of an - accurate representation of the architectural and micro-architectural - details of the program behavior. - - The language is extensible. It is possible to add new language - features, without affecting existing analyses and even - without recompilation. Adding new analyses also doesn't require - any changes to the existing code, which also could be reused - without recompilation. Thus the language doesn't suffer from the - expression problem. - - The language is rigidly typed with types expressed as OCaml - types. A type of a Core Theory term is inferred (and checked) by - the OCaml type system, which can statically ascertain that the - term is not ill-formed and no analysis will get stuck. - - The language is adaptable. Analysts can select a designated - subset of the language which is relevant to their tasks without - getting bogged down by the irrelevant architectural details. - - The language can express the semantics of the floating-point - operations including operations, but not limiting to, specified in - the IEEE754 standard. - - The language facilitates developing custom intermediate - representation and languages, which could be seamlessly introduced - in the analysis pipeline without breaking existing components. The - new language is compatible with BIL, BIR, and older variants of - BIL. It is potentially compatible with any other intermediate - representations. - + The Core Theory is an intermediate language that is designed to express the + semantics of computer programs. It focuses on programs that are represented + in binary machine code and is capable of an accurate representation of the + architectural and micro-architectural details of the program behavior. + + The language is extensible. It is possible to add new language features, + without affecting existing analyses and even without recompilation. Adding + new analyses also doesn't require any changes to the existing code, which + also could be reused without recompilation. Thus the language doesn't suffer + from the expression problem. + + The language is rigidly typed with types expressed as OCaml types. A type of + a Core Theory term is inferred (and checked) by the OCaml type system, which + can statically ascertain that the term is not ill-formed and no analysis + will get stuck. + + The language is adaptable. Analysts can select a designated subset of the + language which is relevant to their tasks without getting bogged down by the + irrelevant architectural details. + + The language can express the semantics of the floating-point operations + including operations, but not limiting to, specified in the IEEE754 + standard. + + The language facilitates developing custom intermediate representation and + languages, which could be seamlessly introduced in the analysis pipeline + without breaking existing components. The new language is compatible with + BIL, BIR, and older variants of BIL. It is potentially compatible with any + other intermediate representations. {2 The Language Hierarchy} - The Core Theory is not really a language but a family of - languages. If we will order the languages in this family by - subsumption, then we will get the following Hasse diagram: + The Core Theory is not really a language but a family of languages. If we + will order the languages in this family by subsumption, then we will get the + following Hasse diagram: {v - o Core | Trans o--------+--------o Float @@ -56,58 +51,51 @@ | | | | | o o o o o Init Bool Bitv Memory Effect - v} - The Core language subsumes all other sub-languages and includes - modular arithmetic and other operations on bitvectos, operations - with memories, registers, floating-points including - transcendental functions. - - The reason to have so many languages is purely pragmatic: to - enable specialized implementations of analyses and lifters. This - structure is not really mandated, the languages are defined - structurally, not nominally, so it is possible to combine - languages in arbitrary ways, as well as define new languages. + The Core language subsumes all other sub-languages and includes modular + arithmetic and other operations on bitvectos, operations with memories, + registers, floating-points including transcendental functions. - The Core language, despite being at the top of our hierarchy, is - still very low-level and basic. It is intended to reflect - operations carried by classical computers with Harvard or - Princeton architectures. Therefore we chose the name "Core" to - reflect our vision of the Core language as the base for - higher-level hierarchies of languages. + The reason to have so many languages is purely pragmatic: to enable + specialized implementations of analyses and lifters. This structure is not + really mandated, the languages are defined structurally, not nominally, so + it is possible to combine languages in arbitrary ways, as well as define new + languages. + The Core language, despite being at the top of our hierarchy, is still very + low-level and basic. It is intended to reflect operations carried by + classical computers with Harvard or Princeton architectures. Therefore we + chose the name "Core" to reflect our vision of the Core language as the base + for higher-level hierarchies of languages. {2 Hierarchy of Terms} - Terms and operations of the Core Theory languages are typed to - prevent the creation of ill-formed programs. We use the word sort - to denote a set of terms that share the same properties. The Core - Theory comes with a collection of predefined sorts, which are used - to specify the Core language, but it is possible to define new - sorts, to keep the theory extensible. + Terms and operations of the Core Theory languages are typed to prevent the + creation of ill-formed programs. We use the word sort to denote a set of + terms that share the same properties. The Core Theory comes with a + collection of predefined sorts, which are used to specify the Core language, + but it is possible to define new sorts, to keep the theory extensible. - The terms of the Core Theory are divided in two classes - values - and effects. A value term denotes the semantics of programs that - produce values. Roughly, values correspond to the syntactic class of - language expressions. + The terms of the Core Theory are divided in two classes - values and + effects. A value term denotes the semantics of programs that produce values. + Roughly, values correspond to the syntactic class of language expressions. - Effects is a class of terms that do not produce values, but side - effects or just effects, e.g., changing a value of a register, - loading or storing memory location, performing I/O operation on a - port or issuing a synchronization barrier, etc. Roughly, effects - correspond to the syntactic class of language statements. + Effects is a class of terms that do not produce values, but side effects or + just effects, e.g., changing a value of a register, loading or storing + memory location, performing I/O operation on a port or issuing a + synchronization barrier, etc. Roughly, effects correspond to the syntactic + class of language statements. - Both values and effects are knowledge classes. Each class is - further subdivided into an infinite set of sorts. The class of - values is inhabited with terms of Bool, Bitv, Mem, Float, and - Rmode. Some sorts are indexed, so they represent an (infinite) - family of sorts. For example, Bitv[s] is a family of bitvector sorts - indexed by their widths, e.g, Bitv[8], Bitv[32], etc. - - The class of effects is subdivided into two sorts of effect, those - that affect the control flow and those that affect the data flow. + Both values and effects are knowledge classes. Each class is further + subdivided into an infinite set of sorts. The class of values is inhabited + with terms of Bool, Bitv, Mem, Float, and Rmode. Some sorts are indexed, so + they represent an (infinite) family of sorts. For example, Bitv[s] is a + family of bitvector sorts indexed by their widths, e.g, Bitv[8], Bitv[32], + etc. + The class of effects is subdivided into two sorts of effect, those that + affect the control flow and those that affect the data flow. {v Term @@ -126,143 +114,127 @@ Float[f,s] o--+ | Rmode o--+ - - v} - {2:vars Variables} - Variables are ubiquitous in programming languages and could be - used to reference memory locations, CPU registers, or just - be bound to expressions. Sometimes variables are typed, sometimes - they are just identifiers with not associated type. - - In the Core Theory all variables are sorted, i.e., they have an - associated value sort. Variables are also having scope (lexical - visibility), and extent (lifetime) Finally, variables could be - mutable or immutable. - - A physical variable is a global mutable variable with the infinite - scope and extent. They are used to refer predefined (micro) - architectural locations of a modeled system, e.g., registers, - memory banks, caches, register files, etc. Global variables has - identifiers that are the same as names, e.g., `RAX`, `R0`, `mem`, - etc. The important thing, is that a global variable usually has - some physical representation. - - Virtual variables are dual to physical variables and are further - subdivided into mutable and immutable. - - A mutable virtual variable represents an unspecified scratch - location that holds data of the specified sort. They could be used - to abstract an actual physical location in a modeled system (when - it is not relevant or just not known) or just to simplify the - analysis. The mutable virtual variables have identifier of the - form [#], e.g, [#1], [#2048], etc. - - Finally, an immutable virtual variable is a local variable that - holds a value of an expression. It has a limited scope and its - immutability is ensured by the type system since the scope of a - local binding can contain only pure terms, i.e., no - side-effects. These variables have identifiers of the form - [$], e.g., [$1], [$2], etc, and since their scope is + Variables are ubiquitous in programming languages and could be used to + reference memory locations, CPU registers, or just be bound to expressions. + Sometimes variables are typed, sometimes they are just identifiers with not + associated type. + + In the Core Theory all variables are sorted, i.e., they have an associated + value sort. Variables are also having scope (lexical visibility), and extent + (lifetime) Finally, variables could be mutable or immutable. + + A physical variable is a global mutable variable with the infinite scope and + extent. They are used to refer predefined (micro) architectural locations of + a modeled system, e.g., registers, memory banks, caches, register files, + etc. Global variables has identifiers that are the same as names, e.g., + `RAX`, `R0`, `mem`, etc. The important thing, is that a global variable + usually has some physical representation. + + Virtual variables are dual to physical variables and are further subdivided + into mutable and immutable. + + A mutable virtual variable represents an unspecified scratch location that + holds data of the specified sort. They could be used to abstract an actual + physical location in a modeled system (when it is not relevant or just not + known) or just to simplify the analysis. The mutable virtual variables have + identifier of the form [#], e.g, [#1], [#2048], etc. + + Finally, an immutable virtual variable is a local variable that holds a + value of an expression. It has a limited scope and its immutability is + ensured by the type system since the scope of a local binding can contain + only pure terms, i.e., no side-effects. These variables have identifiers of + the form [$], e.g., [$1], [$2], etc, and since their scope is limited, those identifiers are reused in different scopes. - {2 Theories and Semantics} - Languages of the Core Theory, including the Core itself are - represented as module signatures, i.e., they are pure abstractions - or interfaces that do not define any data types, functions, or values. - - This approach is called tagless-final style [1], pioneered by - Carette and Kiselyov and later rediscovered under the name "Object - algebras" by Oliviera and Cook [2]. We encourage to read those - papers and accompanying literature, but it is not strictly needed, - especially since the underlying idea is pretty simple. - - In the final style, an embedded language is not represented by an - abstract syntax tree or some intermediate representation data - structure, but by denotations in a semantic algebra. Where the - semantic algebra is an implementation (structure) of the language - signature. Or, in other words, it is its denotational semantics. - - The structure may choose, basically, any denotations, as long as - it fits the signature. For example, the language could be denoted - with its textual representation, BIL code, LLVM IR, BIR, sets of - reachable addresses, and so on. In other words, an implementation - of the Core signature could be seen as an analysis that computes the - property of a term. + Languages of the Core Theory, including the Core itself are represented as + module signatures, i.e., they are pure abstractions or interfaces that do + not define any data types, functions, or values. + + This approach is called tagless-final style [1], pioneered by Carette and + Kiselyov and later rediscovered under the name "Object algebras" by Oliviera + and Cook [2]. We encourage to read those papers and accompanying literature, + but it is not strictly needed, especially since the underlying idea is + pretty simple. + + In the final style, an embedded language is not represented by an abstract + syntax tree or some intermediate representation data structure, but by + denotations in a semantic algebra. Where the semantic algebra is an + implementation (structure) of the language signature. Or, in other words, it + is its denotational semantics. + + The structure may choose, basically, any denotations, as long as it fits the + signature. For example, the language could be denoted with its textual + representation, BIL code, LLVM IR, BIR, sets of reachable addresses, and so + on. In other words, an implementation of the Core signature could be seen as + an analysis that computes the property of a term. Unlike a classical final approach described in [1] the Core Theory - signatures do not include any abstract types, all types mentioned - in the theories are defined on the scope of the Core Theory. This - constraint basically turns a structure, which implements the - Theory, into a simple array of functions, a first class value with - no caveats (like types escaping the scope, etc). - - To enable each semantic algebra to have its own denotation we - employ ['a Knowledge.Value.t]. Thus both Core Theory values and - effects are instances of the Knowledge Value, parametrized by - corresponding class indices. The Knowledge Value is an instance of - Domain, that makes it ideal for representing the denotational - semantics of programs. The Knowledge value is an extensible - record, where each field corresponds to a particular denotation, - it is possible to store several denotations in one Knowledge + signatures do not include any abstract types, all types mentioned in the + theories are defined on the scope of the Core Theory. This constraint + basically turns a structure, which implements the Theory, into a simple + array of functions, a first class value with no caveats (like types escaping + the scope, etc). + + To enable each semantic algebra to have its own denotation we employ + ['a Knowledge.Value.t]. Thus both Core Theory values and effects are + instances of the Knowledge Value, parametrized by corresponding class + indices. The Knowledge Value is an instance of Domain, that makes it ideal + for representing the denotational semantics of programs. The Knowledge value + is an extensible record, where each field corresponds to a particular + denotation, it is possible to store several denotations in one Knowledge value. - Denotational semantics is composable, i.e., a denotation of - a term is composed from denotations of its constituent terms. - However, some denotations are context dependent. To enable - this, we made a term denotation an instance of ['a knowledge], - i.e., a knowledge dependent computation. + Denotational semantics is composable, i.e., a denotation of a term is + composed from denotations of its constituent terms. However, some + denotations are context dependent. To enable this, we made a term denotation + an instance of ['a knowledge], i.e., a knowledge dependent computation. - To summarize, a denotation is a structure that implements methods - of the corresponding structure. Each method corresponds to a - language form, e.g, [val add : 'a bitv -> 'a bitv -> 'a bitv], - corresponds to an addition of two bitvectors. The implementation, - builds the denotation of the term from the denotations of its - inputs, e.g., + To summarize, a denotation is a structure that implements methods of the + corresponding structure. Each method corresponds to a language form, e.g, + [val add : 'a bitv -> 'a bitv -> 'a bitv], corresponds to an addition of + two bitvectors. The implementation, builds the denotation of the term from + the denotations of its inputs, e.g., {[ let add x y = x >>-> fun x -> y >>-> fun y -> - match x, y with - | Some x, Some y -> const (Bitvec.(x + y)) + match (x, y) with + | Some x, Some y -> const Bitvec.(x + y) | _ -> nonconst ]} - Where [>>->] extracts the module specific denotation and [const], - [noncost] put them back (assuming that the denotation is the - classical constant folding lattice). - - The final style makes it easy to write a fold-style analysis, such - as constant folding, taint analysis, etc. Since all terms are - knowledge dependent computations, i.e., wrapped into the knowledge - monad, which turns any computation into a fixed-point computation, - it is also possible to write data-flow analysis and other forms of - abstract interpretation. In fact, it was shown, that any - optimization or analysis could be written in the final style in a - modular and composable way [3]. However, the classical approach, + Where [>>->] extracts the module specific denotation and [const], [noncost] + put them back (assuming that the denotation is the classical constant + folding lattice). + + The final style makes it easy to write a fold-style analysis, such as + constant folding, taint analysis, etc. Since all terms are knowledge + dependent computations, i.e., wrapped into the knowledge monad, which turns + any computation into a fixed-point computation, it is also possible to write + data-flow analysis and other forms of abstract interpretation. In fact, it + was shown, that any optimization or analysis could be written in the final + style in a modular and composable way [3]. However, the classical approach, that uses tagged AST and pattern matching is not denied at all. - Since the denotation could be anything (that is an instance of - domain), it is quite natural to use BIL, and BIR, or any other - concrete syntax tree as a possible denotation. Therefore, it is - possible to extract those denotations and write your analysis - using the save haven of pattern matching. - + Since the denotation could be anything (that is an instance of domain), it + is quite natural to use BIL, and BIR, or any other concrete syntax tree as a + possible denotation. Therefore, it is possible to extract those denotations + and write your analysis using the save haven of pattern matching. {2 Writing a new denotation} - Any denotation must be an instance of the Core signature. However, - it is not always required to implement all methods, as they could - be inherited from other instance or filled in with the Empty - Theory. Once analysis is written it should be declared, so that it - could be later run, e.g., let's extend a hypothetical - ["constant-tracker"] analysis: + Any denotation must be an instance of the Core signature. However, it is not + always required to implement all methods, as they could be inherited from + other instance or filled in with the Empty Theory. Once analysis is written + it should be declared, so that it could be later run, e.g., let's extend a + hypothetical ["constant-tracker"] analysis: {[ let () = @@ -277,41 +249,37 @@ end ]} - The real analysis should store it results either in the knowledge - base or directly in denotations of the terms (or in both places). + The real analysis should store it results either in the knowledge base or + directly in denotations of the terms (or in both places). {2 Instantiating a theory} - To use a theory we need to instantiate it. In the previous section - we instantiated a theory using the [Theory.require] function, that - returns a previously declared theory. But what if we need to use - several denotations, e.g., when we want to have both a - constant-tracker and BIL for our analysis. - - The final style implementations, in Scala, OCaml, and Haskell, - usually employ functors or type classes, which both require a user - to select an instance of a type class, which should be used in - the given context. Some languages allow only one instance of a class - per type, others allow multiple, but still needs a declaration of - some instances as canonical. - - The Core Theory addresses this issue by leveraging the structure - of the Knowledge universal values and instantiating all theory - instances simultaneously, so that for each language term the sum - of all denotations is provided. To exclude the overhead of - evaluating denotations that might be unused, it is possible to - limit the set of instantiated theories by specifying a concrete - list of required theories. The requirements are specified in the - form of semantic tags instead of concrete theory names, to prevent - explicit dependencies on implementations. However, it is still + To use a theory we need to instantiate it. In the previous section we + instantiated a theory using the [Theory.require] function, that returns a + previously declared theory. But what if we need to use several denotations, + e.g., when we want to have both a constant-tracker and BIL for our analysis. + + The final style implementations, in Scala, OCaml, and Haskell, usually + employ functors or type classes, which both require a user to select an + instance of a type class, which should be used in the given context. Some + languages allow only one instance of a class per type, others allow + multiple, but still needs a declaration of some instances as canonical. + + The Core Theory addresses this issue by leveraging the structure of the + Knowledge universal values and instantiating all theory instances + simultaneously, so that for each language term the sum of all denotations is + provided. To exclude the overhead of evaluating denotations that might be + unused, it is possible to limit the set of instantiated theories by + specifying a concrete list of required theories. The requirements are + specified in the form of semantic tags instead of concrete theory names, to + prevent explicit dependencies on implementations. However, it is still possible to explicitly request a particular theory. - It is also possible to define the context of the theory, to enable - those theories that are not generic and are applicable only to the - specified context. For example, + It is also possible to define the context of the theory, to enable those + theories that are not generic and are applicable only to the specified + context. For example, {[ - Theory.instance () ~context:["arm"; "arm-gnueabi"] ~requires:[ @@ -322,16 +290,14 @@ "bap.std:bil-semantics" ] >>= Theory.require >>= fun (module Theory) -> - ]} - In the example above, theories that are specific to ARM - architecture, in particular to the arm-gnueabi ABI, will be - instantiated (in addition to other general theories). The - [requires] parameter specifies a few semantic tags, describing - what kind of semantic information is needed, as well as one theory - explicitly, the [bap.std:bil-semantics], to ensure that each term - has a BIL denotation. + In the example above, theories that are specific to ARM architecture, in + particular to the arm-gnueabi ABI, will be instantiated (in addition to + other general theories). The [requires] parameter specifies a few semantic + tags, describing what kind of semantic information is needed, as well as one + theory explicitly, the [bap.std:bil-semantics], to ensure that each term has + a BIL denotation. References: @@ -339,165 +305,144 @@ - [2]: http://www.cs.utexas.edu/~wcook/Drafts/2012/ecoop2012.pdf - [3]: http://okmij.org/ftp/tagless-final/course/optimizations.html - {2 Parsing binary code} - After a theory is instantiated it could be used to build terms, - which will trigger analyses associated with each instantiated - theory. - - However, a program is usually represented as a binary machine - code, which should be parsed into the Core Theory terms. This - process is called {i lifting} and program components that do - lifting are called {i lifters}. - - Lifting is a notoriously hard task, since the machine code is an - untyped representation and Core Theory terms are rigidly typed. To - alleviate this problem, the Core Theory library provides a helper - module [Parser] which could be used to lift an untyped - representation into the typed Core Theory term. - - It is also possible to reuse lifters which translate the machine - code in some IL, but writing a parser form that IL. The [Parser] - module is especially useful here, since it was specifically - designed for such use-cases. -*) - -open Core_kernel[@@warning "-D"] -open Caml.Format -open Bap_knowledge + After a theory is instantiated it could be used to build terms, which will + trigger analyses associated with each instantiated theory. + However, a program is usually represented as a binary machine code, which + should be parsed into the Core Theory terms. This process is called + {i lifting} and program components that do lifting are called {i lifters}. -module KB = Knowledge + Lifting is a notoriously hard task, since the machine code is an untyped + representation and Core Theory terms are rigidly typed. To alleviate this + problem, the Core Theory library provides a helper module [Parser] which + could be used to lift an untyped representation into the typed Core Theory + term. + It is also possible to reuse lifters which translate the machine code in + some IL, but writing a parser form that IL. The [Parser] module is + especially useful here, since it was specifically designed for such + use-cases. *) -(** The Core Theory. *) -module Theory : sig +open Core +open Format +open Bap_knowledge +module KB = Knowledge - (** The class index for all Core Theories. *) +(** The Core Theory. *) +module Theory : sig type cls + (** The class index for all Core Theories. *) - - (** A theory instance. - To create a new theory instance use the {!instance} function. - To manifest a theory into an OCaml module, use the {!require} - function. *) type theory = cls KB.obj + (** A theory instance. To create a new theory instance use the {!instance} + function. To manifest a theory into an OCaml module, use the {!require} + function. *) - - (** Theory.t is theory. *) type t = theory + (** Theory.t is theory. *) (** The denotation of expression. - Values are used to express the semantics of terms that evaluate - to a value, aka expressions. Values are sorted and value sorts - hold static information about the value representation, like the - number of bits in a bitvector or the representation format in a - floating-point value. - - All values belong to the same Knowledge class and thus share the - same set of properties, with each property being a specific - denotation provided by one or more theories. For example, the - [bap.std:exp] slot holds the denotation of a value in terms of - BIL expressions. - *) - module Value : sig + Values are used to express the semantics of terms that evaluate to a + value, aka expressions. Values are sorted and value sorts hold static + information about the value representation, like the number of bits in a + bitvector or the representation format in a floating-point value. - (** a type for the value sort *) + All values belong to the same Knowledge class and thus share the same set + of properties, with each property being a specific denotation provided by + one or more theories. For example, the [bap.std:exp] slot holds the + denotation of a value in terms of BIL expressions. *) + module Value : sig type +'a sort + (** a type for the value sort *) - - (** the class of the values. *) type cls + (** the class of the values. *) - - (** the value type is an instance of Knowledge.value *) - type 'a t = (cls,'a sort) KB.cls KB.value + type 'a t = (cls, 'a sort) KB.cls KB.value + (** the value type is an instance of Knowledge.value *) type 'a value = 'a t - (** the class of all values. *) - val cls : (cls,unit) KB.cls + val cls : (cls, unit) KB.cls + (** the class of all values. *) - - (** [empty s] creates an empty value of sort [s]. - - The empty value doesn't hold any denotations and represents an - absence of information about the value. - *) val empty : 'a sort -> 'a t + (** [empty s] creates an empty value of sort [s]. + The empty value doesn't hold any denotations and represents an absence + of information about the value. *) - (** [sort v] is the value sort. - - The value sort holds static information about values of that - sort. *) val sort : 'a t -> 'a sort + (** [sort v] is the value sort. + The value sort holds static information about values of that sort. *) + val resort : ('a sort -> 'b sort option) -> 'a t -> 'b t option (** [resort refine x] applies [refine] to the sort of [x]. - Returns the value [x] with the refined sort, if applicable, - otherwise returns the original value. - - @since 2.3.0 - *) - val resort : ('a sort -> 'b sort option) -> 'a t -> 'b t option + Returns the value [x] with the refined sort, if applicable, otherwise + returns the original value. + @since 2.3.0 *) + val forget : 'a t -> unit t (** [forget v] erases the type index of the value. - The returned value has the monomorphized [Top.t] type and can - be stored in containers, serialized, etc. + The returned value has the monomorphized [Top.t] type and can be stored + in containers, serialized, etc. To restore the type index use the {!refine} function. @since 2.3.0 Note: this is a convenient function that just does - [Knowledge.Value.refine v @@ Sort.forget @@ sort v] - *) - val forget : 'a t -> unit t + [Knowledge.Value.refine v @@ Sort.forget @@ sort v] *) (** A value with an erased sort type index. - The monomorphized value could be stored in a container, - serialized and deserialized and otherwise treated as a - regular value. To erase the type index, use the - [Value.forget] function. + The monomorphized value could be stored in a container, serialized and + deserialized and otherwise treated as a regular value. To erase the type + index, use the [Value.forget] function. The type index could be restored using [Value.refine] or [Value.Sort.refine] functions. - @since 2.3.0 - *) + @since 2.3.0 *) module Top : sig - type t = (cls,unit sort) KB.cls KB.value + type t = (cls, unit sort) KB.cls KB.value + val cls : (cls, unit sort) KB.cls + include KB.Value.S with type t := t end - (** A eDSL for dispatching on multiple types. - The syntax involves only two operators, [can] that - applys a sort refinining function, and [let|] - glues several cases together. Let's start with a simple - example, + The syntax involves only two operators, [can] that applys a sort + refinining function, and [let|] glues several cases together. Let's + start with a simple example, {[ - let f x = Match.(begin - let| x = can Bool.refine x @@ fun x -> + let f x = + Match.( + let| x = + can Bool.refine x @@ fun x -> (* here x has type [Bool.t value] *) - `Bool x in - let| x = can Bitv.refine x @@ fun x -> + `Bool x + in + let| x = + can Bitv.refine x @@ fun x -> (* and here x is ['a Bitv.t value] *) - `Bitv x in - let| x = can Mem.refine x @@ fun x -> + `Bitv x + in + let| x = + can Mem.refine x @@ fun x -> (* and now x is [('a,'b) Mem.t value] *) - `Mem x in - `Other x - end) + `Mem x + in + `Other x) ]} In general, the syntax is @@ -510,134 +455,123 @@ module Theory : sig default v} - where [s1],...,[sN] a refiners to types [t1],...,[tN], - respectively. + where [s1],...,[sN] a refiners to types [t1],...,[tN], respectively. {3 Semantics} - If in [can s1 x body] the sort of [x] can be refined to [t1] using - the refiner [s1] then [body] is applied to the value [x] with - the refined sort (and freshly generated type index if - needed) and the result of the whole expression is [body x] - and the nested below expressions are never - evaluated. Otherwise, if there is no refinement, the - expression [can s1 x body] is evaluated to [()] - and the next case is tried until the [default] case is hit. + If in [can s1 x body] the sort of [x] can be refined to [t1] using the + refiner [s1] then [body] is applied to the value [x] with the refined + sort (and freshly generated type index if needed) and the result of the + whole expression is [body x] and the nested below expressions are never + evaluated. Otherwise, if there is no refinement, the expression + [can s1 x body] is evaluated to [()] and the next case is tried until + the [default] case is hit. - @since 2.3.0 - *) + @since 2.3.0 *) module Match : sig type 'a t type 'a refiner = unit sort -> 'a sort option - val (let|) : 'b t -> (unit -> 'b) -> 'b + val ( let| ) : 'b t -> (unit -> 'b) -> 'b - (** [let| () = can s x f in can't] refines [x] to [s]. - - If the sort of [x] could be refined with [s] then [f] - is called with the refined value [x'] and the whole - expression is evaluated to [f x']. Otherwise, the control is - passed to [can't]. - *) val can : 'a refiner -> unit value -> ('a value -> 'b) -> 'b t + (** [let| () = can s x f in can't] refines [x] to [s]. + If the sort of [x] could be refined with [s] then [f] is called with + the refined value [x'] and the whole expression is evaluated to + [f x']. Otherwise, the control is passed to [can't]. *) + val both : + 'a refiner -> + unit value -> + 'b refiner -> + unit value -> + ('a value -> 'b value -> 'c) -> + 'c t (** [let| () = both sx x sy y f in no] applies two refiners in parallel. - If both [x] and [y] could be refined with [sx] and [sy] - respectively then [f x' y'] is called with the refined - values and becomes the value of the expression. Otherwise, - [no] is evaluated and becomes the value of the whole - expression. - *) - val both : - 'a refiner -> unit value -> - 'b refiner -> unit value -> - ('a value -> 'b value -> 'c) -> 'c t + If both [x] and [y] could be refined with [sx] and [sy] respectively + then [f x' y'] is called with the refined values and becomes the value + of the expression. Otherwise, [no] is evaluated and becomes the value + of the whole expression. *) end - (** Value Sorts. - A concrete and extensible representation of a value sort. The - sort usually holds the static information about the value - representation, like the width of a bitvector, the - representation format of a floating-point number, and so on. + A concrete and extensible representation of a value sort. The sort + usually holds the static information about the value representation, + like the width of a bitvector, the representation format of a + floating-point number, and so on. - This module is mostly needed when a new sort is defined. The - Core Theory provides a predefined collection of sorts, here is - the list: + This module is mostly needed when a new sort is defined. The Core Theory + provides a predefined collection of sorts, here is the list: - {!Bitv} - bitvectors, e.g., [BitVec(i)] - {!Mem} - memories, e.g., [Mem(BitVec(i), BitVec(j)] - - {!Float} - floating-points, e.g., [Float(IEEE754(2, 8, 23), BitVec(32)]; + - {!Float} - floating-points, e.g., + [Float(IEEE754(2, 8, 23), BitVec(32)]; - {!Rmode} - rounding mode, e.g., [Rmode]. - This module defines a simple DSL for specifying sorts, the DSL - grammar is made only from three rules: + This module defines a simple DSL for specifying sorts, the DSL grammar + is made only from three rules: {v sort = sym | int | sort(sort) v} - The DSL is embedded into the host language with an infix - operator [@->] for application, e.g., OCaml grammar for sorts is: + The DSL is embedded into the host language with an infix operator [@->] + for application, e.g., OCaml grammar for sorts is: - [v - sort = sym exp | int exp | sort "@->" sort - exp = ?a valid OCaml expression? - v] + [v sort = sym exp | int exp | sort "@->" sort exp = ?a valid OCaml + expression? v] - Both symbols and numbers are indexed with a type index, which - serves as a witness of the sort value, e.g., + Both symbols and numbers are indexed with a type index, which serves as + a witness of the sort value, e.g., {[ type int8 + let int8 : int8 num sort = Sort.int 8 ]} - Type indices enable explicit reflection of the target language - type system in the host type system, while still keeping the - typing rules under designer's control. - - As a working example, let's develop a sort for binary - fixed-point numbers. We need to encode the type of the - underlying bitvector as well as the scaling factor. Suppose, - we chose to encode the scaling factor by an integer position - of the point, e.g., 8 means scaling factor 2^8, i.e., a point - fixed on 8th bit from the left. - - The syntax of our sort will be [Fixpoint(,BitVec())], - but we will keep it private to enable further extensions. The - structure of the sort is explicitly captured in its type, in - our case, it will be ['p num -> 's Bitv.t -> fixpoint sym], - but since we want to keep it is hidden by our type [('p,'s) t]. - The same as with the built-in [Bitv] and [Mem] sorts. - - We declare a [fixpoint] constructor and keep it private, to - ensure that only we can construct (and refine) fixpoint - sorts. Since the sort type is abstract, we also need to - provide functions that access arguments of our sort. - - Finally, we need to provide the [refine] function, that will - cast an untyped sort to its type representation, essentially - proving that the sort is a valid fixpoint sort. + Type indices enable explicit reflection of the target language type + system in the host type system, while still keeping the typing rules + under designer's control. + + As a working example, let's develop a sort for binary fixed-point + numbers. We need to encode the type of the underlying bitvector as well + as the scaling factor. Suppose, we chose to encode the scaling factor by + an integer position of the point, e.g., 8 means scaling factor 2^8, + i.e., a point fixed on 8th bit from the left. + + The syntax of our sort will be [Fixpoint(,BitVec())], but we + will keep it private to enable further extensions. The structure of the + sort is explicitly captured in its type, in our case, it will be + ['p num -> 's Bitv.t -> fixpoint sym], but since we want to keep it is + hidden by our type [('p,'s) t]. The same as with the built-in [Bitv] and + [Mem] sorts. + + We declare a [fixpoint] constructor and keep it private, to ensure that + only we can construct (and refine) fixpoint sorts. Since the sort type + is abstract, we also need to provide functions that access arguments of + our sort. + + Finally, we need to provide the [refine] function, that will cast an + untyped sort to its type representation, essentially proving that the + sort is a valid fixpoint sort. {[ - module Fixpoint : sig type ('p, 's) t - val define : int -> 's Bitv.t sort -> ('p,'s) t sort - val refine : unit sort -> ('p,'s) t sort option - val bits : ('p,'s) t sort -> 's Bitv.t sort - val logscale : ('p,'s) t sort -> int + + val define : int -> 's Bitv.t sort -> ('p, 's) t sort + val refine : unit sort -> ('p, 's) t sort option + val bits : ('p, 's) t sort -> 's Bitv.t sort + val logscale : ('p, 's) t sort -> int end = struct type fixpoint - type ('m,'s) t = - 'm Value.Sort.num -> - 's Bitv.t -> - fixpoint Value.Sort.sym + type ('m, 's) t = + 'm Value.Sort.num -> 's Bitv.t -> fixpoint Value.Sort.sym let fixpoint = Value.Sort.Name.declare "FixPoint" - let define p s = Value.Sort.(int p @-> s @-> sym fixpoint) let refine s = Value.Sort.refine fixpoint s let bits s = Value.Sort.(hd (tl s)) @@ -646,562 +580,461 @@ module Theory : sig (* Example of usage: *) - - type ('m,'s) fixpoint = Fixpoint.t Value.sort - - type u32 (* type index for 32 bit ints *) - type p8 (* type index for points at 8th bit *) + type ('m, 's) fixpoint = Fixpoint.t Value.sort + type u32 (* type index for 32 bit ints *) + type p8 (* type index for points at 8th bit *) (* a sort of 32-bit bitvectors, usually provided by the CPU model *) let u32 : u32 Bitv.t Value.sort = Bitv.define 32 (* a sort of 8.32 fixed-point numbers. *) - let fp8_32 : (p8,u32) fixpoint = Fixpoint.define 8 u32 - ]} - - *) + let fp8_32 : (p8, u32) fixpoint = Fixpoint.define 8 u32 + ]} *) module Sort : sig type +'a t = 'a sort type +'a sym type +'a num type name - - (** [sym name] constructs a sort with the given name. - - A symbolic sort could represent an abstract data type with - no further information available, e.g., some machine status - word of unknown size or representation; it may also be used - to denote data with obvious representation, e.g., the [Bool] - sort; finally, a symbolic sort could be used as a - constructor name for an indexed sort, e.g., (BitVec(width)). - - See the Example in the module description for more - information. - - - *) val sym : name -> 'a sym sort + (** [sym name] constructs a sort with the given name. + A symbolic sort could represent an abstract data type with no further + information available, e.g., some machine status word of unknown size + or representation; it may also be used to denote data with obvious + representation, e.g., the [Bool] sort; finally, a symbolic sort could + be used as a constructor name for an indexed sort, e.g., + (BitVec(width)). - (** [int x] a numeric sort. - - While it is possible to create a standalone numeric sort, it - wouldn't be possible to refine it, since only symbolic sorts - re refinable. + See the Example in the module description for more information. *) - Numeric sorts are used mostly as parameters. See the Example - section of the module documentation for more information. - *) val int : int -> 'a num sort + (** [int x] a numeric sort. + While it is possible to create a standalone numeric sort, it wouldn't + be possible to refine it, since only symbolic sorts re refinable. - (** [app s1 s2] constructs a sort of sort [s1] and [s2]. - - An application could be seen as a tuple building operators, - thus this operation defines a sort that is described by two - other sorts. + Numeric sorts are used mostly as parameters. See the Example section + of the module documentation for more information. *) - Basically, the [app] operator builds a heterogenous list, - with elements which should be other sorts. The list could be - then traversed using the [Sort.hd] and [Sort.tl] operators, - and individual elements could be read with the [value] and - [name] operators. Since the structure of the sort is fully - encoded in this type, those operations are total. - *) val app : 'a sort -> 'b sort -> ('a -> 'b) sort + (** [app s1 s2] constructs a sort of sort [s1] and [s2]. + An application could be seen as a tuple building operators, thus this + operation defines a sort that is described by two other sorts. - (** [s1 @-> s2] is [app s1 s2] *) - val (@->) : 'a sort -> 'b sort -> ('a -> 'b) sort + Basically, the [app] operator builds a heterogenous list, with + elements which should be other sorts. The list could be then traversed + using the [Sort.hd] and [Sort.tl] operators, and individual elements + could be read with the [value] and [name] operators. Since the + structure of the sort is fully encoded in this type, those operations + are total. *) + val ( @-> ) : 'a sort -> 'b sort -> ('a -> 'b) sort + (** [s1 @-> s2] is [app s1 s2] *) - (** [value s] returns the number associated with the numeric sort. *) val value : 'a num sort -> int + (** [value s] returns the number associated with the numeric sort. *) + val name : 'a sym sort -> name (** [name s] returns the symbol associated with a symbolic sort *) - val name : 'a sym sort -> name - (** [hd s] the first argument of sort [s] *) val hd : ('a -> 'b) sort -> 'a sort + (** [hd s] the first argument of sort [s] *) - - (** [tl] the list of arguments of sort [s] excluding the first one*) val tl : ('a -> 'b) sort -> 'b sort + (** [tl] the list of arguments of sort [s] excluding the first one*) - + val refine : name -> unit sort -> 'a t option (** [refine witness s] restores the type of the sort. - The sort type is an index type which could be lost, e.g., - when the [forget] function is applied or when the sort is - stored and read from its textual representation. + The sort type is an index type which could be lost, e.g., when the + [forget] function is applied or when the sort is stored and read from + its textual representation. - The [refine] function will re-instantiate the type index if - the constructor name of the sort [s] is the [name]. + The [refine] function will re-instantiate the type index if the + constructor name of the sort [s] is the [name]. - This function gives a mandate for the refine function to - index the sort [s] with any type, which will breach the sort - type safety, therefore this function should be used with - care and be hidden behind the abstraction barrier and have a - concrete type. + This function gives a mandate for the refine function to index the + sort [s] with any type, which will breach the sort type safety, + therefore this function should be used with care and be hidden behind + the abstraction barrier and have a concrete type. See the Example section in the module documentation for the - demonstration of how refine should be used. - *) - val refine : name -> unit sort -> 'a t option + demonstration of how refine should be used. *) - - (** [forget s] forgets the type index associated with the sort [s]. - - This is effectively an upcasting function, that could be - used when the typing information is not necessary - anymore or is not representable. The type index could be - later restored with the [refine] function. - *) val forget : 'a t -> unit t + (** [forget s] forgets the type index associated with the sort [s]. + This is effectively an upcasting function, that could be used when the + typing information is not necessary anymore or is not representable. + The type index could be later restored with the [refine] function. *) - (** [same x y] is true if [x] and [y] are of the same structure. *) val same : 'a t -> 'b t -> bool + (** [same x y] is true if [x] and [y] are of the same structure. *) - - (** prints the sort. *) val pp : formatter -> 'a t -> unit - + (** prints the sort. *) (** Sorts with erased type indices. - This module enables construction of complex data structures - on sorts, like maps, sets, etc, e.g., + This module enables construction of complex data structures on sorts, + like maps, sets, etc, e.g., [let sorts = Set.empty (module Value.Sort.Top)] - Since such structures are required to be monomorphic, the - sort type index should be removed using the [forget] function, - before a sort could be stored in it. + Since such structures are required to be monomorphic, the sort type + index should be removed using the [forget] function, before a sort + could be stored in it. - Note, that the type index is only removed from the meta - language (OCaml) type, but is preserved in the value term, - so it could be reconstructed (refined) later. - *) + Note, that the type index is only removed from the meta language + (OCaml) type, but is preserved in the value term, so it could be + reconstructed (refined) later. *) module Top : sig type t = unit sort [@@deriving bin_io, compare, sexp] + include Base.Comparable.S with type t := t end - (** The name registry. - Names of symbols must be unique as the name is used as a - witness of authenticity of the sort. Once obtained, the name - should be kept secret beyond the module signature. + Names of symbols must be unique as the name is used as a witness of + authenticity of the sort. Once obtained, the name should be kept + secret beyond the module signature. - See the Example section of the [Sort] module documentation - for more information. - *) + See the Example section of the [Sort] module documentation for more + information. *) module Name : sig type t - + val declare : ?package:string -> string -> name (** [declare ?package name] declares a new [name]. - The declared name must be unique to the [package]. If such - name is already declared in the [package], then the - declaration fails. - *) - val declare : ?package:string -> string -> name + The declared name must be unique to the [package]. If such name is + already declared in the [package], then the declaration fails. *) + include Base.Comparable.S with type t := t end end end - (** The denotation of statements. - An effect is a Knowledge value that is used to give a denotation - to the language forms that do not evaluate to values but change - the state of the system, i.e., to what is usually called - "statement". - - All effects belong to the same Knowledge class and share the - same set of properties, with each property being a specific - denotation provided by on or more theories. For example, - [bap.std:bil] slot holds the denotation of a value in terms of - the BIL statements. - *) - module Effect : sig - + An effect is a Knowledge value that is used to give a denotation to the + language forms that do not evaluate to values but change the state of the + system, i.e., to what is usually called "statement". - (** a type for the effect sort *) + All effects belong to the same Knowledge class and share the same set of + properties, with each property being a specific denotation provided by on + or more theories. For example, [bap.std:bil] slot holds the denotation of + a value in terms of the BIL statements. *) + module Effect : sig type +'a sort + (** a type for the effect sort *) - - (** the class of effects. *) type cls + (** the class of effects. *) + type 'a t = (cls, 'a sort) KB.cls KB.value + (** the effect type is an instance of the Knowledge.value *) - (** the effect type is an instance of the Knowledge.value *) - type 'a t = (cls,'a sort) KB.cls KB.value - - - (** the class of all effects. *) - val cls : (cls,unit) KB.cls - + val cls : (cls, unit) KB.cls + (** the class of all effects. *) - (** [empty s] creates an empty effect value. - - The empty effect denotes an absence of any specific knowledge - about the effects produced by a term. - *) val empty : 'a sort -> 'a t + (** [empty s] creates an empty effect value. + The empty effect denotes an absence of any specific knowledge about the + effects produced by a term. *) - (** [sort eff] returns the sort of the effect [eff]. *) val sort : 'a t -> 'a sort - + (** [sort eff] returns the sort of the effect [eff]. *) (** Effect sorts. - The sort of an effect holds static information that is common - to all effects of that sort. + The sort of an effect holds static information that is common to all + effects of that sort. - We distinguish two kinds of effects - [ctrl] effects that affect - which instructions will be executed next and [data] effects that - affect only the values in the computer storage. + We distinguish two kinds of effects - [ctrl] effects that affect which + instructions will be executed next and [data] effects that affect only + the values in the computer storage. - The [unit] effect represents an effect that is a mixture of - [ctrl] and [data] effects. - *) + The [unit] effect represents an effect that is a mixture of [ctrl] and + [data] effects. *) module Sort : sig type +'a t = 'a sort type data = private Data type ctrl = private Ctrl - - (** [data kind] defines a data effect of the [kind]. *) val data : string -> data t + (** [data kind] defines a data effect of the [kind]. *) - - (** [ctrl kind] defines a ctrl effect of the [kind]. *) val ctrl : string -> ctrl t + (** [ctrl kind] defines a ctrl effect of the [kind]. *) - - (** [top] is a set of all possible effects. - - This sort indicates that the statement can have any effect. - *) val top : unit t + (** [top] is a set of all possible effects. + This sort indicates that the statement can have any effect. *) - (** [bot] is an empty set of effects. - - This sort indicates that the statement doesn't have any - observable effects, thus it could be coerced to any other - sort. - *) val bot : 'a t + (** [bot] is an empty set of effects. + This sort indicates that the statement doesn't have any observable + effects, thus it could be coerced to any other sort. *) - (** [both s1 s2] an effect of both [s1] and [s2] *) val both : 'a t -> 'a t -> 'a t + (** [both s1 s2] an effect of both [s1] and [s2] *) + val ( && ) : 'a t -> 'a t -> 'a t + (** [s1 && s2] is [both s1 s2]. *) - (** [s1 && s2] is [both s1 s2]. *) - val (&&) : 'a t -> 'a t -> 'a t - - - (** [union [s1;...;sN] is [s1 && ... && sN]. *) val union : 'a t list -> 'a t + (** [union [s1;...;sN]] is [s1 && ... && sN]. *) - - (** [join xs ys] is [union [union xs; union ys ]]. *) val join : 'a t list -> 'b t list -> unit t + (** [join xs ys] is [union [union xs; union ys ]]. *) - + val order : 'a t -> 'b t -> KB.Order.partial (** [order xs ys] orders effects by the order of inclusion. - [xs] is before [ys] if [ys] includes all effects of [xs], - otherwise. + [xs] is before [ys] if [ys] includes all effects of [xs], otherwise. *) - val order : 'a t -> 'b t -> KB.Order.partial - - (** the register read effect. *) val rreg : data t + (** the register read effect. *) - (** the register write effect. *) val wreg : data t + (** the register write effect. *) - (** the memory read effect. *) val rmem : data t + (** the memory read effect. *) - (** is the memory write effect. *) val wmem : data t + (** is the memory write effect. *) - (** the memory barrier effect *) val barr : data t + (** the memory barrier effect *) - (** the normal control flow effect *) val fall : ctrl t + (** the normal control flow effect *) - (** the jump effect. *) val jump : ctrl t + (** the jump effect. *) - (** the conditional branching effect *) val cjmp : ctrl t + (** the conditional branching effect *) end end type 'a value = 'a Value.t - type 'a effect = 'a Effect.t + type 'a effect_ = 'a Effect.t (** The sort for boolean values. - Booleans are one bit values. - *) + Booleans are one bit values. *) module Bool : sig type t - (** the Bool sort. *) val t : t Value.sort + (** the Bool sort. *) - - (** [refine s] if [s] is [Bool] then returns [Some t]. *) val refine : unit Value.sort -> t Value.sort option + (** [refine s] if [s] is [Bool] then returns [Some t]. *) end - - (** Sorts of bitvectors *) + (** Sorts of bitvectors *) module Bitv : sig type 'a t - (** [define size] defines a sort of bitvectors of the given [size]. *) val define : int -> 'a t Value.sort + (** [define size] defines a sort of bitvectors of the given [size]. *) - (** [refine s] if [s] is a bitvector sort, then restores its type. *) val refine : unit Value.sort -> 'a t Value.sort option + (** [refine s] if [s] is a bitvector sort, then restores its type. *) - (** [size s] the [size] argument of [s]. *) val size : 'a t Value.sort -> int + (** [size s] the [size] argument of [s]. *) end - (** Sorts of memories. - A memory is an associative container of bitvectors indexed with - bitvector keys. - *) + A memory is an associative container of bitvectors indexed with bitvector + keys. *) module Mem : sig - type ('a,'b) t - - - (** [define ks vs] a sort of memories with keys of type [ks] and - data of type [vs]. *) - val define : 'a Bitv.t Value.sort -> 'b Bitv.t Value.sort -> ('a,'b) t Value.sort - + type ('a, 'b) t - (** [refine s] if [s] is a memory sort then restores its type. *) - val refine : unit Value.sort -> ('a,'b) t Value.sort option + val define : + 'a Bitv.t Value.sort -> 'b Bitv.t Value.sort -> ('a, 'b) t Value.sort + (** [define ks vs] a sort of memories with keys of type [ks] and data of + type [vs]. *) + val refine : unit Value.sort -> ('a, 'b) t Value.sort option + (** [refine s] if [s] is a memory sort then restores its type. *) - (** [keys s] returns the sort of keys. *) - val keys : ('a,'b) t Value.sort -> 'a Bitv.t Value.sort + val keys : ('a, 'b) t Value.sort -> 'a Bitv.t Value.sort + (** [keys s] returns the sort of keys. *) - (** [vals s] returns the sort of values. *) - val vals : ('a,'b) t Value.sort -> 'b Bitv.t Value.sort + val vals : ('a, 'b) t Value.sort -> 'b Bitv.t Value.sort + (** [vals s] returns the sort of values. *) end - - (** Sorts for floating-point numbers. *) + (** Sorts for floating-point numbers. *) module Float : sig - - - (** Sort describing the representation format of a floating-point number. *) + (** Sort describing the representation format of a floating-point number. *) module Format : sig - type ('r,'s) t - - - (** [define r s] defines a sort given interpretation [r] of bitvector [s]. *) - val define : 'r Value.sort -> 's Bitv.t Value.sort -> ('r,'s) t Value.sort + type ('r, 's) t + val define : + 'r Value.sort -> 's Bitv.t Value.sort -> ('r, 's) t Value.sort + (** [define r s] defines a sort given interpretation [r] of bitvector [s]. + *) + val bits : ('r, 's) t Value.sort -> 's Bitv.t Value.sort (** [bits s] returns the sort of bitvectors that are used by - floating-point numbers of sort [s]. *) - val bits : ('r,'s) t Value.sort -> 's Bitv.t Value.sort + floating-point numbers of sort [s]. *) - - (** [exp s] returns an expression that describes the - interpretation of the bits of the floating-point numbers - represented by the sort [s]. *) - val exp : ('r,'s) t Value.sort -> 'r Value.sort + val exp : ('r, 's) t Value.sort -> 'r Value.sort + (** [exp s] returns an expression that describes the interpretation of the + bits of the floating-point numbers represented by the sort [s]. *) end - type ('r,'s) format = ('r,'s) Format.t + type ('r, 's) format = ('r, 's) Format.t type 'f t - + val define : ('r, 's) format Value.sort -> ('r, 's) format t Value.sort (** [define r s] defines a floating-point sort, indexed by the - floating-point format [r] that gives the interpretation to - the bits of bitvectors of sort [s]. *) - val define : ('r,'s) format Value.sort -> ('r,'s) format t Value.sort + floating-point format [r] that gives the interpretation to the bits of + bitvectors of sort [s]. *) + val refine : unit Value.sort -> ('r, 's) format t Value.sort option + (** [refine s] if [s] is a floating-point sort then restores its type. *) - (** [refine s] if [s] is a floating-point sort then restores its type. *) - val refine : unit Value.sort -> ('r,'s) format t Value.sort option + val format : ('r, 's) format t Value.sort -> ('r, 's) format Value.sort + (** [format s] returns the format of floating-points of sort [s]. *) - - (** [format s] returns the format of floating-points of sort [s]. *) - val format : ('r,'s) format t Value.sort -> ('r,'s) format Value.sort - - - (** [bits s] returns the sort of bitvectors that are used to - represent floating-point numbers of sort [s]. *) - val bits : ('r,'s) format t Value.sort -> 's Bitv.t Value.sort + val bits : ('r, 's) format t Value.sort -> 's Bitv.t Value.sort + (** [bits s] returns the sort of bitvectors that are used to represent + floating-point numbers of sort [s]. *) end - - (** Rounding modes. *) + (** Rounding modes. *) module Rmode : sig type t - - (** The sort of rounding modes. *) val t : t Value.sort + (** The sort of rounding modes. *) - - (** [refine s] if [s] is the rounding mode sort, then restores its type.*) val refine : unit Value.sort -> t Value.sort option + (** [refine s] if [s] is the rounding mode sort, then restores its type.*) end type 'a pure = 'a value knowledge - type 'a eff = 'a effect knowledge - type ('r,'s) format = ('r,'s) Float.format - + type 'a eff = 'a effect_ knowledge + type ('r, 's) format = ('r, 's) Float.format (** Variables. - Variables give names to values, read the {{!vars}Variables} - section for more information. - *) + Variables give names to values, read the {{!vars}Variables} section for + more information. *) module Var : sig type 'a t - type ident [@@deriving bin_io, compare, sexp] - type ord - - (** [define sort name] a global variable with [name] and [sort]. *) val define : 'a Value.sort -> string -> 'a t + (** [define sort name] a global variable with [name] and [sort]. *) - - (** [create s id] a variable with sort [s] and identifier [id]. - - The identifier encodes what kind of variable is created. This - function is usually created by parsers, that parse a - well-formed programs. - *) val create : 'a Value.sort -> ident -> 'a t + (** [create s id] a variable with sort [s] and identifier [id]. + The identifier encodes what kind of variable is created. This function + is usually created by parsers, that parse a well-formed programs. *) - (** [forget v] forgets the type index describing the sort of the variable. *) val forget : 'a t -> unit t + (** [forget v] forgets the type index describing the sort of the variable. + *) - - (** [resort v] changes the sort of the variable. *) val resort : 'a t -> 'b Value.sort -> 'b t + (** [resort v] changes the sort of the variable. *) + val versioned : 'a t -> int -> 'a t (** [versioned v n] creates the [n]th version of the variable [v]. - Variable versions could be used to represent the same variable - under different context or to ensure some normalization of the - program, e.g., SSA. - *) - val versioned: 'a t -> int -> 'a t - - - (** [version v] is the version of the variable [v]. + Variable versions could be used to represent the same variable under + different context or to ensure some normalization of the program, e.g., + SSA. *) - Variable versions could be used to represent the same variable - under different context or to ensure some normalization of the - program, e.g., SSA. *) val version : 'a t -> int + (** [version v] is the version of the variable [v]. + Variable versions could be used to represent the same variable under + different context or to ensure some normalization of the program, e.g., + SSA. *) - (** [ident v] is variable's identifier. *) val ident : 'a t -> ident + (** [ident v] is variable's identifier. *) - - (** [name v] is variable's name *) val name : 'a t -> string + (** [name v] is variable's name *) - - (** [sort v] is variable's sort. *) val sort : 'a t -> 'a Value.sort + (** [sort v] is variable's sort. *) - - (** [is_virtual v] is [true] if [v] is virtual. - - Virtual variables do not have any physical representation. - *) val is_virtual : 'a t -> bool + (** [is_virtual v] is [true] if [v] is virtual. + Virtual variables do not have any physical representation. *) - (** [is_mutable v] is [true] if [v] is mutable. - - Only scoped variables are immutable. - *) val is_mutable : 'a t -> bool + (** [is_mutable v] is [true] if [v] is mutable. + Only scoped variables are immutable. *) - (** [fresh s] creates a fresh virtual mutable variable of sort [s]. *) val fresh : 'a Value.sort -> 'a t knowledge + (** [fresh s] creates a fresh virtual mutable variable of sort [s]. *) - - (** [scoped s] creates a fresh immutable variable of sort [s]. *) val scoped : 'a Value.sort -> ('a t -> 'b pure) -> 'b pure + (** [scoped s] creates a fresh immutable variable of sort [s]. *) - - (** [printf "%a" Theory.Var.pp v] pretty-prints the identifier - of the variable [v]. - - @since 2.3.0 *) val pp : Format.formatter -> 'a t -> unit + (** [printf "%a" Theory.Var.pp v] pretty-prints the identifier of the + variable [v]. + + @since 2.3.0 *) (** Variable identifiers. - Identifiers are compared caseless, otherwise the order loosely - matches the lexicographical order of the textual - representation. Identifiers of virtual variables are ordered - before identifiers of physical variables and mutable virtual - variables are ordered before immutable. Identifiers of a - versioned variable are ordered in the ascending order of their - versions. And identifiers of virtual variables are ordered in - the ascending order of their numeric values, e.g., `#2` - is ordered before `#123`. + Identifiers are compared caseless, otherwise the order loosely matches + the lexicographical order of the textual representation. Identifiers of + virtual variables are ordered before identifiers of physical variables + and mutable virtual variables are ordered before immutable. Identifiers + of a versioned variable are ordered in the ascending order of their + versions. And identifiers of virtual variables are ordered in the + ascending order of their numeric values, e.g., `#2` is ordered before + `#123`. @before 2.4.0 the ordering was unspecified but wasn't caseless. - @since 2.4.0 the ordering is caseless - *) + @since 2.4.0 the ordering is caseless *) module Ident : sig type t = ident [@@deriving bin_io, compare, sexp] + include Stringable.S with type t := t - include Base.Comparable.S with type t := t - and type comparator_witness = ord - end + include + Base.Comparable.S with type t := t and type comparator_witness = ord + end (** Variables with erased sort index. This module enables construction of complex data structures on variables, e.g., [Set.empty (module Theory.Var.Top)]. - The variables are ordered by their identifiers so that two - variables with the same name but different sorts are compared - equal. - *) + The variables are ordered by their identifiers so that two variables + with the same name but different sorts are compared equal. *) module Top : sig type nonrec t = unit t [@@deriving bin_io, compare, sexp] + include Base.Comparable.S with type t := t end end @@ -1209,118 +1042,109 @@ module Theory : sig type data = Effect.Sort.data type ctrl = Effect.Sort.ctrl - (** a concrete representation of words in the Core Theory. *) type word = Bitvec.t + (** a concrete representation of words in the Core Theory. *) - (** a concrete representation of variables. *) type 'a var = 'a Var.t + (** a concrete representation of variables. *) - (** a class index for class of programs. *) type program + (** a class index for class of programs. *) - (** label is an object of the program class. *) type label = program KB.Object.t + (** label is an object of the program class. *) + type target (** The target execution system. @since 2.2.0 *) - type target - (** The ordering of the bytes. - @since 2.2.0 *) type endianness + (** The ordering of the bytes. + @since 2.2.0 *) + type system (** The operating system. @since 2.2.0 *) - type system - (** The application binary interface. - @since 2.2.0 *) type abi + (** The application binary interface. + @since 2.2.0 *) + type fabi (** The floating-point ABI. @since 2.2.0*) - type fabi + type filetype (** The file type. @since 2.2.0 *) - type filetype + type compiler (** source to code transformer. @since 2.2.0*) - type compiler + type language (** the name of the code encoding. @since 2.2.0 *) - type language - - (** a target-specific role of a variable or other entity. - @since 2.3.0 - *) type role + (** a target-specific role of a variable or other entity. + @since 2.3.0 *) + type alias (** a description of the register aliasing. See {!Alias}. - @since 2.4.0 - *) - type alias - + @since 2.4.0 *) + type ('a, 'k) origin (** a description of the origin of an aliased register. See {!Origin}. - @since 2.4.0 - *) - type ('a,'k) origin + @since 2.4.0 *) (** The semantics of programs. - The semantics of a program is denoted with effects that this - program produces, so effectively [Program.Semantics = Effect], - but we reexport it in a separate module here, to separate the - concerns. + The semantics of a program is denoted with effects that this program + produces, so effectively [Program.Semantics = Effect], but we reexport it + in a separate module here, to separate the concerns. - @since 2.2.0 (was {!Program.Semantics} before that - *) + @since 2.2.0 (was {!Program.Semantics} before that *) module Semantics : sig type cls = Effect.cls type t = unit Effect.t - (** the class of program semantics values. *) val cls : (cls, unit Effect.sort) Knowledge.cls + (** the class of program semantics values. *) - (** the slot to store program semantics. *) val slot : (program, t) Knowledge.slot + (** the slot to store program semantics. *) + val value : (cls, unit Value.t) Knowledge.slot (** the value of the effect. Represents the value of side-effectful compuations. @since 2.3.0 *) - val value : (cls, unit Value.t) Knowledge.slot - (** the memory contents of the program. *) val code : (cls, string option) Knowledge.slot + (** the memory contents of the program. *) include Knowledge.Value.S with type t := t end - (** The denotation of programs. - Values of class [program] are used to express the semantics of - programs. With a [label], which is an abstract pointer to a - program, we associate a value of type [Program.t] which denotes - the program that will be executed when the control will be - passed to that label. - *) + Values of class [program] are used to express the semantics of programs. + With a [label], which is an abstract pointer to a program, we associate a + value of type [Program.t] which denotes the program that will be executed + when the control will be passed to that label. *) module Program : sig - type t = (program,unit) KB.cls KB.value - val cls : (program,unit) KB.cls + type t = (program, unit) KB.cls KB.value + + val cls : (program, unit) KB.cls module Semantics = Semantics [@@deprecated "[since 2020-10] use [Semantics] (without Program)"] @@ -1328,124 +1152,134 @@ module Theory : sig include Knowledge.Value.S with type t := t end - (** The source code artifact of a compilation unit. - Contains the information about the source code of a program - unit. Note, that it is not an attribute of a program that - denotes the semantics of that program, but an artifact - that is associated with the compile unit. - - The information about the source code is represented as an - extesnsible {!KB.Value.t}. To add a new property of the Source - class use {!KB.Class.property}, - to access existing properties use {!KB.Value.get} - and {!KB.Value.put}. + Contains the information about the source code of a program unit. Note, + that it is not an attribute of a program that denotes the semantics of + that program, but an artifact that is associated with the compile unit. + The information about the source code is represented as an extesnsible + {!KB.Value.t}. To add a new property of the Source class use + {!KB.Class.property}, to access existing properties use {!KB.Value.get} + and {!KB.Value.put}. - @since 2.2.0 - *) + @since 2.2.0 *) module Source : sig type cls - include KB.Value.S with type t = (cls,unit) KB.cls KB.Value.t - (** the class index for the core-theory:source class *) - val cls : (cls,unit) KB.cls + include KB.Value.S with type t = (cls, unit) KB.cls KB.Value.t + + val cls : (cls, unit) KB.cls + (** the class index for the core-theory:source class *) - (** the language of the source code *) - val language : (cls,language) KB.slot + val language : (cls, language) KB.slot + (** the language of the source code *) - (** the source code text *) - val code : (cls,string) KB.slot + val code : (cls, string) KB.slot + (** the source code text *) - (** the file name of the unit's source code *) - val file : (cls,string option) KB.slot + val file : (cls, string option) KB.slot + (** the file name of the unit's source code *) end (** Description of the execution target. - An abstract description of the system on which a program is - intended to be run. The description precisely describes various - architectual and microarchitectual details of the target system, - and could be extended with further details either internally, by - adding more fields (and functions to this module) or storing - those options in [Options.t]; or externally, by maintaining - finite mappings from [Target.t] to corresponding properties. + An abstract description of the system on which a program is intended to be + run. The description precisely describes various architectual and + microarchitectual details of the target system, and could be extended with + further details either internally, by adding more fields (and functions to + this module) or storing those options in [Options.t]; or externally, by + maintaining finite mappings from [Target.t] to corresponding properties. - The [Target.t] has a lightweight immediate representation, which - is portable across OCaml runtime and persistent across versions - of BAP and OCaml. + The [Target.t] has a lightweight immediate representation, which is + portable across OCaml runtime and persistent across versions of BAP and + OCaml. - @since 2.2.0 *) + @since 2.2.0 *) module Target : sig + type t = target [@@deriving bin_io, compare, sexp] (** the abstract type representing the target. - This type is a unique identifier of the target, - represented as [KB.Name.t] underneath the hood. - *) - type t = target [@@deriving bin_io, compare, sexp] + This type is a unique identifier of the target, represented as + [KB.Name.t] underneath the hood. *) + include Base.Comparable.S with type t := t include Binable.S with type t := t include Stringable.S with type t := t include Pretty_printer.S with type t := t - (** The set of target-specific options. *) - type options = (options_cls,unit) KB.cls KB.Value.t and options_cls - - - (** [declare ?package name] declares a new execution target. - - The packaged name of the target must be unique and the target - shall be declared during the module registration (commonly as - a toplevel definition of a module that implements the target - support package). - - The newly declared target inherits all the parameters from the - [parent] target unless they are explicitly overriden. + type options = (options_cls, unit) KB.cls KB.Value.t + (** The set of target-specific options. *) - For the description of parameters see the corresponding - accessor functions in this module. + and options_cls - If the target architecture has register aliases, i.e., - registers that correspond to some parts of other registers, - then they should be properly described with the [aliasing] - parameter, using the {!Alias} language, in which each register - that has aliases is structurally defined in terms of its - subparts. - - @since 2.3.0 has the [regs] optional parameter. - @since 2.4.0 has the [aliasing] optional parameters. - *) val declare : - ?parent:t -> (** defaults to [unknown] *) - ?bits:int -> (** defaults to [32] *) - ?byte:int -> (** defaults to [8] *) - ?data:_ Mem.t Var.t -> (** defaults to [mem : Mem(bits,byte)] *) - ?code:_ Mem.t Var.t -> (** defaults to [mem : Mem(bits,byte)] *) - ?data_alignment:int -> (** defaults to 8 bit *) - ?code_alignment:int -> (** defaults to 8 bit *) - ?vars:unit Var.t list -> (** defaults to [[]] *) - ?regs:(role list * unit Var.t list) list -> (** defaults to [[]] *) + ?parent:t -> + (* defaults to [unknown] *) + ?bits:int -> + (* defaults to [32] *) + ?byte:int -> + (* defaults to [8] *) + ?data:_ Mem.t Var.t -> + (* defaults to [mem : Mem(bits,byte)] *) + ?code:_ Mem.t Var.t -> + (* defaults to [mem : Mem(bits,byte)] *) + ?data_alignment:int -> + (* defaults to 8 bit *) + ?code_alignment:int -> + (* defaults to 8 bit *) + ?vars:unit Var.t list -> + (* defaults to [[]] *) + ?regs:(role list * unit Var.t list) list -> + (* defaults to [[]] *) ?aliasing:alias list -> - ?endianness:endianness -> (** defaults to [Endian.big] *) - ?system:system -> (** defaults to [System.unknown] *) - ?abi:abi -> (** defaults to [Abi.unknown] *) - ?fabi:fabi -> (** defaults to [Fabi.unknown] *) - ?filetype:filetype -> (** defaults to [Filetype.unknown] *) - ?options:options -> (** defaults to [Options.empty] *) - ?nicknames:string list -> (** defaults to [[]] *) - ?package:string -> (** defaults to ["user"] *) - string -> t + ?endianness:endianness -> + (* defaults to [Endian.big] *) + ?system:system -> + (* defaults to [System.unknown] *) + ?abi:abi -> + (* defaults to [Abi.unknown] *) + ?fabi:fabi -> + (* defaults to [Fabi.unknown] *) + ?filetype:filetype -> + (* defaults to [Filetype.unknown] *) + ?options:options -> + (* defaults to [Options.empty] *) + ?nicknames:string list -> + (* defaults to [[]] *) + ?package:string -> + (* defaults to ["user"] *) + string -> + t + (** [declare ?package name] declares a new execution target. + + The packaged name of the target must be unique and the target shall be + declared during the module registration (commonly as a toplevel + definition of a module that implements the target support package). + + The newly declared target inherits all the parameters from the [parent] + target unless they are explicitly overriden. + For the description of parameters see the corresponding accessor + functions in this module. + If the target architecture has register aliases, i.e., registers that + correspond to some parts of other registers, then they should be + properly described with the [aliasing] parameter, using the {!Alias} + language, in which each register that has aliases is structurally + defined in terms of its subparts. + + @since 2.3.0 has the [regs] optional parameter. + @since 2.4.0 has the [aliasing] optional parameters. *) (** [register t] generates and registers a list of targets. - For a list of possible target properties generate a set of - unique targets and declare them. To generate a unique name the - following scheme is used, + For a list of possible target properties generate a set of unique + targets and declare them. To generate a unique name the following scheme + is used, - {v---+