From 3e4db7a8d862cac156afc7af3f1bb5df2c75da7c Mon Sep 17 00:00:00 2001 From: "Ben L. Titzer" Date: Mon, 8 Dec 2025 13:19:41 -0500 Subject: [PATCH 1/3] Implement parser support --- interpreter/runtime/memory.ml | 17 ++++----- interpreter/runtime/memory.mli | 2 -- interpreter/text/lexer.mll | 1 + interpreter/text/parser.mly | 35 ++++++++++++++----- interpreter/valid/valid.ml | 14 +++++--- .../custom-page-sizes/custom-page-sizes.wast | 2 +- test/core/custom-page-sizes/memory_max.wast | 16 +++++++-- .../custom-page-sizes/memory_max_i64.wast | 20 ++++++++--- 8 files changed, 78 insertions(+), 29 deletions(-) diff --git a/interpreter/runtime/memory.ml b/interpreter/runtime/memory.ml index 9e73d6d2a..db32ae56d 100644 --- a/interpreter/runtime/memory.ml +++ b/interpreter/runtime/memory.ml @@ -17,17 +17,18 @@ exception SizeOverflow exception SizeLimit exception OutOfMemory -let page_size = 0x10000L (* 64 KiB *) - let valid_limits {min; max} = match max with | None -> true | Some m -> I64.le_u min m -let valid_size at i = - match at with - | I32AT -> I64.le_u i 0xffffL - | I64AT -> true +let valid_size at pt i = + match pt with + | PageT 1 -> true + | PageT ps -> + match at with + | I32AT -> I64.le_u i (Int64.div 0xffffffffL (Int64.of_int ps)) + | I64AT -> true let create n (PageT ps) = try @@ -39,7 +40,7 @@ let create n (PageT ps) = let alloc (MemoryT (at, lim, pt) as ty) = assert Free.((memorytype ty).types = Set.empty); - if not (valid_size at lim.min) then raise SizeOverflow; + if not (valid_size at pt lim.min) then raise SizeOverflow; if not (valid_limits lim) then raise Type; {ty; content = create lim.min pt} @@ -65,7 +66,7 @@ let grow mem delta = let new_size = Int64.add old_size delta in if I64.gt_u old_size new_size then raise SizeOverflow else let lim' = {lim with min = new_size} in - if not (valid_size at new_size) then raise SizeOverflow else + if not (valid_size at pt new_size) then raise SizeOverflow else if not (valid_limits lim') then raise SizeLimit else let after = create new_size pt in let dim = Array1_64.dim mem.content in diff --git a/interpreter/runtime/memory.mli b/interpreter/runtime/memory.mli index 5bbc8a77e..0dae66766 100644 --- a/interpreter/runtime/memory.mli +++ b/interpreter/runtime/memory.mli @@ -14,8 +14,6 @@ exception SizeOverflow exception SizeLimit exception OutOfMemory -val page_size : int64 - val alloc : memorytype -> memory (* raises Type, SizeOverflow, OutOfMemory *) val type_of : memory -> memorytype val addrtype_of : memory -> addrtype diff --git a/interpreter/text/lexer.mll b/interpreter/text/lexer.mll index 653c93a5b..f11ba87b1 100644 --- a/interpreter/text/lexer.mll +++ b/interpreter/text/lexer.mll @@ -780,6 +780,7 @@ rule token = parse | "start" -> START | "import" -> IMPORT | "export" -> EXPORT + | "pagesize" -> PAGESIZE | "module" -> MODULE | "binary" -> BIN diff --git a/interpreter/text/parser.mly b/interpreter/text/parser.mly index 0ee8020c3..edd071016 100644 --- a/interpreter/text/parser.mly +++ b/interpreter/text/parser.mly @@ -321,6 +321,7 @@ let parse_annots (m : module_) : Custom.section list = %token VEC_SHUFFLE %token Ast.instr'> VEC_EXTRACT VEC_REPLACE %token FUNC START TYPE PARAM RESULT LOCAL GLOBAL +%token PAGESIZE %token TABLE ELEM MEMORY TAG DATA DECLARE OFFSET ITEM IMPORT EXPORT %token MODULE BIN QUOTE DEFINITION INSTANCE %token SCRIPT REGISTER INVOKE GET @@ -465,8 +466,21 @@ subtype : tabletype : | addrtype limits reftype { fun c -> TableT ($1, $2, $3 c) } +pagetype : + | LPAR PAGESIZE NAT RPAR + { let v' = + try (int_of_string $3) + with Failure _ -> + error (at $sloc) "invalid custom page size" + in + if not (v' > 0 && v' land (v' - 1) = 0) then + error (at $sloc) "invalid custom page size"; + PageT v' + } + | {PageT 0x10000} + memorytype : - | addrtype limits { fun c -> MemoryT ($1, $2, PageT 0x10000) } + | addrtype limits pagetype { fun c -> MemoryT ($1, $2, $3) } limits : | NAT { {min = nat64 $1 $loc($1); max = None} } @@ -1117,6 +1131,18 @@ memory : fun () -> $4 c x $sloc } memory_fields : + | addrtype pagetype LPAR DATA string_list RPAR /* Sugar */ + { fun c x loc -> + let len64 = (Int64.of_int (String.length $5)) in + let size = + match $2 with + | PageT 0 -> len64 (* will be a validation error *) + | PageT 1 -> len64 + | PageT ps -> Int64.(div (add len64 (sub (of_int ps) 1L)) (of_int ps)) in + let offset = [at_const $1 (0L @@ loc) @@ loc] @@ loc in + [Memory (MemoryT ($1, {min = size; max = Some size}, $2)) @@ loc], + [Data ($5, Active (x, offset) @@ loc) @@ loc], + [], [] } | memorytype { fun c x loc -> [Memory ($1 c) @@ loc], [], [], [] } | inline_import memorytype /* Sugar */ @@ -1126,13 +1152,6 @@ memory_fields : | inline_export memory_fields /* Sugar */ { fun c x loc -> let mems, data, ims, exs = $2 c x loc in mems, data, ims, $1 (MemoryX x) c :: exs } - | addrtype LPAR DATA string_list RPAR /* Sugar */ - { fun c x loc -> - let size = Int64.(div (add (of_int (String.length $4)) 65535L) 65536L) in - let offset = [at_const $1 (0L @@ loc) @@ loc] @@ loc in - [Memory (MemoryT ($1, {min = size; max = Some size}, PageT 0x10000)) @@ loc], - [Data ($4, Active (x, offset) @@ loc) @@ loc], - [], [] } elemkind : diff --git a/interpreter/valid/valid.ml b/interpreter/valid/valid.ml index c22a1d0cf..e4b166716 100644 --- a/interpreter/valid/valid.ml +++ b/interpreter/valid/valid.ml @@ -105,7 +105,7 @@ let check_limits {min; max} range at msg = "size minimum must not be greater than maximum" let check_pagetype (PageT ps) at = - require (ps = 0x10000 || ps = 1) at "page size must be 1 or 64KiB" + require (ps = 0x10000 || ps = 1) at "invalid custom page size" let check_numtype (c : context) (t : numtype) at = () @@ -201,9 +201,15 @@ let check_globaltype (c : context) (gt : globaltype) at = let check_memorytype (c : context) (mt : memorytype) at = let MemoryT (at_, lim, pt) = mt in let sz, s = - match at_ with - | I32AT -> 0x1_0000L, "2^16 pages (4 GiB) for i32" - | I64AT -> 0x1_0000_0000_0000L, "2^48 pages (256 TiB) for i64" + match pt with + | PageT 0x10000 -> + (match at_ with + | I32AT -> 0x1_0000L, "2^16 pages (4 GiB) for i32" + | I64AT -> 0x1_0000_0000_0000L, "2^48 pages (256 TiB) for i64") + | _ -> (* TODO: divide by page size, what about error msg? *) + (match at_ with + | I32AT -> 0xFFFF_FFFFL, "2^32 - 1 bytes for i32" + | I64AT -> 0xFFFF_FFFF_FFFF_FFFFL, "2^64 - 1 bytes for i64") in check_limits lim sz at ("memory size must be at most " ^ s); check_pagetype pt at diff --git a/test/core/custom-page-sizes/custom-page-sizes.wast b/test/core/custom-page-sizes/custom-page-sizes.wast index 332051daa..b8ffd34e8 100644 --- a/test/core/custom-page-sizes/custom-page-sizes.wast +++ b/test/core/custom-page-sizes/custom-page-sizes.wast @@ -110,7 +110,7 @@ ;; Inline data segments ;; pagesize 0 -(assert_malformed (module quote "(memory (pagesize 0) (data))") "invalid custom page size") +(assert_malformed (module quote "(module (memory (pagesize 0) (data)))") "invalid custom page size") ;; pagesize 1 (module diff --git a/test/core/custom-page-sizes/memory_max.wast b/test/core/custom-page-sizes/memory_max.wast index 7f3b497d6..5ae6fe814 100644 --- a/test/core/custom-page-sizes/memory_max.wast +++ b/test/core/custom-page-sizes/memory_max.wast @@ -19,14 +19,26 @@ (module (import "test" "unknown" (func)) (memory 0xFFFF_FFFF (pagesize 1))) - "unknown import") + "incompatible import type") + +;; i32 (pagesize 1) +(assert_unlinkable + (module + (import "test" "unknown" (memory 0xFFFF_FFFF (pagesize 1)))) + "incompatible import type") ;; i32 (default pagesize) (assert_unlinkable (module (import "test" "unknown" (func)) (memory 65536 (pagesize 65536))) - "unknown import") + "incompatible import type") + +;; i32 (default pagesize) +(assert_unlinkable + (module + (import "test" "unknown" (memory 65536 (pagesize 65536)))) + "incompatible import type") ;; Memory size just over the maximum. diff --git a/test/core/custom-page-sizes/memory_max_i64.wast b/test/core/custom-page-sizes/memory_max_i64.wast index 81e9aaf5c..cb18fc62b 100644 --- a/test/core/custom-page-sizes/memory_max_i64.wast +++ b/test/core/custom-page-sizes/memory_max_i64.wast @@ -17,16 +17,28 @@ ;; i64 (pagesize 1) (assert_unlinkable (module - (import "test" "import" (func)) + (import "test" "unknown" (func)) (memory i64 0xFFFF_FFFF_FFFF_FFFF (pagesize 1))) - "unknown import") + "incompatible import type") + +;; i64 (pagesize 1) +(assert_unlinkable + (module + (import "test" "unknown" (memory i64 0xFFFF_FFFF_FFFF_FFFF (pagesize 1)))) + "incompatible import type") ;; i64 (default pagesize) (assert_unlinkable (module (import "test" "unknown" (func)) (memory i64 0x1_0000_0000_0000 (pagesize 65536))) - "unknown import") + "incompatible import type") + +;; i64 (default pagesize) +(assert_unlinkable + (module + (import "test" "unknown" (memory i64 0x1_0000_0000_0000 (pagesize 65536)))) + "incompatible import type") ;; Memory size just over the maximum. ;; @@ -36,7 +48,7 @@ ;; i64 (pagesize 1) (assert_malformed (module quote "(memory i64 0x1_0000_0000_0000_0000 (pagesize 1))") - "constant out of range") + "i64 constant out of range") ;; i64 (default pagesize) (assert_invalid From d35461a2b2a0f22035a681e1eb727ac333026cf2 Mon Sep 17 00:00:00 2001 From: "Ben L. Titzer" Date: Tue, 9 Dec 2025 11:50:18 -0500 Subject: [PATCH 2/3] Fix parser ambiguity --- interpreter/text/parser.mly | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/interpreter/text/parser.mly b/interpreter/text/parser.mly index edd071016..a54ca723f 100644 --- a/interpreter/text/parser.mly +++ b/interpreter/text/parser.mly @@ -469,17 +469,17 @@ tabletype : pagetype : | LPAR PAGESIZE NAT RPAR { let v' = - try (int_of_string $3) - with Failure _ -> - error (at $sloc) "invalid custom page size" + try (int_of_string $3) + with Failure _ -> + error (at $sloc) "invalid custom page size" in if not (v' > 0 && v' land (v' - 1) = 0) then - error (at $sloc) "invalid custom page size"; + error (at $sloc) "invalid custom page size"; PageT v' } - | {PageT 0x10000} memorytype : + | addrtype limits { fun c -> MemoryT ($1, $2, PageT 0x10000) } | addrtype limits pagetype { fun c -> MemoryT ($1, $2, $3) } limits : @@ -1131,18 +1131,6 @@ memory : fun () -> $4 c x $sloc } memory_fields : - | addrtype pagetype LPAR DATA string_list RPAR /* Sugar */ - { fun c x loc -> - let len64 = (Int64.of_int (String.length $5)) in - let size = - match $2 with - | PageT 0 -> len64 (* will be a validation error *) - | PageT 1 -> len64 - | PageT ps -> Int64.(div (add len64 (sub (of_int ps) 1L)) (of_int ps)) in - let offset = [at_const $1 (0L @@ loc) @@ loc] @@ loc in - [Memory (MemoryT ($1, {min = size; max = Some size}, $2)) @@ loc], - [Data ($5, Active (x, offset) @@ loc) @@ loc], - [], [] } | memorytype { fun c x loc -> [Memory ($1 c) @@ loc], [], [], [] } | inline_import memorytype /* Sugar */ @@ -1152,7 +1140,18 @@ memory_fields : | inline_export memory_fields /* Sugar */ { fun c x loc -> let mems, data, ims, exs = $2 c x loc in mems, data, ims, $1 (MemoryX x) c :: exs } - + | addrtype pagetype LPAR DATA string_list RPAR /* Sugar */ + { fun c x loc -> + let len64 = (Int64.of_int (String.length $5)) in + let size = + match $2 with + | PageT 0 -> len64 (* will be a validation error *) + | PageT 1 -> len64 + | PageT ps -> Int64.(div (add len64 (sub (of_int ps) 1L)) (of_int ps)) in + let offset = [at_const $1 (0L @@ loc) @@ loc] @@ loc in + [Memory (MemoryT ($1, {min = size; max = Some size}, $2)) @@ loc], + [Data ($5, Active (x, offset) @@ loc) @@ loc], + [], [] } elemkind : | FUNC { (NoNull, FuncHT) } From 542432853be7f1ddb5617d7267454fcc707d9430 Mon Sep 17 00:00:00 2001 From: "Ben L. Titzer" Date: Tue, 9 Dec 2025 11:54:19 -0500 Subject: [PATCH 3/3] Address other comments --- interpreter/text/parser.mly | 4 ++-- interpreter/valid/valid.ml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/interpreter/text/parser.mly b/interpreter/text/parser.mly index a54ca723f..19c94aec1 100644 --- a/interpreter/text/parser.mly +++ b/interpreter/text/parser.mly @@ -473,7 +473,7 @@ pagetype : with Failure _ -> error (at $sloc) "invalid custom page size" in - if not (v' > 0 && v' land (v' - 1) = 0) then + if not (v' > 0 && Lib.Int.is_power_of_two v') then error (at $sloc) "invalid custom page size"; PageT v' } @@ -1142,7 +1142,7 @@ memory_fields : mems, data, ims, $1 (MemoryX x) c :: exs } | addrtype pagetype LPAR DATA string_list RPAR /* Sugar */ { fun c x loc -> - let len64 = (Int64.of_int (String.length $5)) in + let len64 = Int64.of_int (String.length $5) in let size = match $2 with | PageT 0 -> len64 (* will be a validation error *) diff --git a/interpreter/valid/valid.ml b/interpreter/valid/valid.ml index e4b166716..f1bb83145 100644 --- a/interpreter/valid/valid.ml +++ b/interpreter/valid/valid.ml @@ -200,6 +200,7 @@ let check_globaltype (c : context) (gt : globaltype) at = let check_memorytype (c : context) (mt : memorytype) at = let MemoryT (at_, lim, pt) = mt in + check_pagetype pt at; let sz, s = match pt with | PageT 0x10000 -> @@ -211,8 +212,7 @@ let check_memorytype (c : context) (mt : memorytype) at = | I32AT -> 0xFFFF_FFFFL, "2^32 - 1 bytes for i32" | I64AT -> 0xFFFF_FFFF_FFFF_FFFFL, "2^64 - 1 bytes for i64") in - check_limits lim sz at ("memory size must be at most " ^ s); - check_pagetype pt at + check_limits lim sz at ("memory size must be at most " ^ s) let check_tabletype (c : context) (tt : tabletype) at = let TableT (at_, lim, t) = tt in