diff --git a/interpreter/Makefile b/interpreter/Makefile index 3294e26822..0ce6ed8c56 100644 --- a/interpreter/Makefile +++ b/interpreter/Makefile @@ -17,7 +17,7 @@ ZIP = $(NAME).zip JSLIB = wast WINMAKE = winmake.bat -DIRS = util syntax binary text valid runtime exec script host main tests +DIRS = util syntax binary text valid runtime exec custom script host main tests LIBS = FLAGS = -lexflags -ml -cflags '-w +a-4-27-42-44-45-70 -warn-error +a-3' OCBA = ocamlbuild $(FLAGS) $(DIRS:%=-I %) @@ -37,10 +37,10 @@ unopt: $(UNOPT) libopt: _build/$(LIB).cmx _build/$(LIB).cmxa libunopt: _build/$(LIB).cmo _build/$(LIB).cma jslib: $(JSLIB).js -all: unopt opt libunopt libopt test +all: unopt opt libunopt libopt alltest +alltest: unittest test customtest land: $(WINMAKE) all zip: $(ZIP) -smallint: smallint.native ci: land wast.js dunebuild dunebuild: @@ -135,21 +135,18 @@ $(WINMAKE): clean >>$@ -# Executing test suite +# Executing core test suite TESTDIR = ../test/core -# Skip _output directory, since that's a tmp directory, and list all other wast files. TESTFILES = $(shell cd $(TESTDIR); ls *.wast; ls [a-z]*/*.wast) TESTS = $(TESTFILES:%.wast=%) -.PHONY: test debugtest partest dune-test +.PHONY: test debugtest partest dune-test quiettest -test: $(OPT) smallint +test: $(OPT) $(TESTDIR)/run.py --wasm `pwd`/$(OPT) $(if $(JS),--js '$(JS)',) - ./smallint.native -debugtest: $(UNOPT) smallint +debugtest: $(UNOPT) $(TESTDIR)/run.py --wasm `pwd`/$(UNOPT) $(if $(JS),--js '$(JS)',) - ./smallint.native test/%: $(OPT) $(TESTDIR)/run.py --wasm `pwd`/$(OPT) $(if $(JS),--js '$(JS)',) $(TESTDIR)/$*.wast @@ -171,9 +168,52 @@ quiettest/%: $(OPT) ) || \ cat $(@F).out || rm $(@F).out || exit 1 -smallinttest: smallint + +# Executing custom test suite + +CUSTOMTESTDIR = ../test/custom +CUSTOMTESTDIRS = $(shell cd $(CUSTOMTESTDIR); ls -d [a-z]*) +CUSTOMTESTFILES = $(shell cd $(CUSTOMTESTDIR); ls [a-z]*/*.wast) +CUSTOMTESTS = $(CUSTOMTESTFILES:%.wast=%) +CUSTOMOPTS = -c custom $(CUSTOMTESTDIRS:%=-c %) + +.PHONY: customtest customdebugtest custompartest customquiettest + +customtest: $(OPT) + $(TESTDIR)/run.py --wasm `pwd`/$(OPT) --opts '$(CUSTOMOPTS)' $(if $(JS),--js '$(JS)',) $(CUSTOMTESTFILES:%=$(CUSTOMTESTDIR)/%) +customdebugtest: $(UNOPT) + $(TESTDIR)/run.py --wasm `pwd`/$(UNOPT) --opts '$(CUSTOMOPTS)' $(if $(JS),--js '$(JS)',) $(CUSTOMTESTFILES:%=$(CUSTOMTESTDIR)/%) + +customtest/%: $(OPT) + $(TESTDIR)/run.py --wasm `pwd`/$(OPT) --opts '$(CUSTOMOPTS) ' $(if $(JS),--js '$(JS)',) $(CUSTOMTESTDIR)/$*.wast +customdebugtest/%: $(UNOPT) + $(TESTDIR)/run.py --wasm `pwd`/$(UNOPT) --opts '$(CUSTOMOPTS)' $(if $(JS),--js '$(JS)',) $(CUSTOMTESTDIR)/$*.wast + +customrun/%: $(OPT) + ./$(OPT) $(CUSTOMOPTS) $(CUSTOMTESTDIR)/$*.wast +customdebug/%: $(UNOPT) + ./$(UNOPT) $(CUSTOMOPTS) $(CUSTOMTESTDIR)/$*.wast + +custompartest: $(CUSTOMTESTS:%=customquiettest/%) + @echo All custom tests passed. + +customquiettest/%: $(OPT) + @ ( \ + $(TESTDIR)/run.py 2>$(@F).out --wasm `pwd`/$(OPT) --opts '$(CUSTOMOPTS)' $(if $(JS),--js '$(JS)',) $(TESTDIR)/$*.wast && \ + rm $(@F).out \ + ) || \ + cat $(@F).out || rm $(@F).out || exit 1 + + +# Executing unit tests + +.PHONY: unittest + +unittest: smallint @./smallint.native +smallint: smallint.native + dunetest: dune test diff --git a/interpreter/binary/decode.ml b/interpreter/binary/decode.ml index fa3a0ef9e5..d557978290 100644 --- a/interpreter/binary/decode.ml +++ b/interpreter/binary/decode.ml @@ -798,19 +798,19 @@ let id s = let bo = peek s in Lib.Option.map (function - | 0 -> `CustomSection - | 1 -> `TypeSection - | 2 -> `ImportSection - | 3 -> `FuncSection - | 4 -> `TableSection - | 5 -> `MemorySection - | 6 -> `GlobalSection - | 7 -> `ExportSection - | 8 -> `StartSection - | 9 -> `ElemSection - | 10 -> `CodeSection - | 11 -> `DataSection - | 12 -> `DataCountSection + | 0 -> Custom.Custom + | 1 -> Custom.Type + | 2 -> Custom.Import + | 3 -> Custom.Func + | 4 -> Custom.Table + | 5 -> Custom.Memory + | 6 -> Custom.Global + | 7 -> Custom.Export + | 8 -> Custom.Start + | 9 -> Custom.Elem + | 10 -> Custom.Code + | 11 -> Custom.Data + | 12 -> Custom.DataCount | _ -> error s (pos s) "malformed section id" ) bo @@ -828,7 +828,7 @@ let section tag f default s = let type_ s = at func_type s let type_section s = - section `TypeSection (vec type_) [] s + section Custom.Type (vec type_) [] s (* Import section *) @@ -848,13 +848,13 @@ let import s = {module_name; item_name; idesc} let import_section s = - section `ImportSection (vec (at import)) [] s + section Custom.Import (vec (at import)) [] s (* Function section *) let func_section s = - section `FuncSection (vec (at var)) [] s + section Custom.Func (vec (at var)) [] s (* Table section *) @@ -864,7 +864,7 @@ let table s = {ttype} let table_section s = - section `TableSection (vec (at table)) [] s + section Custom.Table (vec (at table)) [] s (* Memory section *) @@ -874,7 +874,7 @@ let memory s = {mtype} let memory_section s = - section `MemorySection (vec (at memory)) [] s + section Custom.Memory (vec (at memory)) [] s (* Global section *) @@ -885,7 +885,7 @@ let global s = {gtype; ginit} let global_section s = - section `GlobalSection (vec (at global)) [] s + section Custom.Global (vec (at global)) [] s (* Export section *) @@ -904,7 +904,7 @@ let export s = {name; edesc} let export_section s = - section `ExportSection (vec (at export)) [] s + section Custom.Export (vec (at export)) [] s (* Start section *) @@ -914,7 +914,7 @@ let start s = {sfunc} let start_section s = - section `StartSection (opt (at start) true) None s + section Custom.Start (opt (at start) true) None s (* Code section *) @@ -939,7 +939,7 @@ let code _ s = {locals; body; ftype = -1l @@ no_region} let code_section s = - section `CodeSection (vec (at (sized code))) [] s + section Custom.Code (vec (at (sized code))) [] s (* Element section *) @@ -1012,7 +1012,7 @@ let elem s = | _ -> error s (pos s - 1) "malformed elements segment kind" let elem_section s = - section `ElemSection (vec (at elem)) [] s + section Custom.Elem (vec (at elem)) [] s (* Data section *) @@ -1034,7 +1034,7 @@ let data s = | _ -> error s (pos s - 1) "malformed data segment kind" let data_section s = - section `DataSection (vec (at data)) [] s + section Custom.Data (vec (at data)) [] s (* DataCount section *) @@ -1043,62 +1043,64 @@ let data_count s = Some (u32 s) let data_count_section s = - section `DataCountSection data_count None s + section Custom.DataCount data_count None s (* Custom section *) -let custom size s = +let custom place size s = let start = pos s in - let id = name s in - let bs = get_string (size - (pos s - start)) s in - Some (id, bs) + let name = name s in + let content = get_string (size - (pos s - start)) s in + Custom.{name; content; place} -let custom_section s = - section_with_size `CustomSection custom None s +let some_custom place size s = + Some (at (custom place size) s) -let non_custom_section s = - match id s with - | None | Some `CustomSection -> None - | _ -> skip 1 s; sized skip s; Some () +let custom_section place s = + section_with_size Custom.Custom (some_custom place) None s (* Modules *) -let rec iterate f s = if f s <> None then iterate f s +let rec iterate f s = + match f s with + | None -> [] + | Some x -> x :: iterate f s let magic = 0x6d736100l let module_ s = + let open Custom in let header = word32 s in require (header = magic) s 0 "magic header not detected"; let version = word32 s in require (version = Encode.version) s 4 "unknown binary version"; - iterate custom_section s; + let customs = iterate (custom_section (Before Type)) s in let types = type_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Type)) s in let imports = import_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Import)) s in let func_types = func_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Func)) s in let tables = table_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Table)) s in let memories = memory_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Memory)) s in let globals = global_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Global)) s in let exports = export_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Export)) s in let start = start_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Start)) s in let elems = elem_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Elem)) s in let data_count = data_count_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After DataCount)) s in let func_bodies = code_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Code)) s in let datas = data_section s in - iterate custom_section s; + let customs = customs @ iterate (custom_section (After Data)) s in require (pos s = len s) s (len s) "unexpected content after last section"; require (List.length func_types = List.length func_bodies) s (len s) "function and code section have inconsistent lengths"; @@ -1108,23 +1110,37 @@ let module_ s = List.for_all Free.(fun f -> (func f).datas = Set.empty) func_bodies) s (len s) "data count section required"; let funcs = - List.map2 (fun t f -> {f.it with ftype = t} @@ f.at) func_types func_bodies - in {types; tables; memories; globals; funcs; imports; exports; elems; datas; start} - - -let decode name bs = at module_ (stream name bs) - -let all_custom tag s = - let header = word32 s in - require (header = magic) s 0 "magic header not detected"; - let version = word32 s in - require (version = Encode.version) s 4 "unknown binary version"; - let rec collect () = - iterate non_custom_section s; - match custom_section s with - | None -> [] - | Some (n, s) when n = tag -> s :: collect () - | Some _ -> collect () - in collect () - -let decode_custom tag name bs = all_custom tag (stream name bs) + List.map2 Source.(fun t f -> {f.it with ftype = t} @@ f.at) + func_types func_bodies + in + {types; tables; memories; globals; funcs; imports; exports; elems; datas; start}, + customs + + +let decode_custom m bs custom = + let open Source in + let Custom.{name; content; place} = custom.it in + match Custom.handler name, Custom.handler (Utf8.decode "custom") with + | Some (module Handler), _ -> + let fmt = Handler.decode m bs custom in + let module S = struct module Handler = Handler let it = fmt end in + [(module S : Custom.Section)] + | None, Some (module Handler') -> + let fmt = Handler'.decode m bs custom in + let module S = struct module Handler = Handler' let it = fmt end in + [(module S : Custom.Section)] + | None, None -> + if !Flags.custom_reject then + raise (Custom.Code (custom.at, + "unknown custom section \"" ^ Utf8.encode name ^ "\"")) + else + [] + +let decode_with_custom name bs = + let m_cs = at module_ (stream name bs) in + let open Source in + let m', cs = m_cs.it in + let m = m' @@ m_cs.at in + m, List.flatten (List.map (decode_custom m bs) cs) + +let decode name bs = fst (decode_with_custom name bs) diff --git a/interpreter/binary/decode.mli b/interpreter/binary/decode.mli index 4460023d25..b02b332364 100644 --- a/interpreter/binary/decode.mli +++ b/interpreter/binary/decode.mli @@ -1,5 +1,4 @@ exception Code of Source.region * string val decode : string -> string -> Ast.module_ (* raises Code *) - -val decode_custom : Ast.name -> string -> string -> string list (* raises Code *) +val decode_with_custom : string -> string -> Ast.module_ * Custom.section list (* raises Code *) diff --git a/interpreter/binary/encode.ml b/interpreter/binary/encode.ml index f5665bb1cd..3e22481d35 100644 --- a/interpreter/binary/encode.ml +++ b/interpreter/binary/encode.ml @@ -930,39 +930,67 @@ struct (* Custom section *) - - let custom (n, bs) = + let custom c = + let Custom.{name = n; content; _} = c.it in name n; - put_string s bs + put_string s content - let custom_section n bs = - section 0 custom (n, bs) true + let custom_section place c = + let here = Custom.(compare_place c.it.place place) <= 0 in + if here then section 0 custom c true; + here (* Module *) + let rec iterate f xs = + match xs with + | [] -> [] + | x::xs' -> if f x then iterate f xs' else xs - let module_ m = + let module_ m cs = + let open Custom in word32 0x6d736100l; word32 version; + let cs = iterate (custom_section (Before Type)) cs in type_section m.it.types; + let cs = iterate (custom_section (Before Import)) cs in import_section m.it.imports; + let cs = iterate (custom_section (Before Func)) cs in func_section m.it.funcs; + let cs = iterate (custom_section (Before Table)) cs in table_section m.it.tables; + let cs = iterate (custom_section (Before Memory)) cs in memory_section m.it.memories; + let cs = iterate (custom_section (Before Global)) cs in global_section m.it.globals; + let cs = iterate (custom_section (Before Export)) cs in export_section m.it.exports; + let cs = iterate (custom_section (Before Start)) cs in start_section m.it.start; + let cs = iterate (custom_section (Before Elem)) cs in elem_section m.it.elems; + let cs = iterate (custom_section (Before DataCount)) cs in data_count_section m.it.datas m; + let cs = iterate (custom_section (Before Code)) cs in code_section m.it.funcs; - data_section m.it.datas + let cs = iterate (custom_section (Before Data)) cs in + data_section m.it.datas; + let cs = iterate (custom_section (After Data)) cs in + assert (cs = []) end +let encode_custom m bs (module S : Custom.Section) = + let open Source in + let c = S.Handler.encode m bs S.it in + Custom.{c.it with place = S.Handler.place S.it} @@ c.at + let encode m = let module E = E (struct let stream = stream () end) in - E.module_ m; to_string E.s + E.module_ m []; to_string E.s -let encode_custom name content = +let encode_with_custom (m, secs) = + let bs = encode m in let module E = E (struct let stream = stream () end) in - E.custom_section name content; to_string E.s + let cs = List.map (encode_custom m bs) secs in + E.module_ m cs; to_string E.s diff --git a/interpreter/binary/encode.mli b/interpreter/binary/encode.mli index 00fcd31640..05d1f2a677 100644 --- a/interpreter/binary/encode.mli +++ b/interpreter/binary/encode.mli @@ -2,4 +2,4 @@ exception Code of Source.region * string val version : int32 val encode : Ast.module_ -> string -val encode_custom : Ast.name -> string -> string +val encode_with_custom : Ast.module_ * Custom.section list -> string diff --git a/interpreter/custom/custom.ml b/interpreter/custom/custom.ml new file mode 100644 index 0000000000..17ed651f6b --- /dev/null +++ b/interpreter/custom/custom.ml @@ -0,0 +1,85 @@ +(* Raw custom section *) + +type section_kind = + | Custom + | Type + | Import + | Func + | Table + | Memory + | Global + | Export + | Start + | Elem + | Code + | Data + | DataCount + +type place = + | Before of section_kind + | After of section_kind + +type custom = custom' Source.phrase +and custom' = +{ + name : Ast.name; + content : string; + place : place; +} + + +let first = Type +let last = Data + +let compare_place pl1 pl2 = + match pl1, pl2 with + | Before s1, Before s2 + | After s1, After s2 -> compare s1 s2 + | Before s1, After s2 -> if s1 = s2 then -1 else compare s1 s2 + | After s1, Before s2 -> if s1 = s2 then +1 else compare s1 s2 + + +(* Handlers *) + +exception Code of Source.region * string +exception Syntax of Source.region * string +exception Invalid of Source.region * string + +module type Handler = +sig + type format' + type format = format' Source.phrase + val name : Ast.name + val place : format -> place + val decode : Ast.module_ -> string -> custom -> format (* raise Code *) + val encode : Ast.module_ -> string -> format -> custom + val parse : Ast.module_ -> string -> Annot.annot list -> format list (* raise Syntax *) + val arrange : Ast.module_ -> Sexpr.sexpr -> format -> Sexpr.sexpr + val check : Ast.module_ -> format -> unit (* raise Invalid *) +end + +module type Section = +sig + module Handler : Handler + val it : Handler.format +end + +type section = (module Section) + +let compare_section (module S1 : Section) (module S2 : Section) = + match compare_place (S1.Handler.place S1.it) (S2.Handler.place S2.it) with + | 0 -> compare S1.it.Source.at S2.it.Source.at + | n -> n + + +(* Handler registry *) + +module Registry = Map.Make(struct type t = Ast.name let compare = compare end) + +let registry = ref Registry.empty + +let register (module H : Handler) = + registry := Registry.add H.name (module H : Handler) !registry + +let handler (name : Ast.name) : (module Handler) option = + Registry.find_opt name !registry diff --git a/interpreter/custom/handler_custom.ml b/interpreter/custom/handler_custom.ml new file mode 100644 index 0000000000..3a1d8054f8 --- /dev/null +++ b/interpreter/custom/handler_custom.ml @@ -0,0 +1,168 @@ +(* Handler for @custom annotations *) + +open Custom +open Annot +open Source + +type format' = Custom.custom' +type format = Custom.custom + +let name = Utf8.decode "custom" + +let place fmt = fmt.it.place + + +(* Decoding & encoding *) + +let decode_content m custom = + let Custom.{name; content; place} = custom.it in + match Custom.handler name with + | Some (module Handler) -> + let module S = + struct + module Handler = Handler + let it = Handler.decode m "" custom + end + in Some (module S : Custom.Section) + | None -> + if !Flags.custom_reject then + raise (Custom.Code (custom.at, + "unknown custom section \"" ^ Utf8.encode name ^ "\"")) + else + None + +let decode m _bs custom = + ignore (decode_content m custom); + custom + +let encode _m _bs custom = custom + + +(* Parsing *) + +let parse_error at msg = raise (Custom.Syntax (at, msg)) + +let rec parse m _bs annots = List.map (parse_annot m) annots + +and parse_annot m annot = + let {name = n; items} = annot.it in + assert (n = name); + let cname, items' = parse_name annot.at items in + let place, items'' = parse_place_opt items' in + let content, items''' = parse_content items'' in + parse_end items'''; + let Ast.{types; globals; tables; memories; funcs; start; + elems; datas; imports; exports} = m.it in + let outside x = + if annot.at.left >= x.at.left && annot.at.right <= x.at.right then + parse_error annot.at "misplaced @custom annotation" + in + List.iter outside types; + List.iter outside globals; + List.iter outside tables; + List.iter outside memories; + List.iter outside funcs; + List.iter outside (Option.to_list start); + List.iter outside elems; + List.iter outside datas; + List.iter outside imports; + List.iter outside exports; + let custom = {name = cname; content; place} @@ annot.at in + ignore (decode_content m custom); + custom + +and parse_name at = function + | {it = String s; at} :: items -> + (try Utf8.decode s, items with Utf8.Utf8 -> + parse_error at "@custom annotation: malformed UTF-8 encoding" + ) + | _ -> + parse_error at "@custom annotation: missing section name" + +and parse_place_opt = function + | {it = Parens items'; at} :: items -> + let dir, items'' = parse_direction at items' in + let sec, items''' = parse_section at items'' in + parse_end items'''; + dir sec, items + | items -> + After last, items + +and parse_direction at = function + | {it = Atom "before"; _} :: items -> (fun sec -> Before sec), items + | {it = Atom "after"; _} :: items -> (fun sec -> After sec), items + | _ -> + parse_error at "@custom annotation: malformed placement" + +and parse_section at = function + | {it = Atom "type"; _} :: items -> Type, items + | {it = Atom "import"; _} :: items -> Import, items + | {it = Atom "func"; _} :: items -> Func, items + | {it = Atom "table"; _} :: items -> Table, items + | {it = Atom "memory"; _} :: items -> Memory, items + | {it = Atom "global"; _} :: items -> Global, items + | {it = Atom "export"; _} :: items -> Export, items + | {it = Atom "start"; _} :: items -> Start, items + | {it = Atom "elem"; _} :: items -> Elem, items + | {it = Atom "code"; _} :: items -> Code, items + | {it = Atom "data"; _} :: items -> Data, items + | {it = Atom "datacount"; _} :: items -> DataCount, items + | {it = Atom "first"; _} :: items -> first, items + | {it = Atom "last"; _} :: items -> last, items + | _ -> + parse_error at "@custom annotation: malformed section kind" + +and parse_content = function + | {it = String bs; _} :: items -> + let bs', items' = parse_content items in + bs ^ bs', items' + | items -> "", items + +and parse_end = function + | [] -> () + | item :: _ -> + parse_error item.at "@custom annotation: unexpected token" + + +(* Printing *) + +open Sexpr + +let rec arrange _m mnode custom = + let {name; content; place} = custom.it in + let node = Node ("@custom " ^ Arrange.name name, + arrange_place place :: Arrange.break_bytes content + ) in + match mnode with + | Sexpr.Atom _ -> assert false + | Node (name, secs) -> Node (name, secs @ [node]) + +and arrange_place = function + | Before sec -> Node ("before", [Atom (arrange_sec sec)]) + | After sec -> Node ("after", [Atom (arrange_sec sec)]) + +and arrange_sec = function + | Custom -> assert false + | Type -> "type" + | Import -> "import" + | Func -> "func" + | Table -> "table" + | Memory -> "memory" + | Global -> "global" + | Export -> "export" + | Start -> "start" + | Elem -> "elem" + | Code -> "code" + | Data -> "data" + | DataCount -> "datacount" + + +(* Checking *) + +let check m custom = + let {place; _} = custom.it in + assert (compare_place place (After Custom) > 0); + match decode_content m custom with + | None -> () + | Some (module S : Custom.Section) -> + S.Handler.check m S.it diff --git a/interpreter/custom/handler_custom.mli b/interpreter/custom/handler_custom.mli new file mode 100644 index 0000000000..618ad1e4bd --- /dev/null +++ b/interpreter/custom/handler_custom.mli @@ -0,0 +1 @@ +include Custom.Handler with type format' = Custom.custom' diff --git a/interpreter/custom/handler_name.ml b/interpreter/custom/handler_name.ml new file mode 100644 index 0000000000..7c28708b9b --- /dev/null +++ b/interpreter/custom/handler_name.ml @@ -0,0 +1,358 @@ +(* Handler for "name" section and @name annotations *) + +open Custom +open Annot +open Source + +module IdxMap = Map.Make(Int32) + +type name = Ast.name Source.phrase +type name_map = name IdxMap.t +type indirect_name_map = name_map Source.phrase IdxMap.t + +type format = format' Source.phrase +and format' = +{ + module_ : name option; + funcs : name_map; + locals : indirect_name_map; +} + + +let empty = {module_ = None; funcs = IdxMap.empty; locals = IdxMap.empty } + +let name = Utf8.decode "name" + +let place _fmt = After last + + +(* Decoding *) + +(* TODO: make Decode module reusable instead of duplicating code *) + +type stream = {bytes : string; pos : int ref} + +exception EOS + +let stream bs = {bytes = bs; pos = ref 0} + +let len s = String.length s.bytes +let pos s = !(s.pos) +let eos s = (pos s = len s) + +let check n s = if pos s + n > len s then raise EOS +let skip n s = if n < 0 then raise EOS else check n s; s.pos := !(s.pos) + n + +let read s = Char.code (s.bytes.[!(s.pos)]) +let peek s = if eos s then None else Some (read s) +let get s = check 1 s; let b = read s in skip 1 s; b +let get_string n s = let i = pos s in skip n s; String.sub s.bytes i n + +let position pos = Source.{file = "@name section"; line = -1; column = pos} +let region left right = Source.{left = position left; right = position right} + +let at f s = + let left = pos s in + let x = f s in + let right = pos s in + Source.(x @@ region left right) + +let decode_error pos msg = raise (Custom.Code (region pos pos, msg)) +let require b pos msg = if not b then decode_error pos msg + +let decode_byte s = + get s + +let rec decode_uN n s = + require (n > 0) (pos s) "integer representation too long"; + let b = decode_byte s in + require (n >= 7 || b land 0x7f < 1 lsl n) (pos s - 1) "integer too large"; + let x = Int32.of_int (b land 0x7f) in + if b land 0x80 = 0 then x else + Int32.(logor x (shift_left (decode_uN (n - 7) s) 7)) + +let decode_u32 = decode_uN 32 + +let decode_size s = + Int32.to_int (decode_u32 s) + +let decode_name s = + let n = decode_size s in + let pos = pos s in + try Utf8.decode (get_string n s) with Utf8.Utf8 -> + decode_error pos "malformed UTF-8 encoding" + +let decode_name_assoc s = + let x = decode_u32 s in + let n = decode_name s in + (x, n) + +let decode_name_map s = + let n = decode_size s in + let m = ref IdxMap.empty in + for _ = 1 to n do + let {it = (x, name); at} = at decode_name_assoc s in + if IdxMap.mem x !m then + decode_error at.left.column "custom @name: multiple function or local names"; + m := IdxMap.add x (name @@ at) !m + done; + !m + +let decode_indirect_name_assoc s = + let x = decode_u32 s in + let m = at decode_name_map s in + (x, m) + +let decode_indirect_name_map s = + let n = decode_size s in + let m = ref IdxMap.empty in + for _ = 1 to n do + let {it = (x, m'); at} = at decode_indirect_name_assoc s in + if IdxMap.mem x !m then + decode_error at.left.column "custom @name: multiple function names"; + m := IdxMap.add x m' !m + done; + !m + +let decode_module s = Some (at decode_name s) +let decode_funcs s = decode_name_map s +let decode_locals s = decode_indirect_name_map s + +let decode_subsec id f default s = + match peek s with + | None -> default + | Some id' when id' <> id -> default + | _ -> + let _id = decode_byte s in + let n = decode_size s in + let pos' = pos s in + let ss = f s in + require (pos s = pos' + n) (pos s) "name subsection size mismatch"; + ss + +let decode _m _bs custom = + let s = stream custom.it.content in + try + let module_ = decode_subsec 0x00 decode_module None s in + let funcs = decode_subsec 0x01 decode_funcs IdxMap.empty s in + let locals = decode_subsec 0x02 decode_locals IdxMap.empty s in + require (eos s) (pos s) "invalid name subsection id"; + {module_; funcs; locals} @@ custom.at + with EOS -> decode_error (pos s) "unexpected end of name section" + + +(* Encoding *) + +(* TODO: make Encode module reusable *) + +let encode_byte buf b = + Buffer.add_char buf (Char.chr b) + +let rec encode_u32 buf i = + let b = Int32.(to_int (logand i 0x7fl)) in + if 0l <= i && i < 128l then encode_byte buf b + else ( + encode_byte buf (b lor 0x80); + encode_u32 buf (Int32.shift_right_logical i 7) + ) + +let encode_size buf n = + encode_u32 buf (Int32.of_int n) + +let encode_name buf n = + let s = Utf8.encode n in + encode_size buf (String.length s); + Buffer.add_string buf s + +let encode_name_assoc buf x n = + encode_u32 buf x; + encode_name buf n.it + +let encode_name_map buf m = + encode_size buf (IdxMap.cardinal m); + IdxMap.iter (encode_name_assoc buf) m + +let encode_indirect_name_assoc buf x m = + encode_u32 buf x; + encode_name_map buf m.it + +let encode_indirect_name_map buf m = + encode_size buf (IdxMap.cardinal m); + IdxMap.iter (encode_indirect_name_assoc buf) m + +let encode_subsec_begin buf id = + encode_byte buf id; + let pre = Buffer.contents buf in + Buffer.clear buf; + pre + +let encode_subsec_end buf pre = + let contents = Buffer.contents buf in + Buffer.clear buf; + Buffer.add_string buf pre; + encode_size buf (String.length contents); + Buffer.add_string buf contents + +let encode_module buf name_opt = + match name_opt with + | None -> () + | Some name -> + let subsec = encode_subsec_begin buf 0x00 in + encode_name buf name.it; + encode_subsec_end buf subsec + +let encode_funcs buf name_map = + if not (IdxMap.is_empty name_map) then begin + let subsec = encode_subsec_begin buf 0x01 in + encode_name_map buf name_map; + encode_subsec_end buf subsec + end + +let encode_locals buf name_map_map = + if not (IdxMap.is_empty name_map_map) then begin + let subsec = encode_subsec_begin buf 0x02 in + encode_indirect_name_map buf name_map_map; + encode_subsec_end buf subsec + end + +let encode _m _bs sec = + let {module_; funcs; locals} = sec.it in + let buf = Buffer.create 200 in + encode_module buf module_; + encode_funcs buf funcs; + encode_locals buf locals; + let content = Buffer.contents buf in + {name = Utf8.decode "name"; content; place = After last} @@ sec.at + + +(* Parsing *) + +open Ast + +let parse_error at msg = raise (Custom.Syntax (at, msg)) + +let merge_name_opt n1 n2 = + match n1, n2 with + | None, None -> None + | None, some + | some, None -> some + | Some _, Some n2 -> + parse_error n2.at "@name annotation: multiple module names" + +let merge_name_map m1 m2 = + IdxMap.union (fun x _ n2 -> + parse_error n2.at "@name annotation: multiple function names" + ) m1 m2 + +let merge_indirect_name_map m1 m2 = + IdxMap.union (fun x m1' m2' -> + Some ( + IdxMap.union (fun x _ n2 -> + parse_error n2.at "@name annotation: multiple local names" + ) m1'.it m2'.it @@ {left = m1'.at.left; right = m2'.at.right} + ) + ) m1 m2 + +let merge s1 s2 = + { + module_ = merge_name_opt s1.it.module_ s2.it.module_; + funcs = merge_name_map s1.it.funcs s2.it.funcs; + locals = merge_indirect_name_map s1.it.locals s2.it.locals; + } @@ {left = s1.at.left; right = s2.at.right} + + +let is_contained r1 r2 = r1.left >= r2.left && r1.right <= r2.right +let is_left r1 r2 = r1.right <= r2.left + +let locate_func bs x name at (f : func) = + if is_left at f.it.ftype.at then + {empty with funcs = IdxMap.singleton x name} + else if f.it.body = [] || is_left at (List.hd f.it.body).at then + (* TODO re-parse the function params and locals from bs *) + parse_error at "@name annotation: local names not yet supported" + else + parse_error at "@name annotation: misplaced annotation" + +let locate_module bs name at (m : module_) = + if not (is_contained at m.at) then + parse_error at "misplaced @name annotation"; + let {types; globals; tables; memories; funcs; start; + elems; datas; imports; exports} = m.it in + let ats = + List.map (fun p -> p.at) types @ + List.map (fun p -> p.at) globals @ + List.map (fun p -> p.at) tables @ + List.map (fun p -> p.at) memories @ + List.map (fun p -> p.at) funcs @ + List.map (fun p -> p.at) (Option.to_list start) @ + List.map (fun p -> p.at) elems @ + List.map (fun p -> p.at) datas @ + List.map (fun p -> p.at) imports @ + List.map (fun p -> p.at) exports |> List.sort compare + in + match ats with + | [] -> {empty with module_ = Some name} + | at1::_ when is_left at at1 -> {empty with module_ = Some name} + | _ -> + match Lib.List.index_where (fun f -> is_contained at f.at) funcs with + | Some x -> locate_func bs (Int32.of_int x) name at (List.nth funcs x) + | None -> parse_error at "misplaced @name annotation" + + +let rec parse m bs annots = + let ms = List.map (parse_annot m bs) annots in + match ms with + | [] -> [] + | m::ms' -> [List.fold_left merge (empty @@ m.at) ms] + +and parse_annot m bs annot = + let {name = n; items} = annot.it in + assert (n = name); + let name, items' = parse_name annot.at items in + parse_end items'; + locate_module bs name annot.at m @@ annot.at + +and parse_name at = function + | {it = String s; at} :: items -> + (try Utf8.decode s @@ at, items with Utf8.Utf8 -> + parse_error at "malformed UTF-8 encoding" + ) + | _ -> + parse_error at "@name annotation: string expected" + +and parse_end = function + | [] -> () + | item :: _ -> + parse_error item.at "@name annotation: unexpected token" + + +(* Printing *) + +let arrange m bs fmt = + (* Print as generic custom section *) + Handler_custom.arrange m bs (encode m "" fmt) + + +(* Checking *) + +let check_error at msg = raise (Custom.Invalid (at, msg)) + +let check (m : module_) (fmt : format) = + IdxMap.iter (fun x name -> + if I32.ge_u x (Lib.List32.length m.it.funcs) then + check_error name.at ("custom @name: invalid function index " ^ + I32.to_string_u x) + ) fmt.it.funcs; + IdxMap.iter (fun x map -> + if I32.ge_u x (Lib.List32.length m.it.funcs) then + check_error map.at ("custom @name: invalid function index " ^ + I32.to_string_u x); + let f = Lib.List32.nth m.it.funcs x in + let Types.FuncType (ts, _) = func_type_for m f.it.ftype in + let n = I32.add (Lib.List32.length ts) (Lib.List32.length f.it.locals) in + IdxMap.iter (fun y name -> + if I32.ge_u y n then + check_error name.at ("custom @name: invalid local index " ^ + I32.to_string_u y ^ " for function " ^ I32.to_string_u x) + ) map.it; + ) fmt.it.locals diff --git a/interpreter/custom/handler_name.mli b/interpreter/custom/handler_name.mli new file mode 100644 index 0000000000..caccb08bc1 --- /dev/null +++ b/interpreter/custom/handler_name.mli @@ -0,0 +1 @@ +include Custom.Handler diff --git a/interpreter/main/flags.ml b/interpreter/main/flags.ml index b92378aa2f..14140ff358 100644 --- a/interpreter/main/flags.ml +++ b/interpreter/main/flags.ml @@ -5,4 +5,5 @@ let print_sig = ref false let dry = ref false let width = ref 80 let harness = ref true +let custom_reject = ref false let budget = ref 256 diff --git a/interpreter/main/main.ml b/interpreter/main/main.ml index beeb980494..bf594b827a 100644 --- a/interpreter/main/main.ml +++ b/interpreter/main/main.ml @@ -1,9 +1,15 @@ let name = "wasm" let version = "2.0" -let configure () = +let all_handlers = [ + (module Handler_custom : Custom.Handler); + (module Handler_name : Custom.Handler); +] + +let configure custom_handlers = Import.register (Utf8.decode "spectest") Spectest.lookup; - Import.register (Utf8.decode "env") Env.lookup + Import.register (Utf8.decode "env") Env.lookup; + List.iter Custom.register custom_handlers let banner () = print_endline (name ^ " " ^ version ^ " reference interpreter") @@ -13,6 +19,15 @@ let usage = "Usage: " ^ name ^ " [option] [file ...]" let args = ref [] let add_arg source = args := !args @ [source] +let customs = ref [] +let add_custom name = + let n = Utf8.decode name in + match List.find_opt (fun (module H : Custom.Handler) -> n = H.name) all_handlers with + | Some h -> customs := !customs @ [h] + | None -> + prerr_endline ("option -c: unknown custom section \"" ^ name ^ "\""); + exit 1 + let quote s = "\"" ^ String.escaped s ^ "\"" let argspec = Arg.align @@ -28,6 +43,12 @@ let argspec = Arg.align " configure call depth budget (default is " ^ string_of_int !Flags.budget ^ ")"; "-w", Arg.Int (fun n -> Flags.width := n), " configure output width (default is " ^ string_of_int !Flags.width ^ ")"; + "-c", Arg.String add_custom, + " recognize custom section"; + "-ca", Arg.Unit (fun () -> customs := all_handlers), + " recognize all known custom section"; + "-cr", Arg.Set Flags.custom_reject, + " reject unrecognized custom sections"; "-s", Arg.Set Flags.print_sig, " show module signatures"; "-u", Arg.Set Flags.unchecked, " unchecked, do not perform validation"; "-j", Arg.Clear Flags.harness, " exclude harness for JS conversion"; @@ -39,9 +60,9 @@ let argspec = Arg.align let () = Printexc.record_backtrace true; try - configure (); Arg.parse argspec (fun file -> add_arg ("(input " ^ quote file ^ ")")) usage; + configure !customs; List.iter (fun arg -> if not (Run.run_string arg) then exit 1) !args; if !args = [] then Flags.interactive := true; if !Flags.interactive then begin diff --git a/interpreter/meta/jslib/wast.ml b/interpreter/meta/jslib/wast.ml index 9af04f9189..a2dd7809d7 100644 --- a/interpreter/meta/jslib/wast.ml +++ b/interpreter/meta/jslib/wast.ml @@ -11,7 +11,7 @@ let _ = let def = Parse.string_to_module (Js.to_string s) in let bs = match def.Source.it with - | Script.Textual m -> (Encode.encode m) + | Script.Textual (m, cs) -> Encode.encode_with_custom (m, cs) | Script.Encoded (_, bs) -> bs | Script.Quoted (_, _) -> failwith "Unsupported" in let buf = new%js Typed_array.arrayBuffer (String.length bs) in diff --git a/interpreter/script/js.ml b/interpreter/script/js.ml index 2eb849a6c1..1ae30f55bf 100644 --- a/interpreter/script/js.ml +++ b/interpreter/script/js.ml @@ -523,11 +523,11 @@ let of_result res = let rec of_definition def = match def.it with - | Textual m -> of_bytes (Encode.encode m) + | Textual (m, _) -> of_bytes (Encode.encode m) | Encoded (_, bs) -> of_bytes bs | Quoted (_, s) -> - try of_definition (Parse.string_to_module s) with Parse.Syntax _ -> - of_bytes "" + try of_definition (Parse.string_to_module s) + with Parse.Syntax _ | Custom.Syntax _ -> of_bytes "" let of_wrapper mods x_opt name wrap_action wrap_assertion at = let x = of_var_opt mods x_opt in @@ -592,7 +592,7 @@ let of_command mods cmd = | Module (x_opt, def) -> let rec unquote def = match def.it with - | Textual m -> m + | Textual (m, _) -> m | Encoded (_, bs) -> Decode.decode "binary" bs | Quoted (_, s) -> unquote (Parse.string_to_module s) in bind mods x_opt (unquote def); diff --git a/interpreter/script/run.ml b/interpreter/script/run.ml index 583bd2a22f..90d2aa75c1 100644 --- a/interpreter/script/run.ml +++ b/interpreter/script/run.ml @@ -43,7 +43,7 @@ let dispatch_file_ext on_binary on_sexpr on_script_binary on_script on_js file = let create_binary_file file _ get_module = trace ("Encoding (" ^ file ^ ")..."); - let s = Encode.encode (get_module ()) in + let s = Encode.encode_with_custom (get_module ()) in let oc = open_out_bin file in try trace "Writing..."; @@ -55,7 +55,7 @@ let create_sexpr_file file _ get_module = trace ("Writing (" ^ file ^ ")..."); let oc = open_out file in try - Print.module_ oc !Flags.width (get_module ()); + Print.module_with_custom oc !Flags.width (get_module ()); close_out oc with exn -> close_out oc; raise exn @@ -87,7 +87,7 @@ let output_file = let output_stdout get_module = trace "Printing..."; - Print.module_ stdout !Flags.width (get_module ()) + Print.module_with_custom stdout !Flags.width (get_module ()) (* Input *) @@ -106,7 +106,10 @@ let input_from get_script run = with | Decode.Code (at, msg) -> error at "decoding error" msg | Parse.Syntax (at, msg) -> error at "syntax error" msg - | Valid.Invalid (at, msg) -> error at "invalid module" msg + | Valid.Invalid (at, msg) -> error at "validation error" msg + | Custom.Code (at, msg) -> error at "custom section decoding error" msg + | Custom.Syntax (at, msg) -> error at "custom annotation syntax error" msg + | Custom.Invalid (at, msg) -> error at "custom validation error" msg | Import.Unknown (at, msg) -> error at "link failure" msg | Eval.Link (at, msg) -> error at "link failure" msg | Eval.Trap (at, msg) -> error at "runtime trap" msg @@ -299,7 +302,7 @@ module Map = Map.Make(String) let quote : script ref = ref [] let scripts : script Map.t ref = ref Map.empty -let modules : Ast.module_ Map.t ref = ref Map.empty +let modules : (Ast.module_ * Custom.section list) Map.t ref = ref Map.empty let instances : Instance.module_inst Map.t ref = ref Map.empty let registry : Instance.module_inst Map.t ref = ref Map.empty @@ -332,12 +335,12 @@ let lookup_registry module_name item_name _t = (* Running *) -let rec run_definition def : Ast.module_ = +let rec run_definition def : Ast.module_ * Custom.section list = match def.it with - | Textual m -> m + | Textual (m, cs) -> m, cs | Encoded (name, bs) -> trace "Decoding..."; - Decode.decode name bs + Decode.decode_with_custom name bs | Quoted (_, s) -> trace "Parsing quote..."; let def' = Parse.string_to_module s in @@ -447,24 +450,28 @@ let run_assertion ass = (match ignore (run_definition def) with | exception Decode.Code (_, msg) -> assert_message ass.at "decoding" msg re | exception Parse.Syntax (_, msg) -> assert_message ass.at "parsing" msg re + | exception Custom.Syntax (_, msg) -> + assert_message ass.at "annotation parsing" msg re | _ -> Assert.error ass.at "expected decoding/parsing error" ) | AssertInvalid (def, re) -> trace "Asserting invalid..."; (match - let m = run_definition def in - Valid.check_module m + let m, cs = run_definition def in + Valid.check_module_with_custom (m, cs) with | exception Valid.Invalid (_, msg) -> assert_message ass.at "validation" msg re + | exception Custom.Invalid (_, msg) -> + assert_message ass.at "custom validation" msg re | _ -> Assert.error ass.at "expected validation error" ) | AssertUnlinkable (def, re) -> trace "Asserting unlinkable..."; - let m = run_definition def in - if not !Flags.unchecked then Valid.check_module m; + let m, cs = run_definition def in + if not !Flags.unchecked then Valid.check_module_with_custom (m, cs); (match let imports = Import.link m in ignore (Eval.init m imports) @@ -476,8 +483,8 @@ let run_assertion ass = | AssertUninstantiable (def, re) -> trace "Asserting trap..."; - let m = run_definition def in - if not !Flags.unchecked then Valid.check_module m; + let m, cs = run_definition def in + if not !Flags.unchecked then Valid.check_module_with_custom (m, cs); (match let imports = Import.link m in ignore (Eval.init m imports) @@ -512,16 +519,16 @@ let rec run_command cmd = match cmd.it with | Module (x_opt, def) -> quote := cmd :: !quote; - let m = run_definition def in + let m, cs = run_definition def in if not !Flags.unchecked then begin trace "Checking..."; - Valid.check_module m; + Valid.check_module_with_custom (m, cs); if !Flags.print_sig then begin trace "Signature:"; print_module x_opt m end end; - bind "module" modules x_opt m; + bind "module" modules x_opt (m, cs); bind "script" scripts x_opt [cmd]; if not !Flags.dry then begin trace "Initializing..."; diff --git a/interpreter/script/script.ml b/interpreter/script/script.ml index 4c4f550f2c..e7ad2fa906 100644 --- a/interpreter/script/script.ml +++ b/interpreter/script/script.ml @@ -7,7 +7,7 @@ type literal = Values.value Source.phrase type definition = definition' Source.phrase and definition' = - | Textual of Ast.module_ + | Textual of Ast.module_ * Custom.section list | Encoded of string * string | Quoted of string * string diff --git a/interpreter/text/annot.ml b/interpreter/text/annot.ml new file mode 100644 index 0000000000..779086e6f2 --- /dev/null +++ b/interpreter/text/annot.ml @@ -0,0 +1,53 @@ +open Source + +type annot = annot' Source.phrase +and annot' = {name : Ast.name; items : item list} + +and item = item' Source.phrase +and item' = + | Atom of string + | Var of string + | String of string + | Nat of string + | Int of string + | Float of string + | Parens of item list + | Annot of annot + + +(* Stateful recorder for annotations *) +(* I wish this could be encapsulated in the parser somehow *) + +module NameMap = Map.Make(struct type t = Ast.name let compare = compare end) +type map = annot list NameMap.t + +let current : map ref = ref NameMap.empty +let current_source : Buffer.t = Buffer.create 512 + +let reset () = + current := NameMap.empty; + Buffer.clear current_source + +let get_source () = + Buffer.contents current_source + +let record annot = + let old = Lib.Option.get (NameMap.find_opt annot.it.name !current) [] in + current := NameMap.add annot.it.name (annot::old) !current + +let is_contained r1 r2 = r1.left >= r2.left && r1.right <= r2.right + +let get_all () = + let all = !current in + current := NameMap.empty; + all + +let filter f map = + NameMap.filter (fun _ annots -> annots <> []) + (NameMap.map (List.filter f) map) + +let get r = + let sub = filter (fun annot -> is_contained annot.at r) !current in + let map' = filter (fun annot -> not (is_contained annot.at r)) !current in + current := map'; + sub diff --git a/interpreter/text/arrange.ml b/interpreter/text/arrange.ml index dc56743eb6..458e130a64 100644 --- a/interpreter/text/arrange.ml +++ b/interpreter/text/arrange.ml @@ -620,31 +620,35 @@ let global off i g = let start s = Node ("start " ^ var s.it.sfunc, []) +let custom m mnode (module S : Custom.Section) = + S.Handler.arrange m mnode S.it -(* Modules *) +(* Module *) let var_opt = function | None -> "" | Some x -> " " ^ x.it -let module_with_var_opt x_opt m = +let module_with_var_opt x_opt (m, cs) = let fx = ref 0 in let tx = ref 0 in let mx = ref 0 in let gx = ref 0 in let imports = list (import fx tx mx gx) m.it.imports in - Node ("module" ^ var_opt x_opt, + let ret = Node ("module" ^ var_opt x_opt, listi typedef m.it.types @ imports @ listi (table !tx) m.it.tables @ listi (memory !mx) m.it.memories @ listi (global !gx) m.it.globals @ - listi (func_with_index !fx) m.it.funcs @ list export m.it.exports @ opt start m.it.start @ listi elem m.it.elems @ + listi (func_with_index !fx) m.it.funcs @ listi data m.it.datas - ) + ) in + List.fold_left (custom m) ret cs + let binary_module_with_var_opt x_opt bs = Node ("module" ^ var_opt x_opt ^ " binary", break_bytes bs) @@ -652,7 +656,8 @@ let binary_module_with_var_opt x_opt bs = let quoted_module_with_var_opt x_opt s = Node ("module" ^ var_opt x_opt ^ " quote", break_string s) -let module_ = module_with_var_opt None +let module_with_custom = module_with_var_opt None +let module_ m = module_with_custom (m, []) (* Scripts *) @@ -677,20 +682,21 @@ let definition mode x_opt def = | `Textual -> let rec unquote def = match def.it with - | Textual m -> m - | Encoded (_, bs) -> Decode.decode "" bs + | Textual (m, cs) -> m, cs + | Encoded (_, bs) -> Decode.decode_with_custom "" bs | Quoted (_, s) -> unquote (Parse.string_to_module s) in module_with_var_opt x_opt (unquote def) | `Binary -> let rec unquote def = match def.it with - | Textual m -> Encode.encode m - | Encoded (_, bs) -> Encode.encode (Decode.decode "" bs) + | Textual (m, cs) -> Encode.encode_with_custom (m, cs) + | Encoded (_, bs) -> + Encode.encode_with_custom (Decode.decode_with_custom "" bs) | Quoted (_, s) -> unquote (Parse.string_to_module s) in binary_module_with_var_opt x_opt (unquote def) | `Original -> match def.it with - | Textual m -> module_with_var_opt x_opt m + | Textual (m, cs) -> module_with_var_opt x_opt (m, cs) | Encoded (_, bs) -> binary_module_with_var_opt x_opt bs | Quoted (_, s) -> quoted_module_with_var_opt x_opt s with Parse.Syntax _ -> diff --git a/interpreter/text/arrange.mli b/interpreter/text/arrange.mli index 051686a443..a0fddd5d43 100644 --- a/interpreter/text/arrange.mli +++ b/interpreter/text/arrange.mli @@ -1,6 +1,14 @@ open Sexpr +val bytes : string -> string +val string : string -> string +val name : Ast.name -> string + +val break_bytes : string -> sexpr list +val break_string : string -> sexpr list + val instr : Ast.instr -> sexpr val func : Ast.func -> sexpr val module_ : Ast.module_ -> sexpr +val module_with_custom : Ast.module_ * Custom.section list -> sexpr val script : [`Textual | `Binary] -> Script.script -> sexpr list diff --git a/interpreter/text/lexer.mll b/interpreter/text/lexer.mll index e9897c8796..012387a6a7 100644 --- a/interpreter/text/lexer.mll +++ b/interpreter/text/lexer.mll @@ -697,7 +697,10 @@ rule token = parse | id as s { VAR s } - | "(@"name { annot (Lexing.lexeme_start_p lexbuf) lexbuf; token lexbuf } + | "(@"(name as n) + { let r = region lexbuf in + let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in + Annot.record (Annot.{name = Utf8.decode n; items} @@ r); token lexbuf } | "(@" { error lexbuf "malformed annotation id" } | ";;"utf8_no_nl*eof { EOF } @@ -714,16 +717,37 @@ rule token = parse | _ { error lexbuf "malformed UTF-8 encoding" } and annot start = parse - | ")" { () } - | "(" { annot (Lexing.lexeme_start_p lexbuf) lexbuf; annot start lexbuf } - - | reserved { annot start lexbuf } - | nat { annot start lexbuf } - | int { annot start lexbuf } - | float { annot start lexbuf } - | id { annot start lexbuf } - | string { annot start lexbuf } - | '"'character*('\n'|eof) { error lexbuf "unclosed string literal" } + | ")" { [] } + | "(" + { let r = region lexbuf in + let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in + (Annot.Parens items @@ r) :: annot start lexbuf } + | "(@"(name as n) + { let r = region lexbuf in + let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in + let ann = Annot.{name = Utf8.decode n; items} @@ r in + (Annot.Annot ann @@ r) :: annot start lexbuf } + + | nat as s + { let r = region lexbuf in + (Annot.Nat s @@ r) :: annot start lexbuf } + | int as s + { let r = region lexbuf in + (Annot.Int s @@ r) :: annot start lexbuf } + | float as s + { let r = region lexbuf in + (Annot.Float s @@ r) :: annot start lexbuf } + | id as s + { let r = region lexbuf in + (Annot.Var s @@ r) :: annot start lexbuf } + | string as s + { let r = region lexbuf in + (Annot.String (string s) @@ r) :: annot start lexbuf } + | reserved as s + { let r = region lexbuf in + (Annot.Atom s @@ r) :: annot start lexbuf } + | '"'character*('\n'|eof) + { error lexbuf "unclosed string literal" } | '"'character*['\x00'-'\x09''\x0b'-'\x1f''\x7f'] { error lexbuf "illegal control character in string literal" } | '"'character*'\\'_ diff --git a/interpreter/text/parse.ml b/interpreter/text/parse.ml index 71c4cc4a9c..666df7992f 100644 --- a/interpreter/text/parse.ml +++ b/interpreter/text/parse.ml @@ -5,10 +5,35 @@ type 'a start = exception Syntax = Script.Syntax + +let wrap_lexbuf lexbuf = + let open Lexing in + let inner_refill = lexbuf.refill_buff in + let refill_buff lexbuf = + let oldlen = lexbuf.lex_buffer_len - lexbuf.lex_start_pos in + inner_refill lexbuf; + let newlen = lexbuf.lex_buffer_len - lexbuf.lex_start_pos in + let start = lexbuf.lex_start_pos + oldlen in + let n = newlen - oldlen in + Buffer.add_subbytes Annot.current_source lexbuf.lex_buffer start n + in + let n = lexbuf.lex_buffer_len - lexbuf.lex_start_pos in + Buffer.add_subbytes Annot.current_source lexbuf.lex_buffer lexbuf.lex_start_pos n; + {lexbuf with refill_buff} + let parse' name lexbuf start = + Annot.reset (); + let lexbuf = wrap_lexbuf lexbuf in lexbuf.Lexing.lex_curr_p <- {lexbuf.Lexing.lex_curr_p with Lexing.pos_fname = name}; - try start Lexer.token lexbuf + try + let result = start Lexer.token lexbuf in + let annots = Annot.get_all () in + if not (Annot.NameMap.is_empty annots) then + let annot = List.hd (snd (Annot.NameMap.choose annots)) in + raise (Custom.Syntax (annot.Source.at, "misplaced annotation")) + else + result with Syntax (region, s) -> let region' = if region <> Source.no_region then region else {Source.left = Lexer.convert_pos lexbuf.Lexing.lex_start_p; diff --git a/interpreter/text/parser.mly b/interpreter/text/parser.mly index 7489acf72b..b940ad0d9e 100644 --- a/interpreter/text/parser.mly +++ b/interpreter/text/parser.mly @@ -202,6 +202,30 @@ let inline_type_explicit (c : context) x ft at = error at "inline function type does not match explicit type"; x + +(* Custom annotations *) + +let parse_annots (m : module_) : Custom.section list = + let bs = Annot.get_source () in + let annots = Annot.get m.at in + let secs = + Annot.NameMap.fold (fun name anns secs -> + match Custom.handler name with + | Some (module Handler) -> + let secs' = Handler.parse m bs anns in + List.map (fun fmt -> + let module S = struct module Handler = Handler let it = fmt end in + (module S : Custom.Section) + ) secs' @ secs + | None -> + if !Flags.custom_reject then + raise (Custom.Syntax ((List.hd anns).at, + "unknown annotation @" ^ Utf8.encode name)) + else [] + ) annots [] + in + List.stable_sort Custom.compare_section secs + %} %token LPAR RPAR @@ -915,13 +939,13 @@ inline_export : /* Modules */ type_ : - | def_type { $1 @@ at () } + | def_type { $1 } type_def : | LPAR TYPE type_ RPAR - { fun c -> anon_type c $3 } + { let at = at () in fun c -> anon_type c ($3 @@ at) } | LPAR TYPE bind_var type_ RPAR /* Sugar */ - { fun c -> bind_type c $3 $4 } + { let at = at () in fun c -> bind_type c $3 ($4 @@ at) } start : | LPAR START var RPAR @@ -992,13 +1016,26 @@ module_var_opt : module_ : | LPAR MODULE module_var_opt module_fields RPAR - { $3, Textual ($4 (empty_context ()) () @@ at ()) @@ at () } + { let m = $4 (empty_context ()) () @@ at () in + $3, Textual (m, parse_annots m) @@ at () } inline_module : /* Sugar */ - | module_fields { Textual ($1 (empty_context ()) () @@ at ()) @@ at () } + | module_fields + { let at = at () in + (* Hack for empty modules *) + let at = if at.left <> at.right then at else + {at with right = {at.right with line = Stdlib.Int.max_int}} in + let m = $1 (empty_context ()) () @@ at in + Textual (m, parse_annots m) @@ at } inline_module1 : /* Sugar */ - | module_fields1 { Textual ($1 (empty_context ()) () @@ at ()) @@ at () } + | module_fields1 + { let at = at () in + (* Hack for empty modules *) + let at = if at.left <> at.right then at else + {at with right = {at.right with line = Stdlib.Int.max_int}} in + let m = $1 (empty_context ()) () @@ at in + Textual (m, parse_annots m) @@ at } /* Scripts */ diff --git a/interpreter/text/print.ml b/interpreter/text/print.ml index 9496182147..44bf4e5756 100644 --- a/interpreter/text/print.ml +++ b/interpreter/text/print.ml @@ -1,5 +1,6 @@ let instr oc width e = Sexpr.output oc width (Arrange.instr e) let func oc width f = Sexpr.output oc width (Arrange.func f) let module_ oc width m = Sexpr.output oc width (Arrange.module_ m) +let module_with_custom oc width m_cs = Sexpr.output oc width (Arrange.module_with_custom m_cs) let script oc width mode s = List.iter (Sexpr.output oc width) (Arrange.script mode s) diff --git a/interpreter/text/print.mli b/interpreter/text/print.mli index 861ae40d97..c97dc9f618 100644 --- a/interpreter/text/print.mli +++ b/interpreter/text/print.mli @@ -1,4 +1,5 @@ val instr : out_channel -> int -> Ast.instr -> unit val func : out_channel -> int -> Ast.func -> unit val module_ : out_channel -> int -> Ast.module_ -> unit +val module_with_custom : out_channel -> int -> Ast.module_ * Custom.section list -> unit val script : out_channel -> int -> [`Textual | `Binary] -> Script.script -> unit diff --git a/interpreter/valid/valid.ml b/interpreter/valid/valid.ml index cfe7f310f2..bda7a74986 100644 --- a/interpreter/valid/valid.ml +++ b/interpreter/valid/valid.ml @@ -735,3 +735,7 @@ let check_module (m : module_) = ignore (List.fold_left (check_export c) NameSet.empty exports); require (List.length c.memories <= 1) m.at "multiple memories are not allowed (yet)" + +let check_module_with_custom ((m : module_), (cs : Custom.section list)) = + check_module m; + List.iter (fun (module S : Custom.Section) -> S.Handler.check m S.it) cs diff --git a/interpreter/valid/valid.mli b/interpreter/valid/valid.mli index 5827ae56e9..ef89313312 100644 --- a/interpreter/valid/valid.mli +++ b/interpreter/valid/valid.mli @@ -1,3 +1,4 @@ exception Invalid of Source.region * string val check_module : Ast.module_ -> unit (* raises Invalid *) +val check_module_with_custom : Ast.module_ * Custom.section list -> unit (* raises Invalid, Custom.Check *) diff --git a/test/core/annotations.wast b/test/core/annotations.wast index 865581c68a..f90dc4fb34 100644 --- a/test/core/annotations.wast +++ b/test/core/annotations.wast @@ -1,18 +1,20 @@ -(@a) +(module + (@a) -(@aas-3!@$d-@#4) -(@@) (@$) (@+) (@0) (@.) (@!$@#$23414@#$) -(@a x y z) -(@a x-y $yz "aa" -2 0.3 0x3) -(@a x-y$yz"aa"-2) -(@a block func module i32.add) -(@a 0x 8q 0xfa #4g0-.@f#^&@#$*0sf -- @#) -(@a , ; ] [ }} }x{ ({) ,{{};}] ;) -(@a (bla) () (5-g) ("aa" a) ($x) (bla bla) (x (y)) ")" "(" x")"y) -(@a @ @x (@x) (@x y) (@) (@ x) (@(@(@(@))))) -(@a (;bla;) (; ) ;) - ;; bla) - ;; bla (@x + (@aas-3!@$d-@#4) + (@@) (@$) (@+) (@0) (@.) (@!$@#$23414@#$) + (@a x y z) + (@a x-y $yz "aa" -2 0.3 0x3) + (@a x-y$yz"aa"-2) + (@a block func module i32.add) + (@a 0x 8q 0xfa #4g0-.@f#^&@#$*0sf -- @#) + (@a , ; ] [ }} }x{ ({) ,{{};}] ;) + (@a (bla) () (5-g) ("aa" a) ($x) (bla bla) (x (y)) ")" "(" x")"y) + (@a @ @x (@x) (@x y) (@) (@ x) (@(@(@(@))))) + (@a (;bla;) (; ) ;) + ;; bla) + ;; bla (@x + ) ) (assert_malformed (module quote "(@a \00)") "illegal character") @@ -115,7 +117,7 @@ ((@a) export (@a) "f" (@a) ((@a) func (@a) $f (@a)) (@a) ) (@a) -) (@a) +) ((@a) module (@a) $m1 (@a) (@a) ((@a) global (@a) $g (@a) @@ -140,7 +142,7 @@ ((@a) param (@a) i32 (@a) f32 (@a)) (@a) ((@a) result (@a)) (@a) ) (@a) -) (@a) +) ((@a) module (@a) $m2 (@a) (@a) ((@a) type (@a) $T (@a) @@ -192,4 +194,4 @@ (func $s) ((@a) start (@a) $s (@a)) (@a) -) (@a) +) diff --git a/test/core/run.py b/test/core/run.py index fa80ea969b..350810a91a 100755 --- a/test/core/run.py +++ b/test/core/run.py @@ -13,27 +13,29 @@ ownDir = os.path.dirname(os.path.abspath(sys.argv[0])) inputDir = ownDir outputDir = os.path.join(inputDir, "_output") +opts = "" + +mainTestFiles = glob.glob(os.path.join(inputDir, "*.wast")) +otherTestFiles = glob.glob(os.path.join(inputDir, "[a-z]*/*.wast")) parser = argparse.ArgumentParser() parser.add_argument("--wasm", metavar="", default=os.path.join(os.getcwd(), "wasm")) parser.add_argument("--js", metavar="") parser.add_argument("--generate-js-only", action='store_true') parser.add_argument("--out", metavar="", default=outputDir) +parser.add_argument("--opts", metavar="", default=opts) parser.add_argument("file", nargs='*') arguments = parser.parse_args() sys.argv = sys.argv[:1] -main_test_files = glob.glob(os.path.join(inputDir, "*.wast")) -# SIMD test files are in a subdirectory. -simd_test_files = glob.glob(os.path.join(inputDir, "simd", "*.wast")) - -wasmCommand = arguments.wasm +wasmExec = arguments.wasm jsCommand = arguments.js generateJsOnly = arguments.generate_js_only outputDir = arguments.out -inputFiles = arguments.file if arguments.file else main_test_files + simd_test_files +inputFiles = arguments.file if arguments.file else mainTestFiles + otherTestFiles +wasmCommand = wasmExec + " " + arguments.opts -if not os.path.exists(wasmCommand): +if not os.path.exists(wasmExec): sys.stderr.write("""\ Error: The executable '%s' does not exist. Provide the correct path with the '--wasm' flag. diff --git a/test/custom/custom/custom_annot.wast b/test/custom/custom/custom_annot.wast new file mode 100644 index 0000000000..6d6735e85f --- /dev/null +++ b/test/custom/custom/custom_annot.wast @@ -0,0 +1,99 @@ +(module + (type $t (func)) + (@custom "my-section1" "contents-bytes1") + (@custom "my-section2" "more-contents-bytes0") + (@custom "my-section1" "contents-bytes2") + (@custom "my-section2" (before global) "more-contents-bytes1") + (@custom "my-section2" (after func) "more-contents-bytes2") + (@custom "my-section2" (after func) "more-contents-bytes3") + (@custom "my-section2" (before global) "more-contents-bytes4") + (func) + (@custom "my-section2" "more-contents-bytes5") + + (global $g i32 (i32.const 0)) + (@custom "my-section3") + (@custom "my-section4" "" "1" "" "2" "3" "") + (@custom "") +) + +(module quote "(@custom \"bla\")") +(module quote "(module (@custom \"bla\"))") + + +;; Malformed name + +(assert_malformed + (module quote "(@custom)") + "@custom annotation: missing section name" +) + +(assert_malformed + (module quote "(@custom 4)") + "@custom annotation: missing section name" +) + +(assert_malformed + (module quote "(@custom bla)") + "@custom annotation: missing section name" +) + +(assert_malformed + (module quote "(@custom \"\\df\")") + "@custom annotation: malformed UTF-8 encoding" +) + + +;; Malformed placement + +(assert_malformed + (module quote "(@custom \"bla\" here)") + "@custom annotation: unexpected token" +) + +(assert_malformed + (module quote "(@custom \"bla\" after)") + "@custom annotation: unexpected token" +) + +(assert_malformed + (module quote "(@custom \"bla\" (after))") + "@custom annotation: malformed section kind" +) + +(assert_malformed + (module quote "(@custom \"bla\" (type))") + "@custom annotation: malformed placement" +) + +(assert_malformed + (module quote "(@custom \"bla\" (aft type))") + "@custom annotation: malformed placement" +) + +(assert_malformed + (module quote "(@custom \"bla\" (before types))") + "@custom annotation: malformed section kind" +) + + +;; Misplaced + +(assert_malformed + (module quote "(type (@custom \"bla\") $t (func))") + "misplaced @custom annotation" +) + +(assert_malformed + (module quote "(func (@custom \"bla\"))") + "misplaced @custom annotation" +) + +(assert_malformed + (module quote "(func (block (@custom \"bla\")))") + "misplaced @custom annotation" +) + +(assert_malformed + (module quote "(func (nop (@custom \"bla\")))") + "misplaced @custom annotation" +) diff --git a/test/custom/name/name_annot.wast b/test/custom/name/name_annot.wast new file mode 100644 index 0000000000..5421151fd2 --- /dev/null +++ b/test/custom/name/name_annot.wast @@ -0,0 +1,20 @@ +;; Module names + +(module (@name "Modül")) + +(module $moduel (@name "Modül")) + +(assert_malformed + (module quote "(module (@name \"M1\") (@name \"M2\"))") + "@name annotation: multiple module" +) + +(assert_malformed + (module quote "(module (func) (@name \"M\"))") + "misplaced @name annotation" +) + +(assert_malformed + (module quote "(module (start $f (@name \"M\")) (func $f))") + "misplaced @name annotation" +)