From 8327cf0c8a6d9539d03b8dfaad9e478b19435a51 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sun, 17 Apr 2016 13:51:35 -0400 Subject: [PATCH 01/21] safe-string: make Lexing safe --- src/batLexing.mli | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/batLexing.mli b/src/batLexing.mli index 9aec16122..78a2b94d5 100644 --- a/src/batLexing.mli +++ b/src/batLexing.mli @@ -60,7 +60,7 @@ val dummy_pos : position;; type lexbuf = Lexing.lexbuf = { refill_buff : lexbuf -> unit; - mutable lex_buffer : string; + mutable lex_buffer : bytes; mutable lex_buffer_len : int; mutable lex_abs_pos : int; mutable lex_start_pos : int; @@ -98,12 +98,12 @@ val from_string : string -> lexbuf the string. An end-of-input condition is generated when the end of the string is reached. *) -val from_function : (string -> int -> int) -> lexbuf +val from_function : (Bytes.t -> int -> int) -> lexbuf (** Create a lexer buffer with the given function as its reading method. When the scanner needs more characters, it will call the given - function, giving it a character string [s] and a character - count [n]. The function should put [n] characters or less in [s], - starting at character number 0, and return the number of characters + function, giving it a byte sequence [s] and a byte + count [n]. The function should put [n] bytes or less in [s], + starting at byte number 0, and return the number of byte provided. A return value of 0 means end of input. *) From 44a3a21e421a3be6b8d5cd337b68f0c094bfae11 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sun, 17 Apr 2016 13:50:57 -0400 Subject: [PATCH 02/21] safe-string: make Big_int safe --- src/batBig_int.mliv | 2 +- src/batBig_int.mlv | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/batBig_int.mliv b/src/batBig_int.mliv index 4ce5b0e04..4693f08ca 100644 --- a/src/batBig_int.mliv +++ b/src/batBig_int.mliv @@ -345,7 +345,7 @@ val nat_of_big_int : big_int -> Nat.nat val big_int_of_nat : Nat.nat -> big_int val base_power_big_int: int -> int -> big_int -> big_int val sys_big_int_of_string: string -> int -> int -> big_int -val round_futur_last_digit : string -> int -> int -> bool +val round_futur_last_digit : Bytes.t -> int -> int -> bool val approx_big_int: int -> big_int -> string ##V>=4.3##val round_big_int_to_float: big_int -> bool -> float diff --git a/src/batBig_int.mlv b/src/batBig_int.mlv index 0e4279c2f..c9f798eee 100644 --- a/src/batBig_int.mlv +++ b/src/batBig_int.mlv @@ -21,11 +21,15 @@ let big_int_base_default_symbols = - let s = Bytes.create (10 + 26*2) in - let set off c k = Bytes.set s k (char_of_int (k - off + (int_of_char c))) in - for k = 0 to String.length s - 1 do - if k < 10 then set 0 '0' k else if k < 36 then set 10 'a' k else set 36 'A' k - done; s + let symbol offset base k = + char_of_int (k - offset + (int_of_char base)) in + String.init (10 + 26*2) (fun k -> + if k < 10 + then symbol 0 '0' k + else if k < 36 + then symbol 10 'a' k + else symbol 36 'A' k + ) let to_string_in_custom_base @@ -65,7 +69,7 @@ let to_string_in_custom_base done; addchar symbols.[int_of_big_int !n]; if isnegative then addchar '-'; - String.sub buff (!curr + 1) !count + Bytes.sub_string buff (!curr + 1) !count let to_string_in_base b n = if b <= 1 || b > 36 then invalid_arg From 180f9b9980b26084f99f02aaea52014358abd47a Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sun, 17 Apr 2016 13:51:13 -0400 Subject: [PATCH 03/21] safe-string: make Buffer safe --- src/batBuffer.mliv | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/batBuffer.mliv b/src/batBuffer.mliv index ad1e328c9..bd45e02fa 100644 --- a/src/batBuffer.mliv +++ b/src/batBuffer.mliv @@ -63,12 +63,13 @@ val to_bytes : t -> Bytes.t *) val sub : t -> int -> int -> string -(** [Buffer.sub b off len] returns (a copy of) the substring of the - current contents of the buffer [b] starting at offset [off] of length - [len] bytes. May raise [Invalid_argument] if out of bounds request. The - buffer itself is unaffected. *) +(** [Buffer.sub b off len] returns a copy of [len] bytes from the + current contents of the buffer [b], starting at offset [off]. -val blit : t -> int -> string -> int -> int -> unit + Raise [Invalid_argument] if [srcoff] and [len] do not designate a valid + range of [b]. *) + +val blit : t -> int -> Bytes.t -> int -> int -> unit (** [Buffer.blit src srcoff dst dstoff len] copies [len] characters from the current contents of the buffer [src], starting at offset [srcoff] to string [dst], starting at character [dstoff]. From d64443a92f9f0aed9bffc8e9b4d033c73e07bb23 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sun, 17 Apr 2016 13:51:23 -0400 Subject: [PATCH 04/21] safe-string: make Digest safe --- src/batDigest.mlv | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/src/batDigest.mlv b/src/batDigest.mlv index 86155358a..083c81c8a 100644 --- a/src/batDigest.mlv +++ b/src/batDigest.mlv @@ -21,14 +21,9 @@ include Digest -open BatIO - (*Imported from [Digest.input] -- the functions used take advantage of [BatIO.input] rather than [in_channel]*) -let input inp = - let digest = Bytes.create 16 in - let _ = really_input inp digest 0 16 in - digest +let input inp = BatIO.really_nread inp 16 (*$T let digest = Digest.string "azerty" in \ input (BatIO.input_string digest) = digest @@ -38,10 +33,8 @@ let output = BatIO.nwrite let print oc t = BatIO.nwrite oc (to_hex t) let channel inp len = (*TODO: Make efficient*) - if len >= 0 then - let buf = Bytes.create len in - let _ = BatIO.really_input inp buf 0 len in - Digest.string buf + if len >= 0 + then Digest.string (BatIO.really_nread inp len) else Digest.channel (BatIO.to_input_channel inp) len (*$T let digest = Digest.string "azerty" in \ @@ -78,11 +71,7 @@ let from_hex s = | _ -> raise (Invalid_argument "Digest.from_hex") in let byte i = digit s.[i] lsl 4 + digit s.[i+1] in - let result = Bytes.create 16 in - for i = 0 to 15 do - Bytes.set result i (Char.chr (byte (2 * i))); - done; - result + String.init 16 (fun i -> Char.chr (byte (2 * i))) (*$Q Q.string (fun s -> \ From 8cbb5d5809b754841d6d278dea777e865404f724 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sun, 17 Apr 2016 13:53:49 -0400 Subject: [PATCH 05/21] safe-string: make Genlex safe --- src/batGenlex.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/batGenlex.ml b/src/batGenlex.ml index be772401b..7fc720a02 100644 --- a/src/batGenlex.ml +++ b/src/batGenlex.ml @@ -51,16 +51,16 @@ let to_enum_filter kwd_table = let reset_buffer () = buffer := initial_buffer; bufpos := 0 in let store c = - if !bufpos >= String.length !buffer then + if !bufpos >= Bytes.length !buffer then begin let newbuffer = Bytes.create (2 * !bufpos) in - String.blit !buffer 0 newbuffer 0 !bufpos; buffer := newbuffer + Bytes.blit !buffer 0 newbuffer 0 !bufpos; buffer := newbuffer end; Bytes.set !buffer !bufpos c; incr bufpos in let get_string () = - let s = String.sub !buffer 0 !bufpos in buffer := initial_buffer; s + let s = Bytes.sub_string !buffer 0 !bufpos in buffer := initial_buffer; s in let ident_or_keyword id = try Hashtbl.find kwd_table id with From d12b5f15da0d880369185b3e62fd263f7d2285c9 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sun, 17 Apr 2016 22:54:27 -0400 Subject: [PATCH 06/21] safe-string: make IO safe --- src/batBuffer.mlv | 2 +- src/batIO.ml | 46 ++++++++++++++++------------- src/batIO.mli | 73 ++++++++++++++++++++++++++-------------------- src/batInnerIO.ml | 51 ++++++++++++++++++++------------ src/batInnerIO.mli | 73 ++++++++++++++++++++++++++++------------------ 5 files changed, 144 insertions(+), 101 deletions(-) diff --git a/src/batBuffer.mlv b/src/batBuffer.mlv index db25e6f96..b10643686 100644 --- a/src/batBuffer.mlv +++ b/src/batBuffer.mlv @@ -75,7 +75,7 @@ let add_input t inp n = let output_buffer buf = BatInnerIO.create_out ~write: (add_char buf) - ~output:(fun s p l -> add_substring buf s p l; l) + ~output:(fun s p l -> add_subbytes buf s p l; l) ~close: (fun () -> contents buf) ~flush: BatInnerIO.noop diff --git a/src/batIO.ml b/src/batIO.ml index 65f6916df..3fe7a7157 100644 --- a/src/batIO.ml +++ b/src/batIO.ml @@ -133,7 +133,7 @@ let output_enum() = Buffer.add_char b x ) ~output:(fun s p l -> - Buffer.add_substring b s p l; + Buffer.add_subbytes b s p l; l ) ~close:(fun () -> @@ -401,7 +401,7 @@ let from_in_channel ch = let read() = try if ch#input cbuf 0 1 = 0 then raise Sys_blocked_io; - String.unsafe_get cbuf 0 + Bytes.unsafe_get cbuf 0 with End_of_file -> raise No_more_input in @@ -449,7 +449,7 @@ let from_in_chars ch = let from_out_chars ch = let output s p l = for i = p to p + l - 1 do - ch#put (String.unsafe_get s i) + ch#put (Bytes.unsafe_get s i) done; l in @@ -498,20 +498,25 @@ let lines_of2 ic = let find_eol () = let rec find_loop pos = if pos >= !end_pos then !read_pos - pos - else if buf.[pos] = '\n' then 1 + pos - !read_pos (* TODO: HANDLE CRLF *) + else if Bytes.get buf pos = '\n' + then 1 + pos - !read_pos (* TODO: HANDLE CRLF *) else find_loop (pos+1) in find_loop !read_pos in - let rec join_strings buf pos = function - | [] -> buf + let join_strings total_len accu = + let rec loop buf pos = function + | [] -> () | h::t -> - let len = String.length h in - String.blit h 0 buf (pos-len) len; - join_strings buf (pos-len) t + let len = Bytes.length h in + Bytes.blit h 0 buf (pos-len) len; + loop buf (pos-len) t in + let buf = Bytes.create total_len in + loop buf total_len accu; + Bytes.unsafe_to_string buf in let input_buf s o l = - String.blit buf !read_pos s o l; + Bytes.blit buf !read_pos s o l; read_pos := !read_pos + l; if !end_pos = !read_pos then try @@ -529,15 +534,15 @@ let lines_of2 ic = let n = find_eol () in if n = 0 then match accu with (* EOF *) | [] -> close_in ic; raise BatEnum.No_more_elements - | _ -> join_strings (Bytes.create len) len accu + | _ -> join_strings len accu else if n > 0 then (* newline found *) let res = Bytes.create (n-1) in input_buf res 0 (n-1); - input_buf " " 0 1; (* throw away EOL *) + input_buf (Bytes.of_string " ") 0 1; (* throw away EOL *) match accu with - | [] -> res + | [] -> Bytes.unsafe_to_string res | _ -> let len = len + n-1 in - join_strings (Bytes.create len) len (res :: accu) + join_strings len (res :: accu) else (* n < 0 ; no newline found *) let piece = Bytes.create (-n) in input_buf piece 0 (-n); @@ -564,17 +569,18 @@ let tab_out ?(tab=' ') n out = write out c; if is_newline c then nwrite out spaces; ) - ~output:(fun s p l -> (*Replace each newline within the segment with newline^spaces*) (*FIXME?: performance - instead output each line and a newline between each char? *) - let length = String.length s in - let buffer = Buffer.create (String.length s) in + ~output:(fun s p l -> + (*Replace each newline within the segment with newline^spaces*) + let length = Bytes.length s in + let buffer = Buffer.create length in for i = p to min (length - 1) l do - let c = String.unsafe_get s i in + let c = Bytes.unsafe_get s i in Buffer.add_char buffer c; if is_newline c then Buffer.add_string buffer spaces done; - let s' = Buffer.contents buffer in - output out s' 0 (String.length s')) + let s' = Buffer.to_bytes buffer in + really_output out s' 0 (Bytes.length s')) ~flush:noop ~close:noop ~underlying:[out] diff --git a/src/batIO.mli b/src/batIO.mli index f7d870da7..affb11ea2 100644 --- a/src/batIO.mli +++ b/src/batIO.mli @@ -189,13 +189,13 @@ val really_nread : input -> int -> string Example: [let read_md5 ch = really_nread ch 32] *) -val input : input -> string -> int -> int -> int -(** [input i s p l] reads up to [l] characters from the given input, - storing them in string [s], starting at character number [p]. It +val input : input -> Bytes.t -> int -> int -> int +(** [input i s p len] reads up to [len] characters from the given input, + storing them in byte sequence [s], starting at character number [p]. It returns the actual number of characters read (which may be 0) or raise [No_more_input] if no character can be read. It will raise - [Invalid_argument] if [p] and [l] do not designate a valid - substring of [s]. + [Invalid_argument] if [p] and [len] do not designate a valid + subsequence of [s]. Example: [let map_ch f ?(block_size=100) = let b = String.create block_size in @@ -205,16 +205,15 @@ val input : input -> string -> int -> int -> int done with No_more_input -> ()] *) -val really_input : input -> string -> int -> int -> int -(** [really_input i s p l] reads exactly [l] characters from the - given input, storing them in the string [s], starting at +val really_input : input -> Bytes.t -> int -> int -> int +(** [really_input ic s p len] reads exactly [len] characters from the + input [ic], storing them in the string [s], starting at position [p]. For consistency with {!BatIO.input} it returns - [l]. @raise No_more_input if at [l] characters are not - available. @raise Invalid_argument if [p] and [l] do not + [len]. @raise No_more_input if at [len] characters are not + available. @raise Invalid_argument if [p] and [len] do not designate a valid substring of [s]. Example: [let _ = really_input stdin b 0 3] - *) val close_in : input -> unit @@ -235,27 +234,37 @@ val nwrite : (string, _) printer Example: [nwrite stdout "Enter your name: ";] *) -val output : 'a output -> string -> int -> int -> int -(** [output o s p l] writes up to [l] characters from string [s], starting at - offset [p]. It returns the number of characters written. It will raise - [Invalid_argument] if [p] and [l] do not designate a valid substring of [s]. +val output : 'a output -> Bytes.t -> int -> int -> int +(** [output o s p len] writes up to [len] characters from byte + sequence [s], starting at offset [p]. It returns the number of + characters written. It will raise [Invalid_argument] if [p] and + [len] do not designate a valid subsequence of [s]. - Example: [let str = "Foo Bar Baz" in let written = output stdout str 2 4;] + Example: [let written = output stdout (Bytes.to_string "Foo Bar Baz") 2 4] - This writes "o Ba" to stdout. -*) + This writes "o Ba" to stdout, and returns 4. + *) -val really_output : 'a output -> string -> int -> int -> int -(** [really_output o s p l] writes exactly [l] characters from string [s] onto - the the output, starting with the character at offset [p]. For consistency with - {!BatIO.output} it returns [l]. @raise Invalid_argument if [p] and [l] do not - designate a valid substring of [s]. +val output_substring : 'a output -> string -> int -> int -> int +(** like [output] above, but outputs from a substring instead of + a subsequence of bytes *) + +val really_output : 'a output -> Bytes.t -> int -> int -> int +(** [really_output o s p len] writes exactly [len] characters from + byte sequence [s] onto the the output, starting with the character + at offset [p]. For consistency with {!BatIO.output} it returns + [len]. @raise Invalid_argument if [p] and [len] do not designate + a valid subsequence of [s]. This function is useful for networking situations where the output buffer might fill resulting in not the entire substring being readied for transmission. Uses [output] internally, and will raise [Sys_blocked_io] in the case that any call returns 0. -*) + *) + +val really_output_substring : 'a output -> string -> int -> int -> int +(** like [really_output] above, but outputs from a substring instead + of a subsequence of bytes *) val flush : 'a output -> unit (** Flush an output. @@ -593,7 +602,7 @@ val drop_bits : in_bits -> unit val create_in : read:(unit -> char) -> - input:(string -> int -> int -> int) -> + input:(Bytes.t -> int -> int -> int) -> close:(unit -> unit) -> input (** Fully create an input by giving all the needed functions. @@ -604,7 +613,7 @@ val create_in : val wrap_in : read:(unit -> char) -> - input:(string -> int -> int -> int) -> + input:(Bytes.t -> int -> int -> int) -> close:(unit -> unit) -> underlying:(input list) -> input @@ -622,7 +631,7 @@ val wrap_in : val inherit_in: ?read:(unit -> char) -> - ?input:(string -> int -> int -> int) -> + ?input:(Bytes.t -> int -> int -> int) -> ?close:(unit -> unit) -> input -> input (** Simplified and optimized version of {!wrap_in} which may be used @@ -638,7 +647,7 @@ val inherit_in: val create_out : write:(char -> unit) -> - output:(string -> int -> int -> int) -> + output:(Bytes.t -> int -> int -> int) -> flush:(unit -> unit) -> close:(unit -> 'a) -> 'a output @@ -657,7 +666,7 @@ val create_out : val wrap_out : write:(char -> unit) -> - output:(string -> int -> int -> int) -> + output:(Bytes.t -> int -> int -> int) -> flush:(unit -> unit) -> close:(unit -> 'a) -> underlying:('b output list) -> @@ -708,7 +717,7 @@ val wrap_out : val inherit_out: ?write:(char -> unit) -> - ?output:(string -> int -> int -> int) -> + ?output:(Bytes.t -> int -> int -> int) -> ?flush:(unit -> unit) -> ?close:(unit -> unit) -> 'a output -> unit output @@ -774,13 +783,13 @@ val to_input_channel : input -> in_channel class in_channel : input -> object - method input : string -> int -> int -> int + method input : Bytes.t -> int -> int -> int method close_in : unit -> unit end class out_channel : 'a output -> object - method output : string -> int -> int -> int + method output : Bytes.t -> int -> int -> int method flush : unit -> unit method close_out : unit -> unit end diff --git a/src/batInnerIO.ml b/src/batInnerIO.ml index 328482c69..4360ef1d5 100644 --- a/src/batInnerIO.ml +++ b/src/batInnerIO.ml @@ -28,7 +28,7 @@ let weak_iter f s = BatInnerWeaktbl.iter (fun x _ -> f x) s type input = { mutable in_read : unit -> char; - mutable in_input : string -> int -> int -> int; + mutable in_input : Bytes.t -> int -> int -> int; mutable in_close : unit -> unit; in_id: int;(**A unique identifier.*) in_upstream: input weak_set @@ -36,7 +36,7 @@ type input = { type 'a output = { mutable out_write : char -> unit; - mutable out_output: string -> int -> int -> int; + mutable out_output: Bytes.t -> int -> int -> int; mutable out_close : unit -> 'a; mutable out_flush : unit -> unit; out_id: int;(**A unique identifier.*) @@ -217,14 +217,14 @@ let nread i n = p := !p + r; l := !l - r; done; - s + Bytes.unsafe_to_string s with No_more_input as e -> if !p = 0 then raise e; - String.sub s 0 !p + Bytes.sub_string s 0 !p let really_output o s p l' = - let sl = String.length s in + let sl = Bytes.length s in if p + l' > sl || p < 0 || l' < 0 then invalid_arg "BatIO.really_output"; let l = ref l' in let p = ref p in @@ -236,8 +236,11 @@ let really_output o s p l' = done; l' +let really_output_substring o s p l' = + really_output o (Bytes.of_string s) p l' + let input i s p l = - let sl = String.length s in + let sl = Bytes.length s in if p + l > sl || p < 0 || l < 0 then invalid_arg "BatIO.input"; if l = 0 then 0 @@ -245,7 +248,7 @@ let input i s p l = i.in_input s p l let really_input i s p l' = - let sl = String.length s in + let sl = Bytes.length s in if p + l' > sl || p < 0 || l' < 0 then invalid_arg "BatIO.really_input"; let l = ref l' in let p = ref p in @@ -264,14 +267,13 @@ let really_nread i n = let s = Bytes.create n in ignore(really_input i s 0 n); - s - + Bytes.unsafe_to_string s let write o x = o.out_write x -let nwrite o s = +let nwrite_bytes o s = let p = ref 0 in - let l = ref (String.length s) in + let l = ref (Bytes.length s) in while !l > 0 do let w = o.out_output s !p !l in (* FIXME: unknown how many characters were already written *) @@ -280,11 +282,16 @@ let nwrite o s = l := !l - w; done +let nwrite o s = nwrite_bytes o (Bytes.unsafe_of_string s) + let output o s p l = - let sl = String.length s in + let sl = Bytes.length s in if p + l > sl || p < 0 || l < 0 then invalid_arg "BatIO.output"; o.out_output s p l +let output_substring o s p l = + output o (Bytes.unsafe_of_string s) p l + let flush o = o.out_flush() let flush_all () = @@ -313,9 +320,9 @@ let read_all i = | Input_closed -> let buf = Bytes.create !pos in List.iter (fun (s,p) -> - String.unsafe_blit s 0 buf p (String.length s) + Bytes.blit_string s 0 buf p (String.length s) ) !str; - buf + Bytes.unsafe_to_string buf let input_string s = let pos = ref 0 in @@ -327,7 +334,7 @@ let input_string s = ~input:(fun sout p l -> if !pos >= len then raise No_more_input; let n = (if !pos + l > len then len - !pos else l) in - String.unsafe_blit s (post pos ( (+) n ) ) sout p n; + Bytes.blit_string s (post pos ( (+) n ) ) sout p n; n ) ~close:noop @@ -349,7 +356,7 @@ let output_string() = let b = Buffer.create default_buffer_size in create_out ~write: (fun c -> Buffer.add_char b c ) - ~output: (fun s p l -> Buffer.add_substring b s p l; l ) + ~output: (fun s p l -> Buffer.add_subbytes b s p l; l ) ~close: (fun () -> Buffer.contents b) ~flush: noop @@ -416,8 +423,11 @@ let pipe() = in let input s p l = if !inpos = String.length !input then flush(); - let r = (if !inpos + l > String.length !input then String.length !input - !inpos else l) in - String.unsafe_blit !input !inpos s p r; + let r = + if !inpos + l <= String.length !input + then l + else String.length !input - !inpos in + Bytes.blit_string !input !inpos s p r; inpos := !inpos + r; r in @@ -425,7 +435,7 @@ let pipe() = Buffer.add_char output c in let output s p l = - Buffer.add_substring output s p l; + Buffer.add_subbytes output s p l; l in let input = create_in ~read ~input ~close:noop @@ -571,6 +581,9 @@ let write_string o s = nwrite o s; write o '\000' +let write_bytes o b = + nwrite o b + let write_line o s = nwrite o s; write o '\n' diff --git a/src/batInnerIO.mli b/src/batInnerIO.mli index 9130f8536..16510b03c 100644 --- a/src/batInnerIO.mli +++ b/src/batInnerIO.mli @@ -70,19 +70,21 @@ val really_nread : input -> int -> string from the input. @raise No_more_input if at least [n] characters are not available. @raise Invalid_argument if [n] < 0. *) -val input : input -> string -> int -> int -> int -(** [input i s p l] reads up to [l] characters from the given input, storing - them in string [s], starting at character number [p]. It returns the actual - number of characters read or raise [No_more_input] if no character can be - read. It will raise [Invalid_argument] if [p] and [l] do not designate a - valid substring of [s]. *) - -val really_input : input -> string -> int -> int -> int -(** [really_input i s p l] reads exactly [l] characters from the given input, - storing them in the string [s], starting at position [p]. For consistency with - {!BatIO.input} it returns [l]. @raise No_more_input if at [l] characters are - not available. @raise Invalid_argument if [p] and [l] do not designate a - valid substring of [s]. *) +val input : input -> Bytes.t -> int -> int -> int +(** [input i s p len] reads up to [len] bytes from the given input, + storing them in byte sequence [s], starting at position [p]. It + returns the actual number of bytes read or raise + [No_more_input] if no character can be read. It will raise + [Invalid_argument] if [p] and [len] do not designate a valid + subsequence of [s]. *) + +val really_input : input -> Bytes.t -> int -> int -> int +(** [really_input i s p len] reads exactly [len] characters from the + given input, storing them in the byte sequence [s], starting at + position [p]. For consistency with {!BatIO.input} it returns + [len]. @raise No_more_input if at least [len] characters are not + available. @raise Invalid_argument if [p] and [len] do not designate + a valid subsequence of [s]. *) val close_in : input -> unit (** Close the input. It can no longer be read from. *) @@ -97,16 +99,29 @@ val write : 'a output -> char -> unit val nwrite : 'a output -> string -> unit (** Write a string to an output. *) -val output : 'a output -> string -> int -> int -> int -(** [output o s p l] writes up to [l] characters from string [s], starting at - offset [p]. It returns the number of characters written. It will raise - [Invalid_argument] if [p] and [l] do not designate a valid substring of [s]. *) +val nwrite_bytes : 'a output -> Bytes.t -> unit +(** Write a byte sequence to an output. *) -val really_output : 'a output -> string -> int -> int -> int -(** [really_output o s p l] writes exactly [l] characters from string [s] onto - the the output, starting with the character at offset [p]. For consistency with - {!BatIO.output} it returns [l]. @raise Invalid_argument if [p] and [l] do not - designate a valid substring of [s]. *) +val output : 'a output -> Bytes.t -> int -> int -> int +(** [output o s p len] writes up to [len] characters from byte + sequence [len], starting at offset [p]. It returns the number of + characters written. It will raise [Invalid_argument] if [p] and + [len] do not designate a valid subsequence of [s]. *) + +val output_substring : 'a output -> string -> int -> int -> int +(** like [output] above, but outputs from a substring instead of + a subsequence of bytes *) + +val really_output : 'a output -> Bytes.t -> int -> int -> int +(** [really_output o s p len] writes exactly [len] characters from + byte sequence [s] onto the the output, starting with the character + at offset [p]. For consistency with {!BatIO.output} it returns + [len]. @raise Invalid_argument if [p] and [len] do not designate + a valid subsequence of [s]. *) + +val really_output_substring : 'a output -> string -> int -> int -> int +(** like [really_output] above, but outputs from a substring instead + of a subsequence of bytes *) val flush : 'a output -> unit (** Flush an output. *) @@ -136,7 +151,7 @@ val on_close_out : 'a output -> ('a output -> unit) -> unit val create_in : read:(unit -> char) -> - input:(string -> int -> int -> int) -> + input:(Bytes.t -> int -> int -> int) -> close:(unit -> unit) -> input (** Fully create an input by giving all the needed functions. @@ -147,7 +162,7 @@ val create_in : val inherit_in: ?read:(unit -> char) -> - ?input:(string -> int -> int -> int) -> + ?input:(Bytes.t -> int -> int -> int) -> ?close:(unit -> unit) -> input -> input (** @@ -158,7 +173,7 @@ val inherit_in: val wrap_in : read:(unit -> char) -> - input:(string -> int -> int -> int) -> + input:(Bytes.t -> int -> int -> int) -> close:(unit -> unit) -> underlying:(input list) -> input @@ -173,7 +188,7 @@ val wrap_in : val create_out : write:(char -> unit) -> - output:(string -> int -> int -> int) -> + output:(Bytes.t -> int -> int -> int) -> flush:(unit -> unit) -> close:(unit -> 'a) -> 'a output @@ -192,7 +207,7 @@ val create_out : val inherit_out: ?write:(char -> unit) -> - ?output:(string -> int -> int -> int) -> + ?output:(Bytes.t -> int -> int -> int) -> ?flush:(unit -> unit) -> ?close:(unit -> unit) -> _ output -> unit output @@ -204,7 +219,7 @@ val inherit_out: val wrap_out : write:(char -> unit) -> - output:(string -> int -> int -> int) -> + output:(Bytes.t -> int -> int -> int) -> flush:(unit -> unit) -> close:(unit -> 'a) -> underlying:('b output list) -> @@ -437,7 +452,7 @@ external noop : unit -> unit = "%ignore" {7 Optimized access to fields} *) -val get_output : _ output -> (string -> int -> int -> int) +val get_output : _ output -> (Bytes.t -> int -> int -> int) val get_flush : _ output -> (unit -> unit) val lock : BatConcurrent.lock ref From 310259bb2494e8b54a3d8cf402f291a702e9d488 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Mon, 18 Apr 2016 22:59:38 -0400 Subject: [PATCH 07/21] safe-string: make Format safe --- src/batFormat.mlv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/batFormat.mlv b/src/batFormat.mlv index b1a9c244f..43e88db7c 100644 --- a/src/batFormat.mlv +++ b/src/batFormat.mlv @@ -25,13 +25,13 @@ include Format (* internal functions *) -let output_of out = fun s i o -> ignore (really_output out s i o) +let output_of out = fun s i o -> ignore (really_output_substring out s i o) let flush_of out = BatInnerIO.get_flush out let newline_of out = fun () -> BatInnerIO.write out '\n' let spaces_of out = (* Default function to output spaces. Copied from base format.ml*) - let blank_line = String.make 80 ' ' in + let blank_line = Bytes.make 80 ' ' in let rec display_blanks n = if n > 0 then if n <= 80 then ignore (really_output out blank_line 0 n) else From 6153feee1ee88ce56ee87cc81e544673292918b2 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 08:41:15 -0400 Subject: [PATCH 08/21] safe-string: make Int32 safe --- src/batInt32.mliv | 20 +++++++------- src/batInt32.mlv | 69 ++++++++++++++++++++++++++--------------------- 2 files changed, 49 insertions(+), 40 deletions(-) diff --git a/src/batInt32.mliv b/src/batInt32.mliv index e1176c767..80c50af7f 100644 --- a/src/batInt32.mliv +++ b/src/batInt32.mliv @@ -217,20 +217,20 @@ external float_of_bits : int32 -> float = "caml_int32_float_of_bits" val of_byte : char -> int32 val to_byte : int32 -> char -val pack : string -> int -> int32 -> unit -(** [pack str off i] writes the little endian bit representation - of [i] into string [str] at offset [off] *) +val pack : Bytes.t -> int -> int32 -> unit +(** [pack s off i] writes the little endian bit representation + of [i] into byte sequence [s] at offset [off] *) -val pack_big : string -> int -> int32 -> unit -(** [pack_big str off i] writes the big endian bit - representation of [i] into string [str] at offset [off] *) +val pack_big : Bytes.t -> int -> int32 -> unit +(** [pack_big s off i] writes the big endian bit + representation of [i] into byte sequence [s] at offset [off] *) -val unpack : string -> int -> int32 -(** [unpack str off] reads 4 bytes from string [str] starting at +val unpack : Bytes.t -> int -> int32 +(** [unpack s off] reads 4 bytes from byte sequence [str] starting at offset [off] as a little-endian int32 *) -val unpack_big : string -> int -> int32 -(** [unpack str off] reads 4 bytes from string [str] starting at +val unpack_big : Bytes.t -> int -> int32 +(** [unpack s off] reads 4 bytes from byte sequence [str] starting at offset [off] as a big-endian int32 *) val compare : t -> t -> int diff --git a/src/batInt32.mlv b/src/batInt32.mlv index e2b884bee..71e490ca6 100644 --- a/src/batInt32.mlv +++ b/src/batInt32.mlv @@ -37,7 +37,7 @@ let of_byte b = Char.code b |> Int32.of_int (* really need to just blit an int32 word into a string and vice versa *) let pack str pos item = - if String.length str < pos + 4 then invalid_arg "Int32.pack: pos too close to end of string"; + if Bytes.length str < pos + 4 then invalid_arg "Int32.pack: pos too close to end of string"; if pos < 0 then invalid_arg "Int32.pack: pos negative"; Bytes.set str pos (to_byte item); let item = Int32.shift_right item 8 in @@ -48,16 +48,18 @@ let pack str pos item = Bytes.set str (pos + 3) (to_byte item) (* optimize out last logand? *) (*$T pack - let str = " " in pack str 0 0l; (str = "\000\000\000\000") - let str = " " in pack str 0 0l; (str = "\000\000\000\000 ") - let str = " " in pack str 1 0l; (str = " \000\000\000\000") - let str = " " in try pack str 0 0l; false with Invalid_argument _ -> true - let str = " " in try pack str 1 0l; false with Invalid_argument _ -> true + let str = Bytes.of_string " " in pack str 0 0l; (Bytes.to_string str = "\000\000\000\000") + let str = Bytes.of_string " " in pack str 0 0l; (Bytes.to_string str = "\000\000\000\000 ") + let str = Bytes.of_string " " in pack str 1 0l; (Bytes.to_string str = " \000\000\000\000") + let str = Bytes.of_string " " in try pack str 0 0l; false with Invalid_argument _ -> true + let str = Bytes.of_string " " in try pack str 1 0l; false with Invalid_argument _ -> true *) let pack_big str pos item = - if String.length str < pos + 4 then invalid_arg "Int32.pack_big: pos too close to end of string"; - if pos < 0 then invalid_arg "Int32.pack_big: pos negative"; + if Bytes.length str < pos + 4 then + invalid_arg "Int32.pack_big: pos too close to end of string"; + if pos < 0 then + invalid_arg "Int32.pack_big: pos negative"; Bytes.set str (pos + 3) (to_byte item); let item = Int32.shift_right item 8 in Bytes.set str (pos + 2) (to_byte item); @@ -67,51 +69,58 @@ let pack_big str pos item = Bytes.set str pos (to_byte item) (* optimize out last logand? *) (*$T pack_big - let str = " " in pack_big str 0 0l; (str = "\000\000\000\000") - let str = " " in pack_big str 0 0l; (str = "\000\000\000\000 ") - let str = " " in pack_big str 1 0l; (str = " \000\000\000\000") - let str = " " in try pack_big str 0 0l; false with Invalid_argument _ -> true - let str = " " in try pack_big str 1 0l; false with Invalid_argument _ -> true + let str = Bytes.of_string " " in pack_big str 0 0l; (Bytes.to_string str = "\000\000\000\000") + let str = Bytes.of_string " " in pack_big str 0 0l; (Bytes.to_string str = "\000\000\000\000 ") + let str = Bytes.of_string " " in pack_big str 1 0l; (Bytes.to_string str = " \000\000\000\000") + let str = Bytes.of_string " " in try pack_big str 0 0l; false with Invalid_argument _ -> true + let str = Bytes.of_string " " in try pack_big str 1 0l; false with Invalid_argument _ -> true *) let unpack str pos = - if String.length str < pos + 4 then invalid_arg "Int32.unpack: pos + 4 not within string"; + if Bytes.length str < pos + 4 + then invalid_arg "Int32.unpack: pos + 4 not within string"; if pos < 0 then invalid_arg "Int32.unpack: pos negative"; let shift n = Int32.shift_left n 8 and add b n = Int32.add (of_byte b) n in - of_byte str.[pos+3] |> shift |> add str.[pos+2] |> shift - |> add str.[pos+1] |> shift |> add str.[pos] + of_byte (Bytes.unsafe_get str (pos+3)) |> shift + |> add (Bytes.unsafe_get str (pos+2)) |> shift + |> add (Bytes.unsafe_get str (pos+1)) |> shift + |> add (Bytes.unsafe_get str pos) (* TODO: improve performance of bit twiddling? will these curried functions get inlined? *) (*$T unpack - unpack "\000\000\000\000" 0 = 0l - unpack "\000\000\000\000 " 0 = 0l - unpack " \000\000\000\000" 1 = 0l - unpack "\255\000\000\000" 0 = 255l + unpack (Bytes.of_string "\000\000\000\000") 0 = 0l + unpack (Bytes.of_string "\000\000\000\000 ") 0 = 0l + unpack (Bytes.of_string " \000\000\000\000") 1 = 0l + unpack (Bytes.of_string "\255\000\000\000") 0 = 255l *) (*$Q pack; unpack - Q.int (let str = " " in fun x -> let x = Int32.of_int x in pack str 0 x; unpack str 0 = x) + Q.int (let str = Bytes.of_string " " in fun x -> let x = Int32.of_int x in pack str 0 x; unpack str 0 = x) *) let unpack_big str pos = - if String.length str < pos + 4 then invalid_arg "Int32.unpack: pos + 4 not within string"; - if pos < 0 then invalid_arg "Int32.unpack: pos negative"; + if Bytes.length str < pos + 4 then + invalid_arg "Int32.unpack: pos + 4 not within string"; + if pos < 0 then + invalid_arg "Int32.unpack: pos negative"; let shift n = Int32.shift_left n 8 and add b n = Int32.add (of_byte b) n in - of_byte str.[pos] |> shift |> add str.[pos+1] |> shift - |> add str.[pos+2] |> shift |> add str.[pos+3] + of_byte (Bytes.unsafe_get str pos) |> shift + |> add (Bytes.unsafe_get str (pos+1)) |> shift + |> add (Bytes.unsafe_get str (pos+2)) |> shift + |> add (Bytes.unsafe_get str (pos+3)) (*$T unpack_big - unpack_big "\000\000\000\000" 0 = 0l - unpack_big "\000\000\000\000 " 0 = 0l - unpack_big " \000\000\000\000 " 1 = 0l - unpack_big "\000\000\000\255" 0 = 255l + unpack_big (Bytes.of_string "\000\000\000\000") 0 = 0l + unpack_big (Bytes.of_string "\000\000\000\000 ") 0 = 0l + unpack_big (Bytes.of_string " \000\000\000\000 ") 1 = 0l + unpack_big (Bytes.of_string "\000\000\000\255") 0 = 255l *) (*$Q pack_big; unpack_big - Q.int (let str = " " in fun x -> let x = Int32.of_int x in pack_big str 0 x; unpack_big str 0 = x) + Q.int (let str = Bytes.of_string " " in fun x -> let x = Int32.of_int x in pack_big str 0 x; unpack_big str 0 = x) *) module BaseInt32 = struct From 1a9b9806c33e42489c61d9ca2f620a1592d7b967 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 08:41:25 -0400 Subject: [PATCH 09/21] safe-string: make Marshal safe --- src/batMarshal.mlv | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/batMarshal.mlv b/src/batMarshal.mlv index da728a03d..493aee62c 100644 --- a/src/batMarshal.mlv +++ b/src/batMarshal.mlv @@ -22,6 +22,10 @@ include Marshal +##V<4.2##let from_bytes = from_string +##V<4.2##external to_bytes : +##V<4.2## 'a -> extern_flags list -> Bytes.t = "caml_output_value_to_string" + let output out ?(sharing=true) ?(closures=false) v = let flags = match sharing, closures with | true, false -> [] @@ -33,15 +37,18 @@ let output out ?(sharing=true) ?(closures=false) v = BatInnerIO.nwrite out buf let input inp = - let header = BatInnerIO.really_nread inp header_size in - let size = data_size header 0 in - from_string (header ^ (BatInnerIO.really_nread inp size)) 0 + let header = Bytes.create header_size in + let read = BatInnerIO.really_input inp header 0 header_size in + assert (read = header_size); + let data_size = data_size header 0 in + let buf = Bytes.extend header 0 data_size in + let read = BatInnerIO.really_input inp buf header_size data_size in + assert (read = data_size); + from_bytes buf 0 + +let from_channel = input let to_channel out v flags = BatInnerIO.nwrite out (to_string v flags) -let from_channel = input -##V<4.2##let from_bytes = from_string -##V<4.2##external to_bytes : -##V<4.2## 'a -> extern_flags list -> Bytes.t = "caml_output_value_to_string" From 4bbf6907f3026840652b86ad344674ddc8de131c Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 08:46:41 -0400 Subject: [PATCH 10/21] safe-string: make Pervasives safe --- src/batPervasives.ml | 8 +++++--- src/batPervasives.mliv | 30 ++++++++++++++++++------------ 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/batPervasives.ml b/src/batPervasives.ml index 578e1d32b..c2a829451 100644 --- a/src/batPervasives.ml +++ b/src/batPervasives.ml @@ -58,13 +58,13 @@ let input_all ic = if n = 0 then let res = Bytes.create total in let pos = total - ofs in - let _ = String.blit buf 0 res pos ofs in + let _ = Bytes.blit buf 0 res pos ofs in let coll pos buf = let new_pos = pos - buf_len in - String.blit buf 0 res new_pos buf_len; + Bytes.blit buf 0 res new_pos buf_len; new_pos in let _ = List.fold_left coll pos acc in - res + Bytes.unsafe_to_string res else let new_ofs = ofs + n in let new_total = total + n in @@ -213,6 +213,8 @@ let output_char = BatChar.print let output_string = BatString.print let output oc buf pos len = ignore (BatIO.output oc buf pos len) +let output_substring oc buf pos len = + ignore (BatIO.output_substring oc buf pos len) let output_byte = BatIO.write_byte let output_binary_int = BatIO.write_i32 let output_binary_float out v= BatIO.write_i64 out (BatInt64.bits_of_float v) diff --git a/src/batPervasives.mliv b/src/batPervasives.mliv index 062076bbc..9bb1aee09 100644 --- a/src/batPervasives.mliv +++ b/src/batPervasives.mliv @@ -221,8 +221,14 @@ val output_char : unit BatIO.output -> char -> unit val output_string : unit BatIO.output -> string -> unit (** Write the string on the given output channel. *) -val output : unit BatIO.output -> string -> int -> int -> unit -(** [output oc buf pos len] writes [len] characters from string [buf], +val output : unit BatIO.output -> Bytes.t -> int -> int -> unit +(** [output oc buf pos len] writes [len] characters from byte sequence [buf], + starting at offset [pos], to the given output channel [oc]. + @raise Invalid_argument if [pos] and [len] do not + designate a valid subsequence of [buf]. *) + +val output_substring : unit BatIO.output -> string -> int -> int -> unit +(** [output_substring oc buf pos len] writes [len] characters from string [buf], starting at offset [pos], to the given output channel [oc]. @raise Invalid_argument if [pos] and [len] do not designate a valid substring of [buf]. *) @@ -319,9 +325,9 @@ val input_line : BatIO.input -> string @raise End_of_file if the end of the file is reached at the beginning of line. *) -val input : BatIO.input -> string -> int -> int -> int -(** [input ic buf pos len] reads up to [len] characters from - the given channel [ic], storing them in string [buf], starting at +val input : BatIO.input -> Bytes.t -> int -> int -> int +(** [input ic buf pos len] reads up to [len] characters from the given + channel [ic], storing them in byte sequence [buf], starting at character number [pos]. It returns the actual number of characters read, between 0 and [len] (inclusive). @@ -334,15 +340,15 @@ val input : BatIO.input -> string -> int -> int -> int if desired. (See also {!Pervasives.really_input} for reading exactly [len] characters.) @raise Invalid_argument if [pos] and [len] - do not designate a valid substring of [buf]. *) + do not designate a valid subsequence of [buf]. *) -val really_input : BatIO.input -> string -> int -> int -> unit -(** [really_input ic buf pos len] reads [len] characters from channel [ic], - storing them in string [buf], starting at character number [pos]. - @raise End_of_file if the end of file is reached before [len] - characters have been read. +val really_input : BatIO.input -> Bytes.t -> int -> int -> unit +(** [really_input ic buf pos len] reads [len] characters from channel + [ic], storing them in byte sequence [buf], starting at character + number [pos]. @raise End_of_file if the end of file is reached + before [len] characters have been read. @raise Invalid_argument if - [pos] and [len] do not designate a valid substring of [buf]. *) + [pos] and [len] do not designate a valid subsequence of [buf]. *) val input_byte : BatIO.input -> int (** Same as {!Pervasives.input_char}, but return the 8-bit integer representing From 1c5f96a702ed329b0c4178cc94145ef8f9e79fff Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 17:25:39 -0400 Subject: [PATCH 11/21] safe-string: make Printf safe --- src/batPrintf.mlv | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/batPrintf.mlv b/src/batPrintf.mlv index 268317927..2d70fd828 100644 --- a/src/batPrintf.mlv +++ b/src/batPrintf.mlv @@ -97,11 +97,11 @@ let parse_string_conversion sfmt = let pad_string pad_char p neg s i len = if p = len && i = 0 then s else if p <= len then String.sub s i len else - let res = String.make p pad_char in + let res = Bytes.make p pad_char in if neg - then String.blit s i res 0 len - else String.blit s i res (p - len) len; - res + then Bytes.blit_string s i res 0 len + else Bytes.blit_string s i res (p - len) len; + Bytes.unsafe_to_string res (* Format a string given a %s format, e.g. %40s or %-20s. To do: ignore other flags (#, +, etc)? *) @@ -134,8 +134,9 @@ let extract_format_int conv fmt start stop widths = let sfmt = extract_format fmt start stop widths in match conv with | 'n' | 'N' -> - Bytes.set sfmt (String.length sfmt - 1) 'u'; - sfmt + let sfmt = Bytes.of_string sfmt in + Bytes.set sfmt (Bytes.length sfmt - 1) 'u'; + Bytes.unsafe_to_string sfmt | _ -> sfmt;; (* Returns the position of the next character following the meta format From aed6515c58b93aa62bc51c878d8e28a88c6086a4 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 20:18:28 -0400 Subject: [PATCH 12/21] safe-string: make String safe (but String.Cap) --- src/batString.mliv | 31 ++++++++++++----------- src/batString.mlv | 61 ++++++++++++++++++++++++---------------------- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/src/batString.mliv b/src/batString.mliv index ec7316cc0..393a7d3b4 100644 --- a/src/batString.mliv +++ b/src/batString.mliv @@ -102,14 +102,14 @@ external get : string -> int -> char = "%string_safe_get" @raise Invalid_argument if [n] not a valid character number in [s]. *) -external set : string -> int -> char -> unit = "%string_safe_set" +external set : Bytes.t -> int -> char -> unit = "%string_safe_set" (** [String.set s n c] modifies string [s] in place, replacing the character number [n] by [c]. You can also write [s.[n] <- c] instead of [String.set s n c]. @raise Invalid_argument if [n] is not a valid character number in [s]. *) -external create : int -> string = "caml_create_string" +external create : int -> Bytes.t = "caml_create_string" (** [String.create n] returns a fresh string of length [n]. The string initially contains arbitrary characters. @@ -132,19 +132,17 @@ val sub : string -> int -> int -> string @raise Invalid_argument if [start] and [len] do not designate a valid substring of [s]. *) -val fill : string -> int -> int -> char -> unit -(** [String.fill s start len c] modifies string [s] in place, - replacing [len] characters by [c], starting at [start]. +val fill : Bytes.t -> int -> int -> char -> unit +(** [String.fill s start len c] modifies the byte sequence [s] in + place, replacing [len] characters by [c], starting at [start]. @raise Invalid_argument if [start] and [len] do not designate a valid substring of [s]. *) -val blit : string -> int -> string -> int -> int -> unit +val blit : string -> int -> Bytes.t -> int -> int -> unit (** [String.blit src srcoff dst dstoff len] copies [len] characters - from string [src], starting at character number [srcoff], to - string [dst], starting at character number [dstoff]. It works - correctly even if [src] and [dst] are the same string, - and the source and destination intervals overlap. + from string [src], starting at character number [srcoff], to the + byte sequence [dst], starting at character number [dstoff]. @raise Invalid_argument if [srcoff] and [len] do not designate a valid substring of [src], or if [dstoff] and [len] @@ -698,12 +696,12 @@ val rev : string -> string (** {6 In-Place Transformations}*) -val rev_in_place : string -> unit -(** [rev_in_place s] mutates the string [s], so that its new value is +val rev_in_place : Bytes.t -> unit +(** [rev_in_place s] mutates the byte sequence [s], so that its new value is the mirror of its old one: for instance if s contained ["Example!"], after the mutation it will contain ["!elpmaxE"]. *) -val in_place_mirror : string -> unit +val in_place_mirror : Bytes.t -> unit (** @deprecated Use {!String.rev_in_place} instead *) (** {6 Splitting around}*) @@ -1207,9 +1205,10 @@ end (* The following is for system use only. Do not call directly. *) external unsafe_get : string -> int -> char = "%string_unsafe_get" -external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set" +external unsafe_set : Bytes.t -> int -> char -> unit = "%string_unsafe_set" external unsafe_blit : - string -> int -> string -> int -> int -> unit = "caml_blit_string" "noalloc" -external unsafe_fill : string -> int -> int -> char -> unit = "caml_fill_string" "noalloc" + string -> int -> Bytes.t -> int -> int -> unit = "caml_blit_string" "noalloc" +external unsafe_fill : + Bytes.t -> int -> int -> char -> unit = "caml_fill_string" "noalloc" (**/**) diff --git a/src/batString.mlv b/src/batString.mlv index 8322996ce..d092b5753 100644 --- a/src/batString.mlv +++ b/src/batString.mlv @@ -36,7 +36,7 @@ let init len f = for i = 0 to len - 1 do Bytes.unsafe_set s i (f i) done; - s + Bytes.unsafe_to_string s (*$T init init 5 (fun i -> BatChar.chr (i + int_of_char '0')) = "01234"; @@ -376,7 +376,7 @@ let join = concat let unsafe_slice i j s = if i >= j || i = length s then - Bytes.create 0 + "" else sub s i (j-i) @@ -524,7 +524,7 @@ let of_enum e = let s = Bytes.create l in let i = ref 0 in BatEnum.iter (fun c -> Bytes.unsafe_set s (BatRef.post_incr i) c) e; - s + Bytes.unsafe_to_string s (*$T of_enum Enum.init 3 (fun i -> char_of_int (i + int_of_char '0')) |> of_enum = "012" Enum.init 0 (fun _i -> ' ') |> of_enum = "" @@ -536,7 +536,8 @@ let of_backwards e = let s = Bytes.create l in let i = ref (l - 1) in BatEnum.iter (fun c -> Bytes.unsafe_set s (BatRef.post_decr i) c) e; - s + Bytes.unsafe_to_string s + (*$T of_backwards "" |> enum |> of_backwards = "" "foo" |> enum |> of_backwards = "oof" @@ -549,7 +550,7 @@ let map f s = for i = 0 to len - 1 do Bytes.unsafe_set sc i (f (unsafe_get s i)) done; - sc + Bytes.unsafe_to_string sc (*$T map map Char.uppercase "Five" = "FIVE" map Char.uppercase "" = "" @@ -562,7 +563,7 @@ let mapi f s = for i = 0 to len - 1 do Bytes.unsafe_set sc i (f i (unsafe_get s i)) done; - sc + Bytes.unsafe_to_string sc (*$T mapi mapi (fun _ -> Char.uppercase) "Five" = "FIVE" mapi (fun _ -> Char.uppercase) "" = "" @@ -685,9 +686,10 @@ let to_list = explode let implode l = let res = Bytes.create (List.length l) in let rec imp i = function - | [] -> res + | [] -> () | c :: l -> Bytes.set res i c; imp (i + 1) l in - imp 0 l + imp 0 l; + Bytes.unsafe_to_string res (*$T implode implode ['b';'a';'r'] = "bar" implode [] = "" @@ -719,11 +721,11 @@ let replace_chars f s = | s :: acc -> let len = length s in pos := !pos - len; - blit s 0 sbuf !pos len; + Bytes.blit_string s 0 sbuf !pos len; loop2 acc in loop2 strs; - sbuf + Bytes.unsafe_to_string sbuf (*$T replace_chars replace_chars (function ' ' -> "(space)" | c -> of_char c) "foo bar" = "foo(space)bar" replace_chars (fun _ -> "") "foo" = "" @@ -740,7 +742,7 @@ let replace ~str ~sub ~by = blit str 0 newstr 0 subpos ; blit by 0 newstr subpos bylen ; blit str (subpos + sublen) newstr (subpos + bylen) (strlen - subpos - sublen) ; - (true, newstr) + (true, Bytes.unsafe_to_string newstr) with Not_found -> (* find failed *) (false, str) (*$T replace @@ -765,14 +767,14 @@ let nreplace ~str ~sub ~by = match idxes with | [] -> (* still need the last chunk *) - unsafe_blit str i newstr j (strlen-i) + Bytes.blit_string str i newstr j (strlen-i) | i'::rest -> let di = i' - i in - unsafe_blit str i newstr j di ; - unsafe_blit by 0 newstr (j + di) bylen ; + Bytes.blit_string str i newstr j di ; + Bytes.blit_string by 0 newstr (j + di) bylen ; loop_copy (i + di + sublen) (j + di + bylen) rest in loop_copy 0 0 idxes ; - newstr + Bytes.unsafe_to_string newstr (*$T nreplace nreplace ~str:"bar foo aaa bar" ~sub:"aa" ~by:"foo" = "bar foo afoo bar" nreplace ~str:"bar foo bar" ~sub:"bar" ~by:"foo" = "foo foo foo" @@ -780,21 +782,21 @@ let nreplace ~str ~sub ~by = nreplace ~str:"" ~sub:"aa" ~by:"bb" = "" nreplace ~str:"foo bar baz" ~sub:"foo bar baz" ~by:"" = "" nreplace ~str:"abc" ~sub:"abc" ~by:"def" = "def" - let s1 = "foo" in let s2 = nreplace ~str:s1 ~sub:"X" ~by:"X" in set s2 0 'F' ; s1.[0] = 'f' *) let rev_in_place s = - let len = String.length s in + let len = Bytes.length s in if len > 0 then for k = 0 to (len - 1)/2 do - let old = s.[k] and mirror = len - 1 - k in - Bytes.set s k s.[mirror]; Bytes.set s mirror old; + let old = Bytes.get s k and mirror = len - 1 - k in + Bytes.set s k (Bytes.get s mirror); + Bytes.set s mirror old; done (*$= rev_in_place as f & ~printer:identity - (let s="" in f s; s) "" - (let s="1" in f s; s) "1" - (let s="12" in f s; s) "21" - (let s="Example!" in f s; s) "!elpmaxE" + (let s=Bytes.of_string "" in f s; Bytes.to_string s) "" + (let s=Bytes.of_string "1" in f s; Bytes.to_string s) "1" + (let s=Bytes.of_string "12" in f s; Bytes.to_string s) "21" + (let s=Bytes.of_string "Example!" in f s; Bytes.to_string s) "!elpmaxE" *) let in_place_mirror = rev_in_place @@ -815,7 +817,7 @@ let rev s = for i = 0 to len - 1 do Bytes.unsafe_set reversed (len - i - 1) (String.unsafe_get s i) done; - reversed + Bytes.unsafe_to_string reversed (*$T rev rev "" = "" @@ -852,10 +854,11 @@ let splice s1 off len s2 = let len = clip ~lo:0 ~hi:(len1 - off) len in let out_len = len1 - len + len2 in let s = Bytes.create out_len in - blit s1 0 s 0 off; (* s1 before splice point *) - blit s2 0 s off len2; (* s2 at splice point *) - blit s1 (off+len) s (off+len2) (len1 - (off+len)); (* s1 after off+len *) - s + Bytes.blit_string s1 0 s 0 off; (* s1 before splice point *) + Bytes.blit_string s2 0 s off len2; (* s2 at splice point *) + Bytes.blit_string (* s1 after off+len *) + s1 (off+len) s (off+len2) (len1 - (off+len)); + Bytes.unsafe_to_string s (*$T splice splice "foo bar baz" 3 5 "XXX" = "fooXXXbaz" splice "foo bar baz" 5 0 "XXX" = "foo bXXXar baz" @@ -1133,7 +1136,7 @@ struct let uncapitalize = uncapitalize let copy = copy let sub = sub - let fill = Bytes.fill + let fill = fill let blit = blit let concat = concat let escaped = escaped From dd4dbb42cdbaebcd829d242b1cf5a15ba0105d95 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 20:19:30 -0400 Subject: [PATCH 13/21] safe-string: adapt String.Cap --- src/batString.mliv | 81 +++++++++++-- src/batString.mlv | 208 ++++++++++++++++++---------------- src/batteriesExceptionless.ml | 10 +- src/batteriesPrint.ml | 2 + 4 files changed, 191 insertions(+), 110 deletions(-) diff --git a/src/batString.mliv b/src/batString.mliv index 393a7d3b4..87898bd7f 100644 --- a/src/batString.mliv +++ b/src/batString.mliv @@ -955,7 +955,15 @@ end (* String.Exceptionless *) with the added twist that strings can be made read-only or write-only. Read-only strings may then be safely shared and distributed. - There is no loss of performance involved. *) + @since NEXT_RELEASE the interface and implementation of the Cap + module changed to accomodate the -safe-string transition. OCaml + now uses two distinct types for mutable and immutable string, + which is a good design but is not as expressive as the present Cap + interface, and actually makes implementing Cap harder than it + previously was. We are aware that current state is not optimal for + heavy Cap users; if you are one of them, please get in touch (on + the Batteries issue tracker for example) so that we can discuss + code refactoring and improvements for this sub-module. *) module Cap: sig @@ -986,11 +994,66 @@ sig (** {6 Constructors}*) - external of_string : string -> _ t = "%identity" - (**Adopt a regular string.*) + external of_string : Bytes.t -> _ t = "%identity" + [@@ocaml.deprecated "Use Cap.of_bytes instead"] + (**Adopt a regular byte sequence. + + One could give a perfectly safe semantics to + an [of_string : string -> _ t] function, but this + requires making a copy of the string. Previous + versions of this interface advertised the absence + of performance overhead, so it's better to warn + the user and let them decide (through the use of + either Bytes.of_string or Bytes.unsafe_of_string) + whether they can safely avoid a copy or need to + insert one. + *) + + val of_bytes : Bytes.t -> _ t + (** Adopt a regular byte sequence. + + Note that adopting a byte sequence, even at the restrictive + [`Read] type, does not make a copy. Having a [`Read] string + prevents you (and anyone you pass it to) from writing it, but + your parent may have knowledge of the string at a more permissive + type and perform writes on it. + + If you want to use a [`Read] string and assume it will not get + written to, you should either properly "adopt" it by ensuring + unique ownership (this cannot be guaranteed by the type system), + or make a copy of it at adoption time: [Cap.of_bytes + (Bytes.copy buf)]. + + @since NEXT_RELEASE + *) - external to_string : [`Read | `Write] t -> string = "%identity" - (** Return a capability string as a regular string.*) + external to_string : [`Read | `Write] t -> Bytes.t = "%identity" + [@@ocaml.deprecated "Use Cap.to_bytes instead"] + (** Return a capability string as a regular byte sequence. + + We cannot return a [string] here, and it would be incorrect to + do so even if we required [[< `Read] t] as input. Indeed, one + can start from a writeable byte sequence, and then use the + [read_only] function below to cast it into a [[`Read] + t]. Capabilities are used to enforce local protocol (only reads, + only writes, both reads and writes...), they don't guarantee + that other users of the same (shared) value all follow the same + protocol. To safely reason about mutability one needs stronger + ownership guarantees. + + If you want to obtain an immutable [string] out of a capability + string, you should first convert it to a mutable byte sequence + and then copy it into an immutable string. If you have extra + knowledge about the ownership of the value, you may use unsafe + conversion functions to avoid the copy, see the documentation of + unsafe conversion functions. + *) + + external to_bytes : [`Read | `Write] t -> Bytes.t = "%identity" + (** Return a capability string as a regular byte sequence. + + @since NEXT_RELEASE + *) external read_only : [> `Read] t -> [`Read] t = "%identity" (** Drop capabilities to read only.*) @@ -1080,7 +1143,7 @@ sig val rchop : ?n:int -> [> `Read] t -> _ t - val chop : ?l:int -> ?r:int -> [> `Read] t -> string + val chop : ?l:int -> ?r:int -> [> `Read] t -> _ t val trim : [> `Read] t -> _ t @@ -1127,11 +1190,11 @@ sig (** {6 Splitting around}*) val split : [> `Read] t -> by:[> `Read] t -> _ t * _ t - val rsplit : [> `Read] t -> by:string -> string * string + val rsplit : [> `Read] t -> by:[> `Read] t -> _ t * _ t val nsplit : [> `Read] t -> by:[> `Read] t -> _ t list - val splice: [ `Read | `Write] t -> int -> int -> [> `Read] t -> string + val splice: [ `Read | `Write] t -> int -> int -> [> `Read] t -> _ t val join : [> `Read] t -> [> `Read] t list -> _ t @@ -1190,10 +1253,8 @@ sig val rfind_from: [> `Read] t -> int -> [> `Read] t -> int option - (* val split : string -> string -> (string * string) option TODO *) val split : [> `Read] t -> by:[> `Read] t -> (_ t * _ t) option - (* val rsplit : string -> string -> (string * string) option TODO *) val rsplit : [> `Read] t -> by:[> `Read] t -> (_ t * _ t) option end (* String.Cap.Exceptionless *) diff --git a/src/batString.mlv b/src/batString.mlv index d092b5753..9babb4ba4 100644 --- a/src/batString.mlv +++ b/src/batString.mlv @@ -227,7 +227,8 @@ let exists str sub = not (exists "ab" "c") *) -let strip ?(chars = " \t\r\n") s = +let strip_default = " \t\r\n" +let strip ?(chars = strip_default) s = let p = ref 0 in let l = length s in while !p < l && contains chars (unsafe_get s !p) do @@ -1090,88 +1091,99 @@ end (* String.Exceptionless *) module Cap = struct - type 'a t = string - - let make = make - let is_empty = is_empty - let init = init - let enum = enum - let of_enum = of_enum - let backwards = backwards - let of_backwards = of_backwards - - let of_int = of_int - let of_float = of_float - let of_char = of_char - let to_int = to_int - let to_float = to_float - let map = map - let mapi = mapi - let fold_left = fold_left - let fold_right = fold_right - let fold_lefti = fold_lefti - let fold_righti = fold_righti - let iter = iter - let index = index - let rindex = rindex - let index_from = index_from - let rindex_from = rindex_from - let contains = contains - let contains_from = contains_from - let rcontains_from= rcontains_from - let find = find - let find_from = find_from - let rfind = rfind - let rfind_from = rfind_from - let ends_with = ends_with - let starts_with = starts_with - let exists = exists - let lchop = lchop - let rchop = rchop - let chop = chop - let strip = strip - let uppercase = uppercase - let lowercase = lowercase - let capitalize = capitalize - let uncapitalize = uncapitalize - let copy = copy - let sub = sub - let fill = fill - let blit = blit - let concat = concat - let escaped = escaped - let replace_chars = replace_chars - let replace = replace - let nreplace = nreplace - let split = split - let repeat = repeat - let rsplit = rsplit - let nsplit = nsplit - let join = join - let slice = slice - let explode = explode - let implode = implode - let compare = compare - let icompare = icompare - let splice = splice - let trim = trim - let quote = quote - let left = left - let right = right - let head = head - let tail = tail - let filter_map = filter_map - let filter = filter - let of_list = of_list - let to_list = to_list - - let quote = quote - let print = print - let println = println - let print_quoted = print_quoted - - external of_string : string -> _ t = "%identity" - external to_string : [`Read | `Write] t -> string = "%identity" + type 'a t = Bytes.t + let ubos = Bytes.unsafe_of_string + let usob = Bytes.unsafe_to_string + + let make = Bytes.make + let is_empty b = is_empty (usob b) + let init n f = ubos (init n f) + let enum b = enum (usob b) + let of_enum e = ubos (of_enum e) + let backwards b = backwards (usob b) + let of_backwards e = ubos (of_backwards e) + + let of_int n = ubos (of_int n) + let of_float x = ubos (of_float x) + let of_char c = ubos (of_char c) + let to_int b = to_int (usob b) + let to_float b = to_float (usob b) + let map f b = ubos (map f (usob b)) + let mapi f b = ubos (mapi f (usob b)) + let fold_left f v b = fold_left f v (usob b) + let fold_right f b v = fold_right f (usob b) v + let fold_lefti f v b = fold_lefti f v (usob b) + let fold_righti f b v = fold_righti f (usob b) v + let iter f b = iter f (usob b) + let index b c = index (usob b) c + let rindex b c = rindex (usob b) c + let index_from b i c = index_from (usob b) i c + let rindex_from b i c = rindex_from (usob b) i c + let contains b c = contains (usob b) c + let contains_from b i c = contains_from (usob b) i c + let rcontains_from b i c = rcontains_from (usob b) i c + let find b1 b2 = find (usob b1) (usob b2) + let find_from b1 i b2 = find_from (usob b1) i (usob b2) + let rfind b1 b2 = rfind (usob b1) (usob b2) + let rfind_from b1 i b2 = rfind_from (usob b1) i (usob b2) + let ends_with b1 b2 = ends_with (usob b1) (usob b2) + let starts_with b1 b2 = starts_with (usob b1) (usob b2) + let exists b1 b2 = exists (usob b1) (usob b2) + let lchop ?n b = ubos (lchop ?n (usob b)) + let rchop ?n b = ubos (rchop ?n (usob b)) + let chop ?l ?r b = ubos (chop ?l ?r (usob b)) + let strip ?(chars = ubos strip_default) b = + ubos (strip ~chars:(usob chars) (usob b)) + let uppercase b = ubos (uppercase (usob b)) + let lowercase b = ubos (lowercase (usob b)) + let capitalize b = ubos (capitalize (usob b)) + let uncapitalize b = ubos (uncapitalize (usob b)) + let copy = Bytes.copy + let sub = Bytes.sub + let fill = Bytes.fill + let blit = Bytes.blit + let concat = Bytes.concat + let escaped = Bytes.escaped + let replace_chars f b = ubos (replace_chars (fun c -> usob (f c)) (usob b)) + let replace ~str ~sub ~by = + let (b, s) = replace ~str:(usob str) ~sub:(usob sub) ~by:(usob by) in + (b, ubos s) + let nreplace ~str ~sub ~by = + ubos (nreplace ~str:(usob str) ~sub:(usob sub) ~by:(usob by)) + let split b ~by = + let (a, b) = split (usob b) ~by:(usob by) in + (ubos a, ubos b) + let repeat b i = ubos (repeat (usob b) i) + let rsplit b ~by = + let (a, b) = rsplit (usob b) ~by:(usob by) in + (ubos a, ubos b) + let nsplit b ~by = List.map ubos (nsplit (usob b) ~by:(usob by)) + let join = Bytes.concat + let slice ?first ?last b = ubos (slice ?first ?last (usob b)) + let explode b = explode (usob b) + let implode cs = ubos (implode cs) + let compare b1 b2 = compare (usob b1) (usob b2) + let icompare b1 b2 = icompare (usob b1) (usob b2) + let splice b1 i1 i2 b2 = ubos (splice (usob b1) i1 i2 (usob b2)) + let trim b = ubos (trim (usob b)) + let quote b = quote (usob b) + let left b i = ubos (left (usob b) i) + let right b i = ubos (right (usob b) i) + let head b i = ubos (head (usob b) i) + let tail b i = ubos (tail (usob b) i) + let filter_map f b = ubos (filter_map f (usob b)) + let filter f b = ubos (filter f (usob b)) + let of_list li = ubos (of_list li) + let to_list b = to_list (usob b) + + let print io b = print io (usob b) + let println io b = println io (usob b) + let print_quoted io b = print_quoted io (usob b) + + external of_string : Bytes.t -> _ t = "%identity" + external of_bytes : Bytes.t -> _ t = "%identity" + external to_string : [`Read | `Write] t -> Bytes.t = "%identity" + external to_bytes : [`Read | `Write] t -> Bytes.t = "%identity" external read_only : [> `Read] t -> [`Read] t = "%identity" external write_only: [> `Write] t -> [`Write] t = "%identity" @@ -1188,18 +1200,24 @@ struct module Exceptionless = struct - let find_from = Exceptionless.find_from - let find = Exceptionless.find - let rfind_from = Exceptionless.rfind_from - let rfind = Exceptionless.rfind - let to_int = Exceptionless.to_int - let to_float = Exceptionless.to_float - let index = Exceptionless.index - let index_from = Exceptionless.index_from - let rindex_from = Exceptionless.rindex_from - let rindex = Exceptionless.rindex - let split = Exceptionless.split - let rsplit = Exceptionless.rsplit + let find_from b1 i b2 = Exceptionless.find_from (usob b1) i (usob b2) + let find b1 b2 = Exceptionless.find (usob b1) (usob b2) + let rfind_from b1 i b2 = Exceptionless.rfind_from (usob b1) i (usob b2) + let rfind b1 b2 = Exceptionless.rfind (usob b1) (usob b2) + let to_int b = Exceptionless.to_int (usob b) + let to_float b = Exceptionless.to_float (usob b) + let index b c = Exceptionless.index (usob b) c + let index_from b i c = Exceptionless.index_from (usob b) i c + let rindex_from b i c = Exceptionless.rindex_from (usob b) i c + let rindex b c = Exceptionless.rindex (usob b) c + let split b ~by = + match Exceptionless.split (usob b) ~by:(usob by) with + | None -> None + | Some (a, b) -> Some (ubos a, ubos b) + let rsplit b ~by = + match Exceptionless.rsplit (usob b) ~by:(usob by) with + | None -> None + | Some (a, b) -> Some (ubos a, ubos b) end (* String.Cap.Exceptionless *) end (* String.Cap *) diff --git a/src/batteriesExceptionless.ml b/src/batteriesExceptionless.ml index 29c545c48..18765ea77 100644 --- a/src/batteriesExceptionless.ml +++ b/src/batteriesExceptionless.ml @@ -71,13 +71,13 @@ end module String = struct include (BatString : module type of BatString - with module Cap := BatString.Cap + (* with module Cap := BatString.Cap *) ) include BatString.Exceptionless - module Cap = struct - include BatString.Cap - include BatString.Cap.Exceptionless - end + (* module Cap = struct *) + (* include BatString.Cap *) + (* include BatString.Cap.Exceptionless *) + (* end *) end (* Extlib modules not replacing stdlib *) diff --git a/src/batteriesPrint.ml b/src/batteriesPrint.ml index 8cfd5306e..35e4f98d5 100644 --- a/src/batteriesPrint.ml +++ b/src/batteriesPrint.ml @@ -27,6 +27,7 @@ let print_rope fmt t = let print_ustring fmt t = Format.fprintf fmt "u%S" t +(* let string_of_cap t = BatString.Cap.to_string (BatString.Cap.copy t) let print_string_cap_rw fmt t = @@ -34,6 +35,7 @@ let print_string_cap_rw fmt t = let print_string_cap_ro fmt t = Format.fprintf fmt "ro%S" (string_of_cap t) + *) let string_dynarray = BatIO.to_f_printer (BatDynArray.print BatString.print) let int_dynarray = BatIO.to_f_printer (BatDynArray.print BatInt.print) From bcb538ca84209c2d3ea251a455096b30dfc35627 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 20:20:19 -0400 Subject: [PATCH 14/21] safe-string: make Unix safe --- src/batUnix.mlv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/batUnix.mlv b/src/batUnix.mlv index e122e16fc..9b09a2895 100644 --- a/src/batUnix.mlv +++ b/src/batUnix.mlv @@ -38,7 +38,7 @@ let run_and_read cmd = begin let was_read = ref (input ic line_buff 0 buff_size) in while !was_read <> 0 do - Buffer.add_substring buff line_buff 0 !was_read; + Buffer.add_subbytes buff line_buff 0 !was_read; was_read := input ic line_buff 0 buff_size; done; close_in ic; From 5d67375f4c81b06238309965764353cf03cdd099 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 20:21:07 -0400 Subject: [PATCH 15/21] safe-string: make Base64 safe --- src/batBase64.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/batBase64.ml b/src/batBase64.ml index 17e0e54bc..4660f9eb2 100644 --- a/src/batBase64.ml +++ b/src/batBase64.ml @@ -67,7 +67,7 @@ let encode ?(tbl=chars) ch = in let output s p l = for i = p to p + l - 1 do - write (String.unsafe_get s i) + write (Bytes.unsafe_get s i) done; l in From 4b50dcc5f1f2f68ff153cc541bec18311a267021 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 21:27:13 -0400 Subject: [PATCH 16/21] safe-string: make BitSet safe --- src/batBitSet.ml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/batBitSet.ml b/src/batBitSet.ml index 89fb773b6..d835aa003 100644 --- a/src/batBitSet.ml +++ b/src/batBitSet.ml @@ -37,14 +37,15 @@ let print_array = Array.init 256 print_bchar let print out t = - for i = 0 to (String.length !t) - 1 do + let buf = !t in + for i = 0 to (Bytes.length buf) - 1 do BatInnerIO.nwrite out - (Array.unsafe_get print_array (Char.code (Bytes.unsafe_get !t i))) + (Array.unsafe_get print_array (Char.code (Bytes.unsafe_get buf i))) done let capacity t = (Bytes.length !t) * 8 -let empty () = ref "" +let empty () = ref (Bytes.create 0) let create_ sfun c n = (* n is in bits *) if n < 0 then invalid_arg ("BitSet."^sfun^": negative size"); From b5cd62cdd79fcd8bec95cfa6863fc43b40cdc993 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 21:32:00 -0400 Subject: [PATCH 17/21] safe-string: make Substring safe --- src/batSubstring.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/batSubstring.ml b/src/batSubstring.ml index 97ed3c22b..0ec78d551 100644 --- a/src/batSubstring.ml +++ b/src/batSubstring.ml @@ -85,7 +85,7 @@ let of_input inp = and tmp = Bytes.create tempsize in let n = ref 0 in while n := BatIO.input inp tmp 0 tempsize; !n > 0 do - Buffer.add_substring buf tmp 0 !n; + Buffer.add_subbytes buf tmp 0 !n; done; Buffer.contents buf, 0, Buffer.length buf @@ -197,10 +197,10 @@ let concat ssl = let item = Bytes.create len in let write = let pos = ref 0 in - fun (s,o,len) -> String.unsafe_blit s o item !pos len; pos := !pos + len + fun (s,o,len) -> Bytes.blit_string s o item !pos len; pos := !pos + len in List.iter write ssl; - item + Bytes.unsafe_to_string item (*$T concat concat [empty ()] = "" concat [substring "foobar" 1 3; empty ()] = "oob" From 7bc055015d53d479ec21952e61a86f279535d0c1 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 19 Apr 2016 21:32:07 -0400 Subject: [PATCH 18/21] safe-string: make Text safe --- src/batText.ml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/batText.ml b/src/batText.ml index 27166077d..2ff557f66 100644 --- a/src/batText.ml +++ b/src/batText.ml @@ -44,10 +44,11 @@ let splice s1 off len s2 = let len = int_min (len1 - off) len in let out_len = len1 - len + len2 in let s = Bytes.create out_len in - String.blit s1 0 s 0 off; (* s1 before splice point *) - String.blit s2 0 s off len2; (* s2 at splice point *) - String.blit s1 (off+len) s (off+len2) (len1 - (off+len)); (* s1 after off+len *) - s + Bytes.blit_string s1 0 s 0 off; (* s1 before splice point *) + Bytes.blit_string s2 0 s off len2; (* s2 at splice point *) + Bytes.blit_string (* s1 after off+len *) + s1 (off+len) s (off+len2) (len1 - (off+len)); + Bytes.unsafe_to_string s type t = Empty (**An empty rope*) @@ -1022,7 +1023,9 @@ let read_char i = else let s = Bytes.create len in Bytes.set s 0 n0; - ignore(really_input i s 1 ( len - 1)); + let n = really_input i s 1 (len - 1) in + assert (n = len - 1); + let s = Bytes.unsafe_to_string s in UTF8.get s 0 From ea80aa778acca9c6be287dee44e5c861e7ede0d8 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Wed, 20 Apr 2016 09:45:35 -0400 Subject: [PATCH 19/21] _tags: actually enforce safe_string --- _tags | 2 ++ 1 file changed, 2 insertions(+) diff --git a/_tags b/_tags index 02dedf6df..b50a32e98 100644 --- a/_tags +++ b/_tags @@ -10,3 +10,5 @@ true: package(bytes), warn_-3, bin_annot ".git": -traverse "examples": -traverse : opaque +true: safe_string + From f788c7512255a10c805d7c70bc1d69d1a60d3855 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sat, 23 Sep 2017 13:27:55 +0200 Subject: [PATCH 20/21] safe-string: make BatBytes safe --- src/batBytes.mlv | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/batBytes.mlv b/src/batBytes.mlv index 23aaf993d..6a00de363 100644 --- a/src/batBytes.mlv +++ b/src/batBytes.mlv @@ -47,13 +47,13 @@ include Bytes ##V<4.3##let lowercase_ascii s = map BatChar.lowercase_ascii s (*$T uppercase_ascii - equal ("five" |> of_string |> uppercase_ascii |> to_string) "FIVE" - equal ("école" |> of_string |> uppercase_ascii |> to_string) "éCOLE" + String.equal ("five" |> of_string |> uppercase_ascii |> to_string) "FIVE" + String.equal ("école" |> of_string |> uppercase_ascii |> to_string) "éCOLE" *) (*$T lowercase_ascii - equal ("FIVE" |> of_string |> lowercase_ascii |> to_string) "five" - equal ("ÉCOLE" |> of_string |> lowercase_ascii |> to_string) "École" + String.equal ("FIVE" |> of_string |> lowercase_ascii |> to_string) "five" + String.equal ("ÉCOLE" |> of_string |> lowercase_ascii |> to_string) "École" *) ##V<4.3##let map_first_char f s = @@ -66,13 +66,13 @@ include Bytes ##V<4.3##let uncapitalize_ascii s = map_first_char BatChar.lowercase_ascii s (*$T capitalize_ascii - equal ("five" |> of_string |> capitalize_ascii |> to_string) "Five" - equal ("école" |> of_string |> capitalize_ascii |> to_string) "école" + String.equal ("five" |> of_string |> capitalize_ascii |> to_string) "Five" + String.equal ("école" |> of_string |> capitalize_ascii |> to_string) "école" *) (*$T uncapitalize_ascii - equal ("Five" |> of_string |> uncapitalize_ascii |> to_string) "five" - equal ("École" |> of_string |> uncapitalize_ascii |> to_string) "École" + String.equal ("Five" |> of_string |> uncapitalize_ascii |> to_string) "five" + String.equal ("École" |> of_string |> uncapitalize_ascii |> to_string) "École" *) From 2b67824504786febd34835d4610f007faba3923e Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Sat, 23 Sep 2017 14:14:02 +0200 Subject: [PATCH 21/21] -safe-string: Changelog --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4530affe8..1f33cf15f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,16 @@ Changelog --------- +## v2.8.0 (minor release) + +This minor release supports the -safe-string mode for OCaml +compilation, enforcing a type-level separation between (immutable) +strings and mutable byte sequences. + +- support -safe-string compilation + #673 + (Gabriel Scherer) + ## v2.7.0 (minor release) This minor release is the first to support OCaml 4.05.0. As with