(**************************************************************************) (* *) (* OCaml *) (* *) (* Pierre Weis and Xavier Leroy, projet Cristal, INRIA Rocquencourt *) (* *) (* Copyright 1999 Institut National de Recherche en Informatique et *) (* en Automatique. *) (* *) (* All rights reserved. This file is distributed under the terms of *) (* the GNU Lesser General Public License version 2.1, with the *) (* special exception on linking described in the file LICENSE. *) (* *) (**************************************************************************) (* Extensible buffers *) type t = {mutable buffer : bytes; mutable position : int; mutable length : int; initial_buffer : bytes} (* Invariants: all parts of the code preserve the invariants that: - [0 <= b.position <= b.length] - [b.length = Bytes.length b.buffer] Note in particular that [b.position = b.length] is legal, it means that the buffer is full and will have to be extended before any further addition. *) let create n = let n = if n < 1 then 1 else n in let n = if n > Sys.max_string_length then Sys.max_string_length else n in let s = Bytes.create n in {buffer = s; position = 0; length = n; initial_buffer = s} let contents b = Bytes.sub_string b.buffer 0 b.position let to_bytes b = Bytes.sub b.buffer 0 b.position let sub b ofs len = if ofs < 0 || len < 0 || ofs > b.position - len then invalid_arg "Buffer.sub" else Bytes.sub_string b.buffer ofs len let blit src srcoff dst dstoff len = if len < 0 || srcoff < 0 || srcoff > src.position - len || dstoff < 0 || dstoff > (Bytes.length dst) - len then invalid_arg "Buffer.blit" else Bytes.unsafe_blit src.buffer srcoff dst dstoff len let nth b ofs = if ofs < 0 || ofs >= b.position then invalid_arg "Buffer.nth" else Bytes.unsafe_get b.buffer ofs let length b = b.position let clear b = b.position <- 0 let reset b = b.position <- 0; b.buffer <- b.initial_buffer; b.length <- Bytes.length b.buffer (* [resize b more] ensures that [b.position + more <= b.length] holds by dynamically extending [b.buffer] if necessary -- and thus increasing [b.length]. In particular, after [resize b more] is called, a direct access of size [more] at [b.position] will always be in-bounds, so that (unsafe_{get,set}) may be used for performance. *) let resize b more = let old_pos = b.position in let old_len = b.length in let new_len = ref old_len in while old_pos + more > !new_len do new_len := 2 * !new_len done; if !new_len > Sys.max_string_length then begin if old_pos + more <= Sys.max_string_length then new_len := Sys.max_string_length else failwith "Buffer.add: cannot grow buffer" end; let new_buffer = Bytes.create !new_len in (* PR#6148: let's keep using [blit] rather than [unsafe_blit] in this tricky function that is slow anyway. *) Bytes.blit b.buffer 0 new_buffer 0 b.position; b.buffer <- new_buffer; b.length <- !new_len; assert (b.position + more <= b.length); assert (old_pos + more <= b.length); () (* Note: there are various situations (preemptive threads, signals and gc finalizers) where OCaml code may be run asynchronously; in particular, there may be a race with another user of [b], changing its mutable fields in the middle of the [resize] call. The Buffer module does not provide any correctness guarantee if that happens, but we must still ensure that the datastructure invariants hold for memory-safety -- as we plan to use [unsafe_{get,set}]. There are two potential allocation points in this function, [ref] and [Bytes.create], but all reads and writes to the fields of [b] happen before both of them or after both of them. We therefore assume that [b.position] may change at these allocations, and check that the [b.position + more <= b.length] postcondition holds for both values of [b.position], before or after the function is called. More precisely, the following invariants must hold if the function returns correctly, in addition to the usual buffer invariants: - [old(b.position) + more <= new(b.length)] - [new(b.position) + more <= new(b.length)] - [old(b.length) <= new(b.length)] Note: [b.position + more <= old(b.length)] does *not* hold in general, as it is precisely the case where you need to call [resize] to increase [b.length]. Note: [assert] above does not mean that we know the conditions always hold, but that the function may return correctly only if they hold. Note: the other functions in this module does not need to be checked with this level of scrutiny, given that they read/write the buffer immediately after checking that [b.position + more <= b.length] hold or calling [resize]. *) let add_char b c = let pos = b.position in if pos >= b.length then resize b 1; Bytes.unsafe_set b.buffer pos c; b.position <- pos + 1 let add_utf_8_uchar b u = match Uchar.to_int u with | u when u < 0 -> assert false | u when u <= 0x007F -> add_char b (Char.unsafe_chr u) | u when u <= 0x07FF -> let pos = b.position in if pos + 2 > b.length then resize b 2; Bytes.unsafe_set b.buffer (pos ) (Char.unsafe_chr (0xC0 lor (u lsr 6))); Bytes.unsafe_set b.buffer (pos + 1) (Char.unsafe_chr (0x80 lor (u land 0x3F))); b.position <- pos + 2 | u when u <= 0xFFFF -> let pos = b.position in if pos + 3 > b.length then resize b 3; Bytes.unsafe_set b.buffer (pos ) (Char.unsafe_chr (0xE0 lor (u lsr 12))); Bytes.unsafe_set b.buffer (pos + 1) (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F))); Bytes.unsafe_set b.buffer (pos + 2) (Char.unsafe_chr (0x80 lor (u land 0x3F))); b.position <- pos + 3 | u when u <= 0x10FFFF -> let pos = b.position in if pos + 4 > b.length then resize b 4; Bytes.unsafe_set b.buffer (pos ) (Char.unsafe_chr (0xF0 lor (u lsr 18))); Bytes.unsafe_set b.buffer (pos + 1) (Char.unsafe_chr (0x80 lor ((u lsr 12) land 0x3F))); Bytes.unsafe_set b.buffer (pos + 2) (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F))); Bytes.unsafe_set b.buffer (pos + 3) (Char.unsafe_chr (0x80 lor (u land 0x3F))); b.position <- pos + 4 | _ -> assert false let add_utf_16be_uchar b u = match Uchar.to_int u with | u when u < 0 -> assert false | u when u <= 0xFFFF -> let pos = b.position in if pos + 2 > b.length then resize b 2; Bytes.unsafe_set b.buffer (pos ) (Char.unsafe_chr (u lsr 8)); Bytes.unsafe_set b.buffer (pos + 1) (Char.unsafe_chr (u land 0xFF)); b.position <- pos + 2 | u when u <= 0x10FFFF -> let u' = u - 0x10000 in let hi = 0xD800 lor (u' lsr 10) in let lo = 0xDC00 lor (u' land 0x3FF) in let pos = b.position in if pos + 4 > b.length then resize b 4; Bytes.unsafe_set b.buffer (pos ) (Char.unsafe_chr (hi lsr 8)); Bytes.unsafe_set b.buffer (pos + 1) (Char.unsafe_chr (hi land 0xFF)); Bytes.unsafe_set b.buffer (pos + 2) (Char.unsafe_chr (lo lsr 8)); Bytes.unsafe_set b.buffer (pos + 3) (Char.unsafe_chr (lo land 0xFF)); b.position <- pos + 4 | _ -> assert false let add_utf_16le_uchar b u = match Uchar.to_int u with | u when u < 0 -> assert false | u when u <= 0xFFFF -> let pos = b.position in if pos + 2 > b.length then resize b 2; Bytes.unsafe_set b.buffer (pos ) (Char.unsafe_chr (u land 0xFF)); Bytes.unsafe_set b.buffer (pos + 1) (Char.unsafe_chr (u lsr 8)); b.position <- pos + 2 | u when u <= 0x10FFFF -> let u' = u - 0x10000 in let hi = 0xD800 lor (u' lsr 10) in let lo = 0xDC00 lor (u' land 0x3FF) in let pos = b.position in if pos + 4 > b.length then resize b 4; Bytes.unsafe_set b.buffer (pos ) (Char.unsafe_chr (hi land 0xFF)); Bytes.unsafe_set b.buffer (pos + 1) (Char.unsafe_chr (hi lsr 8)); Bytes.unsafe_set b.buffer (pos + 2) (Char.unsafe_chr (lo land 0xFF)); Bytes.unsafe_set b.buffer (pos + 3) (Char.unsafe_chr (lo lsr 8)); b.position <- pos + 4 | _ -> assert false let add_substring b s offset len = if offset < 0 || len < 0 || offset > String.length s - len then invalid_arg "Buffer.add_substring/add_subbytes"; let new_position = b.position + len in if new_position > b.length then resize b len; Bytes.unsafe_blit_string s offset b.buffer b.position len; b.position <- new_position let add_subbytes b s offset len = add_substring b (Bytes.unsafe_to_string s) offset len let add_string b s = let len = String.length s in let new_position = b.position + len in if new_position > b.length then resize b len; Bytes.unsafe_blit_string s 0 b.buffer b.position len; b.position <- new_position let add_bytes b s = add_string b (Bytes.unsafe_to_string s) let add_buffer b bs = add_subbytes b bs.buffer 0 bs.position (* this (private) function could move into the standard library *) let really_input_up_to ic buf ofs len = let rec loop ic buf ~already_read ~ofs ~to_read = if to_read = 0 then already_read else begin let r = input ic buf ofs to_read in if r = 0 then already_read else begin let already_read = already_read + r in let ofs = ofs + r in let to_read = to_read - r in loop ic buf ~already_read ~ofs ~to_read end end in loop ic buf ~already_read:0 ~ofs ~to_read:len let unsafe_add_channel_up_to b ic len = if b.position + len > b.length then resize b len; let n = really_input_up_to ic b.buffer b.position len in (* The assertion below may fail in weird scenario where threaded/finalizer code, run asynchronously during the [really_input_up_to] call, races on the buffer; we don't ensure correctness in this case, but need to preserve the invariants for memory-safety (see discussion of [resize]). *) assert (b.position + n <= b.length); b.position <- b.position + n; n let add_channel b ic len = if len < 0 || len > Sys.max_string_length then (* PR#5004 *) invalid_arg "Buffer.add_channel"; let n = unsafe_add_channel_up_to b ic len in (* It is intentional that a consumer catching End_of_file will see the data written (see #6719, #7136). *) if n < len then raise End_of_file; () let output_buffer oc b = output oc b.buffer 0 b.position let closing = function | '(' -> ')' | '{' -> '}' | _ -> assert false (* opening and closing: open and close characters, typically ( and ) k: balance of opening and closing chars s: the string where we are searching start: the index where we start the search. *) let advance_to_closing opening closing k s start = let rec advance k i lim = if i >= lim then raise Not_found else if s.[i] = opening then advance (k + 1) (i + 1) lim else if s.[i] = closing then if k = 0 then i else advance (k - 1) (i + 1) lim else advance k (i + 1) lim in advance k start (String.length s) let advance_to_non_alpha s start = let rec advance i lim = if i >= lim then lim else match s.[i] with | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' -> advance (i + 1) lim | _ -> i in advance start (String.length s) (* We are just at the beginning of an ident in s, starting at start. *) let find_ident s start lim = if start >= lim then raise Not_found else match s.[start] with (* Parenthesized ident ? *) | '(' | '{' as c -> let new_start = start + 1 in let stop = advance_to_closing c (closing c) 0 s new_start in String.sub s new_start (stop - start - 1), stop + 1 (* Regular ident *) | _ -> let stop = advance_to_non_alpha s (start + 1) in String.sub s start (stop - start), stop (* Substitute $ident, $(ident), or ${ident} in s, according to the function mapping f. *) let add_substitute b f s = let lim = String.length s in let rec subst previous i = if i < lim then begin match s.[i] with | '$' as current when previous = '\\' -> add_char b current; subst ' ' (i + 1) | '$' -> let j = i + 1 in let ident, next_i = find_ident s j lim in add_string b (f ident); subst ' ' next_i | current when previous == '\\' -> add_char b '\\'; add_char b current; subst ' ' (i + 1) | '\\' as current -> subst current (i + 1) | current -> add_char b current; subst current (i + 1) end else if previous = '\\' then add_char b previous in subst ' ' 0 let truncate b len = if len < 0 || len > length b then invalid_arg "Buffer.truncate" else b.position <- len (** {1 Iterators} *) let to_seq b = let rec aux i () = (* Note that b.position is not a constant and cannot be lifted out of aux *) if i >= b.position then Seq.Nil else let x = Bytes.unsafe_get b.buffer i in Seq.Cons (x, aux (i+1)) in aux 0 let to_seqi b = let rec aux i () = (* Note that b.position is not a constant and cannot be lifted out of aux *) if i >= b.position then Seq.Nil else let x = Bytes.unsafe_get b.buffer i in Seq.Cons ((i,x), aux (i+1)) in aux 0 let add_seq b seq = Seq.iter (add_char b) seq let of_seq i = let b = create 32 in add_seq b i; b (** {6 Binary encoding of integers} *) external unsafe_set_int8 : bytes -> int -> int -> unit = "%bytes_unsafe_set" external unsafe_set_int16 : bytes -> int -> int -> unit = "%caml_bytes_set16u" external unsafe_set_int32 : bytes -> int -> int32 -> unit = "%caml_bytes_set32u" external unsafe_set_int64 : bytes -> int -> int64 -> unit = "%caml_bytes_set64u" external swap16 : int -> int = "%bswap16" external swap32 : int32 -> int32 = "%bswap_int32" external swap64 : int64 -> int64 = "%bswap_int64" let add_int8 b x = let new_position = b.position + 1 in if new_position > b.length then resize b 1; unsafe_set_int8 b.buffer b.position x; b.position <- new_position let add_int16_ne b x = let new_position = b.position + 2 in if new_position > b.length then resize b 2; unsafe_set_int16 b.buffer b.position x; b.position <- new_position let add_int32_ne b x = let new_position = b.position + 4 in if new_position > b.length then resize b 4; unsafe_set_int32 b.buffer b.position x; b.position <- new_position let add_int64_ne b x = let new_position = b.position + 8 in if new_position > b.length then resize b 8; unsafe_set_int64 b.buffer b.position x; b.position <- new_position let add_int16_le b x = add_int16_ne b (if Sys.big_endian then swap16 x else x) let add_int16_be b x = add_int16_ne b (if Sys.big_endian then x else swap16 x) let add_int32_le b x = add_int32_ne b (if Sys.big_endian then swap32 x else x) let add_int32_be b x = add_int32_ne b (if Sys.big_endian then x else swap32 x) let add_int64_le b x = add_int64_ne b (if Sys.big_endian then swap64 x else x) let add_int64_be b x = add_int64_ne b (if Sys.big_endian then x else swap64 x) let add_uint8 = add_int8 let add_uint16_ne = add_int16_ne let add_uint16_le = add_int16_le let add_uint16_be = add_int16_be