ocaml/stdlib/stringLabels.mli

412 lines
15 KiB
OCaml

(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* NOTE:
If this file is stringLabels.mli, run tools/sync_stdlib_docs after editing
it to generate string.mli.
If this file is string.mli, do not edit it directly -- edit
stringLabels.mli instead.
*)
(** Strings.
A string [s] of length [n] is an indexable and immutable sequence
of [n] bytes. For historical reasons these bytes are referred to
as characters.
The semantics of string functions is defined in terms of
indices and positions. These are depicted and described
as follows.
{v
positions 0 1 2 3 4 n-1 n
+---+---+---+---+ +-----+
indices | 0 | 1 | 2 | 3 | ... | n-1 |
+---+---+---+---+ +-----+
v}
{ul
{- An {e index} [i] of [s] is an integer in the range \[[0];[n-1]\].
It represents the [i]th byte (character) of [s] which can be
accessed using the constant time string indexing operator
[s.[i]].}
{- A {e position} [i] of [s] is an integer in the range
\[[0];[n]\]. It represents either the point at the beginning of
the string, or the point between two indices, or the point at
the end of the string. The [i]th byte index is between position
[i] and [i+1].}}
Two integers [start] and [len] are said to define a {e valid
substring} of [s] if [len >= 0] and [start], [start+len] are
positions of [s].
{b Unicode text.} Strings being arbitrary sequences of bytes, they
can hold any kind of textual encoding. However the recommended
encoding for storing Unicode text in OCaml strings is UTF-8. This
is the encoding used by Unicode escapes in string literals. For
example the string ["\u{1F42B}"] is the UTF-8 encoding of the
Unicode character U+1F42B.
{b Past mutability.} OCaml strings used to be modifiable in place,
for instance via the {!String.set} and {!String.blit}
functions. This use is nowadays only possible when the compiler is
put in "unsafe-string" mode by giving the [-unsafe-string]
command-line option. This compatibility mode makes the types
[string] and [bytes] (see {!Bytes.t}) interchangeable so that
functions expecting byte sequences can also accept strings as
arguments and modify them.
The distinction between [bytes] and [string] was introduced in
OCaml 4.02, and the "unsafe-string" compatibility mode was the
default until OCaml 4.05. Starting with 4.06, the compatibility
mode is opt-in; we intend to remove the option in the future.
The labeled version of this module can be used as described in the
{!StdLabels} module.
*)
(** {1:strings Strings} *)
type t = string
(** The type for strings. It is an alias of the built-in [string] type. *)
val make : int -> char -> string
(** [make n c] is a string of length [n] with each index holding the
character [c].
@raise Invalid_argument if [n < 0] or [n > ]{!Sys.max_string_length}. *)
val init : int -> f:(int -> char) -> string
(** [init n ~f] is a string of length [n] with index
[i] holding the character [f i] (called in increasing index order).
@raise Invalid_argument if [n < 0] or [n > ]{!Sys.max_string_length}.
@since 4.02.0 *)
external length : string -> int = "%string_length"
(** [length s] is the length (number of bytes/characters) of [s]. *)
external get : string -> int -> char = "%string_safe_get"
(** [get s i] is the character at index [i] in [s]. This is the same
as writing [s.[i]].
@raise Invalid_argument if [i] is not an index of [s]. *)
(** {1:concat Concatenating}
{b Note.} The {!Stdlib.( ^ )} binary operator concatenates two
strings. *)
val concat : sep:string -> string list -> string
(** [concat ~sep ss] concatenates the list of strings [ss], inserting
the separator string [sep] between each pair of consecutive elements.
@raise Invalid_argument if the result is longer than
{!Sys.max_string_length} bytes. *)
(** {1:predicates Predicates and comparisons} *)
val equal : t -> t -> bool
(** [equal s0 s1] is [true] iff [s0] and [s1] are character-wise equal.
@since 4.03.0 (4.05.0 in StringLabels) *)
val compare : t -> t -> int
(** [compare s0 s1] sorts [s0] and [s1] in lexicographical order. [compare]
behaves like {!Stdlib.compare} on strings but may be more efficient. *)
val starts_with :
prefix (* comment thwarts tools/sync_stdlib_docs *) :string -> string -> bool
(** [starts_with ][~][prefix s] is [true] iff [s] starts with [prefix].
@since 4.12.0 *)
val ends_with :
suffix (* comment thwarts tools/sync_stdlib_docs *) :string -> string -> bool
(** [ends_with ~suffix s] is [true] iff [s] ends with [suffix].
@since 4.12.0 *)
val contains_from : string -> int -> char -> bool
(** [contains_from s start c] is [true] iff [c] appears in [s] after position
[start].
@raise Invalid_argument if [start] is not a valid position in [s]. *)
val rcontains_from : string -> int -> char -> bool
(** [rcontains_from s stop c] is [true] iff [c] appears in [s] before position
[stop+1].
@raise Invalid_argument if [stop < 0] or [stop+1] is not a valid
position in [s]. *)
val contains : string -> char -> bool
(** [contains s c] is {!String.contains_from}[ s 0 c]. *)
(** {1:extract Extracting substrings} *)
val sub : string -> pos:int -> len:int -> string
(** [sub s ~pos ~len] is a string of length [len], containing the
substring of [s] that starts at position [pos] and has length
[len].
@raise Invalid_argument if [pos] and [len] do not designate a valid
substring of [s]. *)
val split_on_char : sep:char -> string -> string list
(** [split_on_char ~sep s] is the list of all (possibly empty)
substrings of [s] that are delimited by the character [sep].
The function's result is specified by the following invariants:
{ul
{- The list is not empty.}
{- Concatenating its elements using [sep] as a separator returns a
string equal to the input ([concat ~sep:(make 1 sep)
(split_on_char ~sep s) = s]).}
{- No string in the result contains the [sep] character.}}
@since 4.04.0 (4.05.0 in StringLabels) *)
(** {1:transforming Transforming} *)
val map : f:(char -> char) -> string -> string
(** [map ~f s] is the string resulting from applying [f] to all the
characters of [s] in increasing order.
@since 4.00.0 *)
val mapi : f:(int -> char -> char) -> string -> string
(** [mapi ~f s] is like {!map} but the index of the character is also
passed to [f].
@since 4.02.0 *)
val trim : string -> string
(** [trim s] is [s] without leading and trailing whitespace. Whitespace
characters are: [' '], ['\x0C'] (form feed), ['\n'], ['\r'], and ['\t'].
@since 4.00.0 *)
val escaped : string -> string
(** [escaped s] is [s] with special characters represented by escape
sequences, following the lexical conventions of OCaml.
All characters outside the US-ASCII printable range \[0x20;0x7E\] are
escaped, as well as backslash (0x5C) and double-quote (0x22).
The function {!Scanf.unescaped} is a left inverse of [escaped],
i.e. [Scanf.unescaped (escaped s) = s] for any string [s] (unless
[escaped s] fails).
@raise Invalid_argument if the result is longer than
{!Sys.max_string_length} bytes. *)
val uppercase_ascii : string -> string
(** [uppercase_ascii s] is [s] with all lowercase letters
translated to uppercase, using the US-ASCII character set.
@since 4.03.0 (4.05.0 in StringLabels) *)
val lowercase_ascii : string -> string
(** [lowercase_ascii s] is [s] with all uppercase letters translated
to lowercase, using the US-ASCII character set.
@since 4.03.0 (4.05.0 in StringLabels) *)
val capitalize_ascii : string -> string
(** [capitalize_ascii s] is [s] with the first character set to
uppercase, using the US-ASCII character set.
@since 4.03.0 (4.05.0 in StringLabels) *)
val uncapitalize_ascii : string -> string
(** [uncapitalize_ascii s] is [s] with the first character set to lowercase,
using the US-ASCII character set.
@since 4.03.0 (4.05.0 in StringLabels) *)
(** {1:traversing Traversing} *)
val iter : f:(char -> unit) -> string -> unit
(** [iter ~f s] applies function [f] in turn to all the characters of [s].
It is equivalent to [f s.[0]; f s.[1]; ...; f s.[length s - 1]; ()]. *)
val iteri : f:(int -> char -> unit) -> string -> unit
(** [iteri ~f s] is like {!iter}, but [f] is also given the index of
each character as its first argument, followed by the character itself.
@since 4.00.0 *)
(** {1:searching Searching} *)
val index_from : string -> int -> char -> int
(** [index_from s i c] is the index of the first occurrence of [c] in
[s] after position [i].
@raise Not_found if [c] does not occur in [s] after position [i].
@raise Invalid_argument if [i] is not a valid position in [s]. *)
val index_from_opt : string -> int -> char -> int option
(** [index_from_opt s i c] is [Some j], where [j] is the index of the
first occurrence of [c] in [s] after position [i], or [None] if [c]
does not occur in [s] after position [i].
@raise Invalid_argument if [i] is not a valid position in [s].
@since 4.05 *)
val rindex_from : string -> int -> char -> int
(** [rindex_from s i c] is the index of the last occurrence of [c] in
[s] before position [i+1].
@raise Not_found if [c] does not occur in [s] before position [i+1].
@raise Invalid_argument if [i+1] is not a valid position in [s]. *)
val rindex_from_opt : string -> int -> char -> int option
(** [rindex_from_opt s i c] is [Some j], where [j] is the index of the
last occurrence of [c] in [s] before position [i+1], or [None] if [c]
does not occur in [s] before position [i+1].
@raise Invalid_argument if [i+1] is not a valid position in [s].
@since 4.05 *)
val index : string -> char -> int
(** [index s c] is {!String.index_from}[ s 0 c]. *)
val index_opt : string -> char -> int option
(** [index_opt s c] is {!String.index_from_opt}[ s 0 c].
@since 4.05 *)
val rindex : string -> char -> int
(** [rindex s c] is {!String.rindex_from}[ s (length s - 1) c]. *)
val rindex_opt : string -> char -> int option
(** [rindex_opt s c] is {!String.rindex_from_opt}[ s (length s - 1) c].
@since 4.05 *)
(** {1:converting Converting} *)
val to_seq : t -> char Seq.t
(** [to_seq s] is a sequence made of the string's characters in
increasing order. In ["unsafe-string"] mode, modifications of the string
during iteration will be reflected in the iterator.
@since 4.07 *)
val to_seqi : t -> (int * char) Seq.t
(** [to_seqi s] is like {!to_seq} but also tuples the corresponding index.
@since 4.07 *)
val of_seq : char Seq.t -> t
(** [of_seq s] is a string made of the sequence's characters.
@since 4.07 *)
(** {1:deprecated Deprecated functions} *)
external create : int -> bytes = "caml_create_string"
[@@ocaml.deprecated "Use Bytes.create/BytesLabels.create instead."]
(** [create n] returns a fresh byte sequence of length [n].
The sequence is uninitialized and contains arbitrary bytes.
@raise Invalid_argument if [n < 0] or [n > ]{!Sys.max_string_length}.
@deprecated This is a deprecated alias of
{!Bytes.create}/{!BytesLabels.create}. *)
external set : bytes -> int -> char -> unit = "%string_safe_set"
[@@ocaml.deprecated "Use Bytes.set/BytesLabels.set instead."]
(** [set s n c] modifies byte sequence [s] in place,
replacing the byte at index [n] with [c].
You can also write [s.[n] <- c] instead of [set s n c].
@raise Invalid_argument if [n] is not a valid index in [s].
@deprecated This is a deprecated alias of
{!Bytes.set}/{!BytesLabels.set}. *)
val blit :
src:string -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int -> unit
(** [blit ~src ~src_pos ~dst ~dst_pos ~len] copies [len] bytes
from the string [src], starting at index [src_pos],
to byte sequence [dst], starting at index [dst_pos].
@raise Invalid_argument if [src_pos] and [len] do not
designate a valid range of [src], or if [dst_pos] and [len]
do not designate a valid range of [dst]. *)
val copy : string -> string
[@@ocaml.deprecated "Strings now immutable: no need to copy"]
(** Return a copy of the given string.
@deprecated Because strings are immutable, it doesn't make much
sense to make identical copies of them. *)
val fill : bytes -> pos:int -> len:int -> char -> unit
[@@ocaml.deprecated "Use Bytes.fill/BytesLabels.fill instead."]
(** [fill s ~pos ~len c] modifies byte sequence [s] in place,
replacing [len] bytes by [c], starting at [pos].
@raise Invalid_argument if [pos] and [len] do not
designate a valid substring of [s].
@deprecated This is a deprecated alias of
{!Bytes.fill}/{!BytesLabels.fill}. *)
val uppercase : string -> string
[@@ocaml.deprecated
"Use String.uppercase_ascii/StringLabels.uppercase_ascii instead."]
(** Return a copy of the argument, with all lowercase letters
translated to uppercase, including accented letters of the ISO
Latin-1 (8859-1) character set.
@deprecated Functions operating on Latin-1 character set are deprecated. *)
val lowercase : string -> string
[@@ocaml.deprecated
"Use String.lowercase_ascii/StringLabels.lowercase_ascii instead."]
(** Return a copy of the argument, with all uppercase letters
translated to lowercase, including accented letters of the ISO
Latin-1 (8859-1) character set.
@deprecated Functions operating on Latin-1 character set are deprecated. *)
val capitalize : string -> string
[@@ocaml.deprecated
"Use String.capitalize_ascii/StringLabels.capitalize_ascii instead."]
(** Return a copy of the argument, with the first character set to uppercase,
using the ISO Latin-1 (8859-1) character set.
@deprecated Functions operating on Latin-1 character set are deprecated. *)
val uncapitalize : string -> string
[@@ocaml.deprecated
"Use String.uncapitalize_ascii/StringLabels.uncapitalize_ascii instead."]
(** Return a copy of the argument, with the first character set to lowercase,
using the ISO Latin-1 (8859-1) character set.
@deprecated Functions operating on Latin-1 character set are deprecated. *)
(**/**)
(* The following is for system use only. Do not call directly. *)
(* [unsafe_get s i] is bound to the compiler primitive
"%string_unsafe_get". NOTE(review): judging by the name, this is {!get}
without the bounds check -- confirm before relying on it. *)
external unsafe_get : string -> int -> char = "%string_unsafe_get"
(* [unsafe_set] operates on [bytes] rather than [string] and is marked
deprecated (no replacement message is given in the attribute). *)
external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set"
[@@ocaml.deprecated]
(* [unsafe_blit] is bound to the external "caml_blit_string" and is
declared [@@noalloc]. It takes the same labeled arguments as {!blit}.
NOTE(review): presumably {!blit} without argument validation -- confirm. *)
external unsafe_blit :
src:string -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int ->
unit = "caml_blit_string" [@@noalloc]
(* [unsafe_fill] is bound to the external "caml_fill_string", declared
[@@noalloc], and marked deprecated. It takes the same arguments as {!fill}.
NOTE(review): presumably {!fill} without argument validation -- confirm. *)
external unsafe_fill :
bytes -> pos:int -> len:int -> char -> unit = "caml_fill_string" [@@noalloc]
[@@ocaml.deprecated]