Changement emplacements/syntaxe des commentaires
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3921 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02master
parent
1a1d67bb95
commit
c49c7996c5
|
@ -12,180 +12,232 @@
|
||||||
|
|
||||||
(* $Id$ *)
|
(* $Id$ *)
|
||||||
|
|
||||||
(* Module [Str]: regular expressions and high-level string processing *)
|
(** Regular expressions and high-level string processing *)
|
||||||
|
|
||||||
(*** Regular expressions *)
|
|
||||||
|
|
||||||
|
(** {2 Regular expressions} *)
|
||||||
|
|
||||||
|
|
||||||
|
(** The type of compiled regular expressions. *)
|
||||||
type regexp
|
type regexp
|
||||||
(* The type of compiled regular expressions. *)
|
|
||||||
|
|
||||||
|
|
||||||
|
(** Compile a regular expression. The syntax for regular expressions
|
||||||
|
is the same as in Gnu Emacs. The special characters are
|
||||||
|
[$^.*+?[]]. The following constructs are recognized:
|
||||||
|
- [. ] matches any character except newline
|
||||||
|
- [* ] (postfix) matches the previous expression zero, one or
|
||||||
|
several times
|
||||||
|
- [+ ] (postfix) matches the previous expression one or
|
||||||
|
several times
|
||||||
|
- [? ] (postfix) matches the previous expression once or
|
||||||
|
not at all
|
||||||
|
- [[..] ] character set; ranges are denoted with [-], as in [[a-z]];
|
||||||
|
an initial [^], as in [[^0-9]], complements the set
|
||||||
|
- [^ ] matches at beginning of line
|
||||||
|
- [$ ] matches at end of line
|
||||||
|
- [\| ] (infix) alternative between two expressions
|
||||||
|
- [\(..\)] grouping and naming of the enclosed expression
|
||||||
|
- [\1 ] the text matched by the first [\(...\)] expression
|
||||||
|
([\2] for the second expression, etc)
|
||||||
|
- [\b ] matches word boundaries
|
||||||
|
- [\ ] quotes special characters. *)
|
||||||
val regexp: string -> regexp
|
val regexp: string -> regexp
|
||||||
(* Compile a regular expression. The syntax for regular expressions
|
|
||||||
is the same as in Gnu Emacs. The special characters are
|
(** Same as [regexp], but the compiled expression will match text
|
||||||
[$^.*+?[]]. The following constructs are recognized:
|
in a case-insensitive way: uppercase and lowercase letters will
|
||||||
- [. ] matches any character except newline
|
be considered equivalent. *)
|
||||||
- [* ] (postfix) matches the previous expression zero, one or
|
|
||||||
several times
|
|
||||||
- [+ ] (postfix) matches the previous expression one or
|
|
||||||
several times
|
|
||||||
- [? ] (postfix) matches the previous expression once or
|
|
||||||
not at all
|
|
||||||
- [[..] ] character set; ranges are denoted with [-], as in [[a-z]];
|
|
||||||
an initial [^], as in [[^0-9]], complements the set
|
|
||||||
- [^ ] matches at beginning of line
|
|
||||||
- [$ ] matches at end of line
|
|
||||||
- [\| ] (infix) alternative between two expressions
|
|
||||||
- [\(..\)] grouping and naming of the enclosed expression
|
|
||||||
- [\1 ] the text matched by the first [\(...\)] expression
|
|
||||||
([\2] for the second expression, etc)
|
|
||||||
- [\b ] matches word boundaries
|
|
||||||
- [\ ] quotes special characters. *)
|
|
||||||
val regexp_case_fold: string -> regexp
|
val regexp_case_fold: string -> regexp
|
||||||
(* Same as [regexp], but the compiled expression will match text
|
|
||||||
in a case-insensitive way: uppercase and lowercase letters will
|
(** [Str.quote s] returns a regexp string that matches exactly
|
||||||
be considered equivalent. *)
|
[s] and nothing else. *)
|
||||||
val quote: string -> string
|
val quote: string -> string
|
||||||
(* [Str.quote s] returns a regexp string that matches exactly
|
|
||||||
[s] and nothing else. *)
|
(** [Str.regexp_string s] returns a regular expression
|
||||||
|
that matches exactly [s] and nothing else.*)
|
||||||
val regexp_string: string -> regexp
|
val regexp_string: string -> regexp
|
||||||
|
|
||||||
|
(** [Str.regexp_string_case_fold] is similar to {!Str.regexp_string},
|
||||||
|
but the regexp matches in a case-insensitive way. *)
|
||||||
val regexp_string_case_fold: string -> regexp
|
val regexp_string_case_fold: string -> regexp
|
||||||
(* [Str.regexp_string s] returns a regular expression
|
|
||||||
that matches exactly [s] and nothing else.
|
|
||||||
[Str.regexp_string_case_fold] is similar, but the regexp
|
|
||||||
matches in a case-insensitive way. *)
|
|
||||||
|
|
||||||
(*** String matching and searching *)
|
|
||||||
|
(** {2 String matching and searching} *)
|
||||||
|
|
||||||
|
|
||||||
|
(** [string_match r s start] tests whether the characters in [s]
|
||||||
|
starting at position [start] match the regular expression [r].
|
||||||
|
The first character of a string has position [0], as usual. *)
|
||||||
external string_match: regexp -> string -> int -> bool
|
external string_match: regexp -> string -> int -> bool
|
||||||
= "str_string_match"
|
= "str_string_match"
|
||||||
(* [string_match r s start] tests whether the characters in [s]
|
|
||||||
starting at position [start] match the regular expression [r].
|
(** [search_forward r s start] searchs the string [s] for a substring
|
||||||
The first character of a string has position [0], as usual. *)
|
matching the regular expression [r]. The search starts at position
|
||||||
|
[start] and proceeds towards the end of the string.
|
||||||
|
Return the position of the first character of the matched
|
||||||
|
substring, or raise [Not_found] if no substring matches. *)
|
||||||
external search_forward: regexp -> string -> int -> int
|
external search_forward: regexp -> string -> int -> int
|
||||||
= "str_search_forward"
|
= "str_search_forward"
|
||||||
(* [search_forward r s start] searchs the string [s] for a substring
|
|
||||||
matching the regular expression [r]. The search starts at position
|
(** Same as {!Str.search_forward}, but the search proceeds towards the
|
||||||
[start] and proceeds towards the end of the string.
|
beginning of the string. *)
|
||||||
Return the position of the first character of the matched
|
|
||||||
substring, or raise [Not_found] if no substring matches. *)
|
|
||||||
external search_backward: regexp -> string -> int -> int
|
external search_backward: regexp -> string -> int -> int
|
||||||
= "str_search_backward"
|
= "str_search_backward"
|
||||||
(* Same as [search_forward], but the search proceeds towards the
|
|
||||||
beginning of the string. *)
|
(** Similar to {!Str.string_match}, but succeeds whenever the argument
|
||||||
|
string is a prefix of a string that matches. This includes
|
||||||
|
the case of a true complete match. *)
|
||||||
external string_partial_match: regexp -> string -> int -> bool
|
external string_partial_match: regexp -> string -> int -> bool
|
||||||
= "str_string_partial_match"
|
= "str_string_partial_match"
|
||||||
(* Similar to [string_match], but succeeds whenever the argument
|
|
||||||
string is a prefix of a string that matches. This includes
|
|
||||||
the case of a true complete match. *)
|
|
||||||
|
|
||||||
|
(** [matched_string s] returns the substring of [s] that was matched
|
||||||
|
by the latest {!Str.string_match}, {!Str.search_forward} or
|
||||||
|
{!Str.search_backward}.
|
||||||
|
The user must make sure that the parameter [s] is the same string
|
||||||
|
that was passed to the matching or searching function. *)
|
||||||
val matched_string: string -> string
|
val matched_string: string -> string
|
||||||
(* [matched_string s] returns the substring of [s] that was matched
|
|
||||||
by the latest [string_match], [search_forward] or [search_backward].
|
(** [match_beginning()] returns the position of the first character
|
||||||
The user must make sure that the parameter [s] is the same string
|
of the substring that was matched by {!Str.string_match},
|
||||||
that was passed to the matching or searching function. *)
|
{!Str.search_forward} or {!Str.search_backward}. *)
|
||||||
val match_beginning: unit -> int
|
val match_beginning: unit -> int
|
||||||
|
|
||||||
|
(** [match_end()] returns the position of the character following the
|
||||||
|
last character of the substring that was matched by [string_match],
|
||||||
|
[search_forward] or [search_backward]. *)
|
||||||
val match_end: unit -> int
|
val match_end: unit -> int
|
||||||
(* [match_beginning()] returns the position of the first character
|
|
||||||
of the substring that was matched by [string_match],
|
(** [matched_group n s] returns the substring of [s] that was matched
|
||||||
[search_forward] or [search_backward]. [match_end()] returns
|
by the [n]th group [\(...\)] of the regular expression during
|
||||||
the position of the character following the last character of
|
the latest {!Str.string_match}, {!Str.search_forward} or
|
||||||
the matched substring. *)
|
{!Str.search_backward}.
|
||||||
|
The user must make sure that the parameter [s] is the same string
|
||||||
|
that was passed to the matching or searching function.
|
||||||
|
[matched_group n s] raises [Not_found] if the [n]th group
|
||||||
|
of the regular expression was not matched. This can happen
|
||||||
|
with groups inside alternatives [\|], options [?]
|
||||||
|
or repetitions [*]. For instance, the empty string will match
|
||||||
|
[\(a\)*], but [matched_group 1 ""] will raise [Not_found]
|
||||||
|
because the first group itself was not matched. *)
|
||||||
val matched_group: int -> string -> string
|
val matched_group: int -> string -> string
|
||||||
(* [matched_group n s] returns the substring of [s] that was matched
|
|
||||||
by the [n]th group [\(...\)] of the regular expression during
|
(** [group_beginning n] returns the position of the first character
|
||||||
the latest [string_match], [search_forward] or [search_backward].
|
of the substring that was matched by the [n]th group of
|
||||||
The user must make sure that the parameter [s] is the same string
|
the regular expression.
|
||||||
that was passed to the matching or searching function.
|
@raise Not_found if the [n]th group of the regular expression
|
||||||
[matched_group n s] raises [Not_found] if the [n]th group
|
was not matched. *)
|
||||||
of the regular expression was not matched. This can happen
|
|
||||||
with groups inside alternatives [\|], options [?]
|
|
||||||
or repetitions [*]. For instance, the empty string will match
|
|
||||||
[\(a\)*], but [matched_group 1 ""] will raise [Not_found]
|
|
||||||
because the first group itself was not matched. *)
|
|
||||||
val group_beginning: int -> int
|
val group_beginning: int -> int
|
||||||
|
|
||||||
|
(** [group_end n] returns
|
||||||
|
the position of the character following the last character of
|
||||||
|
substring that was matched by the [n]th group of the regular expression.
|
||||||
|
@raise Not_found if the [n]th group of the regular expression
|
||||||
|
was not matched. *)
|
||||||
val group_end: int -> int
|
val group_end: int -> int
|
||||||
(* [group_beginning n] returns the position of the first character
|
|
||||||
of the substring that was matched by the [n]th group of
|
|
||||||
the regular expression. [group_end n] returns
|
|
||||||
the position of the character following the last character of
|
|
||||||
the matched substring. Both functions raise [Not_found]
|
|
||||||
if the [n]th group of the regular expression
|
|
||||||
was not matched. *)
|
|
||||||
|
|
||||||
(*** Replacement *)
|
|
||||||
|
|
||||||
|
(** {2 Replacement} *)
|
||||||
|
|
||||||
|
|
||||||
|
(** [global_replace regexp templ s] returns a string identical to [s],
|
||||||
|
except that all substrings of [s] that match [regexp] have been
|
||||||
|
replaced by [templ]. The replacement template [templ] can contain
|
||||||
|
[\1], [\2], etc; these sequences will be replaced by the text
|
||||||
|
matched by the corresponding group in the regular expression.
|
||||||
|
[\0] stands for the text matched by the whole regular expression. *)
|
||||||
val global_replace: regexp -> string -> string -> string
|
val global_replace: regexp -> string -> string -> string
|
||||||
(* [global_replace regexp templ s] returns a string identical to [s],
|
|
||||||
except that all substrings of [s] that match [regexp] have been
|
(** Same as {!Str.global_replace}, except that only the first substring
|
||||||
replaced by [templ]. The replacement template [templ] can contain
|
matching the regular expression is replaced. *)
|
||||||
[\1], [\2], etc; these sequences will be replaced by the text
|
|
||||||
matched by the corresponding group in the regular expression.
|
|
||||||
[\0] stands for the text matched by the whole regular expression. *)
|
|
||||||
val replace_first: regexp -> string -> string -> string
|
val replace_first: regexp -> string -> string -> string
|
||||||
(* Same as [global_replace], except that only the first substring
|
|
||||||
matching the regular expression is replaced. *)
|
(** [global_substitute regexp subst s] returns a string identical
|
||||||
|
to [s], except that all substrings of [s] that match [regexp]
|
||||||
|
have been replaced by the result of function [subst]. The
|
||||||
|
function [subst] is called once for each matching substring,
|
||||||
|
and receives [s] (the whole text) as argument. *)
|
||||||
val global_substitute:
|
val global_substitute:
|
||||||
regexp -> (string -> string) -> string -> string
|
regexp -> (string -> string) -> string -> string
|
||||||
(* [global_substitute regexp subst s] returns a string identical
|
|
||||||
to [s], except that all substrings of [s] that match [regexp]
|
(** Same as {!Str.global_substitute}, except that only the first substring
|
||||||
have been replaced by the result of function [subst]. The
|
matching the regular expression is replaced. *)
|
||||||
function [subst] is called once for each matching substring,
|
|
||||||
and receives [s] (the whole text) as argument. *)
|
|
||||||
val substitute_first:
|
val substitute_first:
|
||||||
regexp -> (string -> string) -> string -> string
|
regexp -> (string -> string) -> string -> string
|
||||||
(* Same as [global_substitute], except that only the first substring
|
|
||||||
matching the regular expression is replaced. *)
|
(** [replace_matched repl s] returns the replacement text [repl]
|
||||||
|
in which [\1], [\2], etc. have been replaced by the text
|
||||||
|
matched by the corresponding groups in the most recent matching
|
||||||
|
operation. [s] must be the same string that was matched during
|
||||||
|
this matching operation. *)
|
||||||
val replace_matched : string -> string -> string
|
val replace_matched : string -> string -> string
|
||||||
(* [replace_matched repl s] returns the replacement text [repl]
|
|
||||||
in which [\1], [\2], etc. have been replaced by the text
|
|
||||||
matched by the corresponding groups in the most recent matching
|
|
||||||
operation. [s] must be the same string that was matched during
|
|
||||||
this matching operation. *)
|
|
||||||
|
|
||||||
(*** Splitting *)
|
(** {2 Splitting} *)
|
||||||
|
|
||||||
|
|
||||||
|
(** [split r s] splits [s] into substrings, taking as delimiters
|
||||||
|
the substrings that match [r], and returns the list of substrings.
|
||||||
|
For instance, [split (regexp "[ \t]+") s] splits [s] into
|
||||||
|
blank-separated words. An occurrence of the delimiter at the
|
||||||
|
beginning and at the end of the string is ignored. *)
|
||||||
val split: regexp -> string -> string list
|
val split: regexp -> string -> string list
|
||||||
(* [split r s] splits [s] into substrings, taking as delimiters
|
|
||||||
the substrings that match [r], and returns the list of substrings.
|
(** Same as {!Str.split}, but splits into at most [n] substrings,
|
||||||
For instance, [split (regexp "[ \t]+") s] splits [s] into
|
where [n] is the extra integer parameter. *)
|
||||||
blank-separated words. An occurrence of the delimiter at the
|
|
||||||
beginning and at the end of the string is ignored. *)
|
|
||||||
val bounded_split: regexp -> string -> int -> string list
|
val bounded_split: regexp -> string -> int -> string list
|
||||||
(* Same as [split], but splits into at most [n] substrings,
|
|
||||||
where [n] is the extra integer parameter. *)
|
|
||||||
|
|
||||||
|
(** Same as {!Str.split} but occurrences of the
|
||||||
|
delimiter at the beginning and at the end of the string are
|
||||||
|
recognized and returned as empty strings in the result.
|
||||||
|
For instance, [split_delim (regexp " ") " abc "]
|
||||||
|
returns [[""; "abc"; ""]], while [split] with the same
|
||||||
|
arguments returns [["abc"]]. *)
|
||||||
val split_delim: regexp -> string -> string list
|
val split_delim: regexp -> string -> string list
|
||||||
|
|
||||||
|
(** Same as {!Str.bounded_split}, but occurrences of the
|
||||||
|
delimiter at the beginning and at the end of the string are
|
||||||
|
recognized and returned as empty strings in the result.
|
||||||
|
For instance, [split_delim (regexp " ") " abc "]
|
||||||
|
returns [[""; "abc"; ""]], while [split] with the same
|
||||||
|
arguments returns [["abc"]]. *)
|
||||||
val bounded_split_delim: regexp -> string -> int -> string list
|
val bounded_split_delim: regexp -> string -> int -> string list
|
||||||
(* Same as [split] and [bounded_split], but occurrences of the
|
|
||||||
delimiter at the beginning and at the end of the string are
|
|
||||||
recognized and returned as empty strings in the result.
|
|
||||||
For instance, [split_delim (regexp " ") " abc "]
|
|
||||||
returns [[""; "abc"; ""]], while [split] with the same
|
|
||||||
arguments returns [["abc"]]. *)
|
|
||||||
|
|
||||||
type split_result = Text of string | Delim of string
|
type split_result = Text of string | Delim of string
|
||||||
|
|
||||||
|
(** Same as {!Str.split_delim}, but returns
|
||||||
|
the delimiters as well as the substrings contained between
|
||||||
|
delimiters. The former are tagged [Delim] in the result list;
|
||||||
|
the latter are tagged [Text]. For instance,
|
||||||
|
[full_split (regexp "[{}]") "{ab}"] returns
|
||||||
|
[[Delim "{"; Text "ab"; Delim "}"]]. *)
|
||||||
val full_split: regexp -> string -> split_result list
|
val full_split: regexp -> string -> split_result list
|
||||||
|
|
||||||
|
(** Same as {!Str.bounded_split_delim}, but returns
|
||||||
|
the delimiters as well as the substrings contained between
|
||||||
|
delimiters. The former are tagged [Delim] in the result list;
|
||||||
|
the latter are tagged [Text]. For instance,
|
||||||
|
[full_split (regexp "[{}]") "{ab}"] returns
|
||||||
|
[[Delim "{"; Text "ab"; Delim "}"]]. *)
|
||||||
val bounded_full_split: regexp -> string -> int -> split_result list
|
val bounded_full_split: regexp -> string -> int -> split_result list
|
||||||
(* Same as [split_delim] and [bounded_split_delim], but returns
|
|
||||||
the delimiters as well as the substrings contained between
|
|
||||||
delimiters. The former are tagged [Delim] in the result list;
|
|
||||||
the latter are tagged [Text]. For instance,
|
|
||||||
[full_split (regexp "[{}]") "{ab}"] returns
|
|
||||||
[[Delim "{"; Text "ab"; Delim "}"]]. *)
|
|
||||||
|
|
||||||
(*** Extracting substrings *)
|
|
||||||
|
|
||||||
|
(** {2 Extracting substrings} *)
|
||||||
|
|
||||||
|
|
||||||
|
(** [string_before s n] returns the substring of all characters of [s]
|
||||||
|
that precede position [n] (excluding the character at
|
||||||
|
position [n]). *)
|
||||||
val string_before: string -> int -> string
|
val string_before: string -> int -> string
|
||||||
(* [string_before s n] returns the substring of all characters of [s]
|
|
||||||
that precede position [n] (excluding the character at
|
(** [string_after s n] returns the substring of all characters of [s]
|
||||||
position [n]). *)
|
that follow position [n] (including the character at
|
||||||
|
position [n]). *)
|
||||||
val string_after: string -> int -> string
|
val string_after: string -> int -> string
|
||||||
(* [string_after s n] returns the substring of all characters of [s]
|
|
||||||
that follow position [n] (including the character at
|
(** [first_chars s n] returns the first [n] characters of [s].
|
||||||
position [n]). *)
|
This is the same function as {!Str.string_before}. *)
|
||||||
val first_chars: string -> int -> string
|
val first_chars: string -> int -> string
|
||||||
(* [first_chars s n] returns the first [n] characters of [s].
|
|
||||||
This is the same function as [string_before]. *)
|
(** [last_chars s n] returns the last [n] characters of [s]. *)
|
||||||
val last_chars: string -> int -> string
|
val last_chars: string -> int -> string
|
||||||
(* [last_chars s n] returns the last [n] characters of [s]. *)
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue