ocaml/tools/scrapelabels.ml

290 lines
9.9 KiB
OCaml
Raw Normal View History

(***********************************************************************)
(* *)
(* Objective Caml *)
(* *)
(* Jacques Garrigue, Kyoto University RIMS *)
(* *)
(* Copyright 2001 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* $Id$ *)
open StdLabels
open Lexer301
let input_buffer = Buffer.create 16383
let input_function ic buf len =
let len = input ic buf 0 len in
Buffer.add_substring input_buffer buf 0 len;
len
let output_buffer = Buffer.create 16383
let modified = ref false
let modules =
ref [ "Arg"; "BigArray"; "Buffer"; "Condition"; "Dbm"; "Digest"; "Dynlink";
"Event"; "Filename"; "Format"; "Gc"; "Genlex"; "Graphics";
"Lexing"; "Marshal"; "Mutex"; "Parsing"; "Pervasives"; "Queue";
"Sort"; "Stack"; "Str"; "Stream"; "Sys";
"Thread"; "ThreadUnix"; "Weak" ]
let stdlabels = ["Array"; "List"; "String"]
let morelabels = ["Hashtbl"; "Map"; "Set"]
let alllabels = ref false
let noopen = ref false
exception Closing of token
let convert_impl buffer =
let input_pos = ref 0 in
let copy_input stop =
Buffer.add_substring output_buffer (Buffer.contents input_buffer)
!input_pos (stop - !input_pos);
input_pos := stop
in
let next_token () =
let token = Lexer301.token buffer
and start = Lexing.lexeme_start buffer
and stop = Lexing.lexeme_end buffer in
match token with
RPAREN | RBRACKET |BARRBRACKET | GREATERRBRACKET | END
| RBRACE | GREATERRBRACE ->
raise (Closing token)
| EOF ->
raise End_of_file
| _ ->
(token, start, stop)
in
let openunix = ref None and openstd = ref None and openmore = ref None in
let rec may_start (token, s, e) =
match token with
LIDENT _ -> search_start (dropext (next_token ()))
| UIDENT m when List.mem m !modules ->
may_discard (dropext (next_token ()))
| UIDENT m ->
List.iter ~f:
(fun (set,r) ->
if !r = None && List.mem m ~set then r := Some true)
[stdlabels, openstd; ["Unix"], openunix; morelabels, openmore];
search_start (next_token ())
| _ -> search_start (token, s, e)
and dropext (token, s, e) =
match token with
DOT ->
let (token, s, e) = next_token () in
begin match token with
LPAREN | LBRACKET | LBRACE ->
process_paren (token, s, e);
dropext (next_token ())
| UIDENT _ | LIDENT _ ->
dropext (next_token ())
| _ ->
prerr_endline ("bad index at position " ^ string_of_int s);
(token, s, e)
end
| _ ->
(token, s, e)
and may_discard (token, s, e) =
match token with
TILDE | LABEL _ ->
modified := true;
copy_input s; input_pos := e;
may_discard (next_token ())
| _ when !alllabels ->
may_discard (next_token ())
| LPAREN | LBRACKET | LBRACKETBAR | LBRACKETLESS | BEGIN
| LBRACE | LBRACELESS | STRUCT | SIG | OBJECT->
process_paren (token, s, e);
may_discard (next_token ())
| PREFIXOP _ ->
may_discard (next_token ())
| LIDENT _ | UIDENT _ ->
may_discard (dropext (next_token ()))
| BACKQUOTE ->
ignore (next_token ());
may_discard (next_token ())
| INT _ | CHAR _ | STRING _ | FLOAT _ | FALSE | TRUE ->
may_discard (next_token ())
| _ ->
search_start (token, s, e)
and search_start (token, s, e) =
match token with
LPAREN | LBRACKET | LBRACKETBAR | LBRACKETLESS | BEGIN
| LBRACE | LBRACELESS | STRUCT | SIG | OBJECT ->
process_paren (token, s, e);
search_start (next_token ())
| EQUAL | SEMI | SEMISEMI | MINUSGREATER | LESSMINUS | COMMA
| IF | THEN | ELSE | WHILE | TO | DOWNTO | DO | IN | MATCH | TRY
| INFIXOP0 _ | INFIXOP1 _ | INFIXOP2 _ | INFIXOP3 _ | INFIXOP4 _
| PLUS | MINUS | MINUSDOT | STAR | LESS | GREATER
| OR | BARBAR | AMPERSAND | AMPERAMPER | COLONEQUAL ->
may_start (next_token ())
| OPEN ->
begin match next_token () with
| UIDENT m, _, _ ->
List.iter
~f:(fun (set,r) -> if List.mem m ~set then r := Some false)
[stdlabels, openstd; ["Unix"], openunix; morelabels, openmore]
| _ -> ()
end;
search_start (next_token ())
| _ ->
search_start (next_token ())
and process_paren (token, s, e) =
try match token with
LPAREN | LBRACKET | LBRACKETBAR | LBRACKETLESS | BEGIN ->
may_start (next_token ())
| LBRACE | LBRACELESS | STRUCT | SIG | OBJECT ->
search_start (next_token ())
| _ ->
assert false
with Closing last ->
match token, last with
LPAREN, RPAREN
| (LBRACKET|LBRACKETBAR|LBRACKETLESS),
(RBRACKET|BARRBRACKET|GREATERRBRACKET)
| (BEGIN|STRUCT|SIG|OBJECT), END
| LBRACE, RBRACE
| LBRACELESS, GREATERRBRACE -> ()
| _ -> raise (Closing last)
in
let first = next_token () in
try
if !alllabels then may_discard first else may_start first
with End_of_file ->
copy_input (Buffer.length input_buffer);
if not !alllabels
&& List.exists (fun r -> !r = Some true) [openstd; openunix; openmore]
then begin
modified := true;
let text = Buffer.contents output_buffer in
Buffer.clear output_buffer;
let (token, s, _) = first in
Buffer.add_substring output_buffer text 0 s;
List.iter ~f:
(fun (r, s) ->
if !r = Some true then Buffer.add_string output_buffer s)
[ openstd, "open StdLabels\n"; openmore, "open MoreLabels\n";
openunix, "module Unix = UnixLabels\n" ];
let sep =
if List.mem token [CLASS; EXTERNAL; EXCEPTION; FUNCTOR; LET;
MODULE; FUNCTOR; TYPE; VAL]
then "\n"
else if token = OPEN then "" else ";;\n\n"
in
Buffer.add_string output_buffer sep;
Buffer.add_substring output_buffer text s (String.length text - s)
end
| Closing _ ->
prerr_endline ("bad closing token at position " ^
string_of_int (Lexing.lexeme_start buffer));
modified := false
type state = Out | Enter | In | Escape
let convert_intf buffer =
let input_pos = ref 0 in
let copy_input stop =
Buffer.add_substring output_buffer (Buffer.contents input_buffer)
!input_pos (stop - !input_pos);
input_pos := stop
in
let last = ref (EOF, 0, 0) in
let state = ref Out in
try while true do
let token = Lexer301.token buffer
and start = Lexing.lexeme_start buffer
and stop = Lexing.lexeme_end buffer
and last_token, last_start, last_stop = !last in
begin match token with
| EXCEPTION | CONSTRAINT ->
state := In
| VAL | EXTERNAL | CLASS | METHOD | TYPE | AND ->
state := Enter
| EQUAL when !state = Enter ->
state := In
| COLON ->
begin match !state, last_token with
| In, LIDENT _ ->
modified := true;
copy_input last_start;
input_pos := stop
| Enter, _ ->
state := In
| Escape, _ ->
state := In
| _ ->
state := Out
end
| LBRACE | SEMI | QUESTION when !state = In ->
state := Escape
| SEMISEMI | SIG | STRUCT | END | OBJECT | OPEN | INCLUDE | MODULE ->
state := Out
| EOF -> raise End_of_file
| _ -> ()
end;
last := (token, start, stop)
done with
End_of_file ->
copy_input (Buffer.length input_buffer)
let convert_file ~intf name =
let ic = open_in name in
Buffer.clear input_buffer;
Buffer.clear output_buffer;
modified := false;
begin
let convert = if intf then convert_intf else convert_impl in
try convert (Lexing.from_function (input_function ic)); close_in ic
with exn -> close_in ic; raise exn
end;
if !modified then begin
let backup = name ^ ".bak" in
if Sys.file_exists backup then Sys.remove name
else Sys.rename name backup;
let oc = open_out name in
Buffer.output_buffer oc output_buffer;
close_out oc
end
else prerr_endline ("No changes in " ^ name)
let _ =
let files = ref [] and intf = ref false
and keepstd = ref false and keepmore = ref false in
Arg.parse
[ "-intf", Arg.Set intf,
" remove all non-optional labels from an interface;\n" ^
" other options are ignored";
"-all", Arg.Set alllabels,
" remove all labels, possibly including optional ones!";
"-keepstd", Arg.Set keepstd,
" keep labels for Array, List, String and Unix";
"-keepmore", Arg.Set keepmore,
" keep also labels for Hashtbl, Map and Set; implies -keepstd";
"-m", Arg.String (fun s -> modules := s :: !modules),
"<module> remove also labels for <module>";
"-noopen", Arg.Set noopen,
" do not insert `open' statements for -keepstd/-keepmore" ]
(fun s -> files := s :: !files)
("Usage: scrapelabels <options> <source files>\n" ^
" Remove labels from function arguments in standard library modules.\n" ^
" With -intf option below, can also process interfaces.\n" ^
" Old files are renamed to <file>.bak if there is no backup yet.\n" ^
"Options are:");
if !keepmore then keepstd := true;
if not !keepstd then modules := "Unix" :: stdlabels @ !modules;
if not !keepmore then modules := morelabels @ !modules;
List.iter (List.rev !files) ~f:
begin fun name ->
prerr_endline ("Processing " ^ name);
Printexc.catch (convert_file ~intf:!intf) name
end