Merge pull request #9872 from xavierleroy/seek-text-channels
Revised {in,out}_channel_length and seek_in for channels in text modemaster
commit
1b48b5aa3c
6
Changes
6
Changes
|
@ -408,7 +408,6 @@ Working version
|
|||
cancause link-time errors with link-time optimization (LTO).
|
||||
(Xavier Leroy, report by Richard Jones, review by Nicolás Ojeda Bär)
|
||||
|
||||
|
||||
- #9753: fix build for Android
|
||||
(Github user @EduardoRFS, review by Xavier Leroy)
|
||||
|
||||
|
@ -418,6 +417,11 @@ Working version
|
|||
- #9860: wrong range constraint for subtract immediate on zSystems / s390x
|
||||
(Xavier Leroy, review by Stephen Dolan)
|
||||
|
||||
- #9868, #9872: bugs in {in,out}_channel_length and seek_in
|
||||
for files opened in text mode under Windows
|
||||
(Xavier Leroy, report by Alain Frisch, review by Nicolás Ojeda Bär
|
||||
and Alain Frisch)
|
||||
|
||||
|
||||
OCaml 4.11.1
|
||||
------------
|
||||
|
|
|
@ -56,6 +56,7 @@ enum {
|
|||
CHANNEL_FLAG_BLOCKING_WRITE = 2, /* Don't release master lock when writing */
|
||||
#endif
|
||||
CHANNEL_FLAG_MANAGED_BY_GC = 4, /* Free and close using GC finalization */
|
||||
CHANNEL_TEXT_MODE = 8, /* "Text mode" for Windows and Cygwin */
|
||||
};
|
||||
|
||||
/* For an output channel:
|
||||
|
|
55
runtime/io.c
55
runtime/io.c
|
@ -80,6 +80,17 @@ static void check_pending(struct channel *channel)
|
|||
}
|
||||
}
|
||||
|
||||
Caml_inline int descriptor_is_in_binary_mode(int fd)
|
||||
{
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
int oldmode = setmode(fd, O_TEXT);
|
||||
if (oldmode == O_BINARY) setmode(fd, O_BINARY);
|
||||
return oldmode == O_BINARY;
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
CAMLexport struct channel * caml_open_descriptor_in(int fd)
|
||||
{
|
||||
struct channel * channel;
|
||||
|
@ -95,7 +106,7 @@ CAMLexport struct channel * caml_open_descriptor_in(int fd)
|
|||
channel->revealed = 0;
|
||||
channel->old_revealed = 0;
|
||||
channel->refcount = 0;
|
||||
channel->flags = 0;
|
||||
channel->flags = descriptor_is_in_binary_mode(fd) ? 0 : CHANNEL_TEXT_MODE;
|
||||
channel->next = caml_all_opened_channels;
|
||||
channel->prev = NULL;
|
||||
channel->name = NULL;
|
||||
|
@ -139,34 +150,32 @@ CAMLexport void caml_close_channel(struct channel *channel)
|
|||
|
||||
CAMLexport file_offset caml_channel_size(struct channel *channel)
|
||||
{
|
||||
file_offset offset;
|
||||
file_offset end;
|
||||
file_offset here, end;
|
||||
int fd;
|
||||
check_pending(channel);
|
||||
|
||||
check_pending(channel);
|
||||
/* We extract data from [channel] before dropping the OCaml lock, in case
|
||||
someone else touches the block. */
|
||||
fd = channel->fd;
|
||||
offset = channel->offset;
|
||||
here = channel->flags & CHANNEL_TEXT_MODE ? -1 : channel->offset;
|
||||
caml_enter_blocking_section_no_pending();
|
||||
end = lseek(fd, 0, SEEK_END);
|
||||
if (end == -1 || lseek(fd, offset, SEEK_SET) != offset) {
|
||||
caml_leave_blocking_section();
|
||||
caml_sys_error(NO_ARG);
|
||||
if (here == -1) {
|
||||
here = lseek(fd, 0, SEEK_CUR);
|
||||
if (here == -1) goto error;
|
||||
}
|
||||
end = lseek(fd, 0, SEEK_END);
|
||||
if (end == -1) goto error;
|
||||
if (lseek(fd, here, SEEK_SET) != here) goto error;
|
||||
caml_leave_blocking_section();
|
||||
return end;
|
||||
error:
|
||||
caml_leave_blocking_section();
|
||||
caml_sys_error(NO_ARG);
|
||||
}
|
||||
|
||||
CAMLexport int caml_channel_binary_mode(struct channel *channel)
|
||||
{
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
int oldmode = setmode(channel->fd, O_BINARY);
|
||||
if (oldmode == O_TEXT) setmode(channel->fd, O_TEXT);
|
||||
return oldmode == O_BINARY;
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
return channel->flags & CHANNEL_TEXT_MODE ? 0 : 1;
|
||||
}
|
||||
|
||||
/* Output */
|
||||
|
@ -349,8 +358,9 @@ CAMLexport intnat caml_really_getblock(struct channel *chan, char *p, intnat n)
|
|||
|
||||
CAMLexport void caml_seek_in(struct channel *channel, file_offset dest)
|
||||
{
|
||||
if (dest >= channel->offset - (channel->max - channel->buff) &&
|
||||
dest <= channel->offset) {
|
||||
if (dest >= channel->offset - (channel->max - channel->buff)
|
||||
&& dest <= channel->offset
|
||||
&& (channel->flags & CHANNEL_TEXT_MODE) == 0) {
|
||||
channel->curr = channel->max - (channel->offset - dest);
|
||||
} else {
|
||||
caml_enter_blocking_section_no_pending();
|
||||
|
@ -592,13 +602,14 @@ static file_offset ml_channel_size(value vchannel)
|
|||
Lock(channel);
|
||||
size = caml_channel_size(Channel(vchannel));
|
||||
Unlock(channel);
|
||||
if (size > Max_long) { errno = EOVERFLOW; caml_sys_error(NO_ARG); }
|
||||
CAMLreturnT(file_offset, size);
|
||||
}
|
||||
|
||||
CAMLprim value caml_ml_channel_size(value vchannel)
|
||||
{
|
||||
return Val_long(ml_channel_size(vchannel));
|
||||
file_offset size = ml_channel_size(vchannel);
|
||||
if (size > Max_long) { errno = EOVERFLOW; caml_sys_error(NO_ARG); }
|
||||
return Val_long(size);
|
||||
}
|
||||
|
||||
CAMLprim value caml_ml_channel_size_64(value vchannel)
|
||||
|
@ -621,6 +632,10 @@ CAMLprim value caml_ml_set_binary_mode(value vchannel, value mode)
|
|||
#endif
|
||||
if (setmode(channel->fd, Bool_val(mode) ? O_BINARY : O_TEXT) == -1)
|
||||
caml_sys_error(NO_ARG);
|
||||
if (Bool_val(mode))
|
||||
channel->flags &= ~CHANNEL_TEXT_MODE;
|
||||
else
|
||||
channel->flags |= CHANNEL_TEXT_MODE;
|
||||
#endif
|
||||
return Val_unit;
|
||||
}
|
||||
|
|
|
@ -998,7 +998,13 @@ val seek_out : out_channel -> int -> unit
|
|||
val pos_out : out_channel -> int
|
||||
(** Return the current writing position for the given channel. Does
|
||||
not work on channels opened with the [Open_append] flag (returns
|
||||
unspecified results). *)
|
||||
unspecified results).
|
||||
For files opened in text mode under Windows, the returned position
|
||||
is approximate (owing to end-of-line conversion); in particular,
|
||||
saving the current position with [pos_out], then going back to
|
||||
this position using [seek_out] will not work. For this
|
||||
programming idiom to work reliably and portably, the file must be
|
||||
opened in binary mode. *)
|
||||
|
||||
val out_channel_length : out_channel -> int
|
||||
(** Return the size (number of characters) of the regular file
|
||||
|
@ -1113,7 +1119,13 @@ val seek_in : in_channel -> int -> unit
|
|||
files of other kinds, the behavior is unspecified. *)
|
||||
|
||||
val pos_in : in_channel -> int
|
||||
(** Return the current reading position for the given channel. *)
|
||||
(** Return the current reading position for the given channel. For
|
||||
files opened in text mode under Windows, the returned position is
|
||||
approximate (owing to end-of-line conversion); in particular,
|
||||
saving the current position with [pos_in], then going back to this
|
||||
position using [seek_in] will not work. For this programming
|
||||
idiom to work reliably and portably, the file must be opened in
|
||||
binary mode. *)
|
||||
|
||||
val in_channel_length : in_channel -> int
|
||||
(** Return the size (number of characters) of the regular file
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
(* TEST *)
|
||||
|
||||
let len = 15000
|
||||
let rounds = 10
|
||||
|
||||
let () =
|
||||
let oc = open_out "data.txt" in
|
||||
for i = 1 to rounds do
|
||||
Printf.fprintf oc "%s\n%!" (String.make len 'x');
|
||||
done;
|
||||
close_out oc;
|
||||
let ic = open_in "data.txt" in
|
||||
let l1 = in_channel_length ic in
|
||||
for i = 1 to rounds do
|
||||
let s = input_line ic in
|
||||
assert (String.length s = len);
|
||||
let l = in_channel_length ic in
|
||||
assert (l = l1)
|
||||
done;
|
||||
close_in ic
|
|
@ -0,0 +1,19 @@
|
|||
(* TEST *)
|
||||
|
||||
let () =
|
||||
let oc = open_out_bin "data.txt" in
|
||||
output_string oc "0\r\n1\r\n";
|
||||
close_out oc;
|
||||
(* Open in text mode to trigger EOL conversion under Windows *)
|
||||
let ic = open_in "data.txt" in
|
||||
ignore (input_line ic);
|
||||
seek_in ic 3;
|
||||
(* Normally we should be looking at "1\r\n", which will be read as
|
||||
"1" under Windows because of EOL conversion and "1\r" otherwise.
|
||||
What goes wrong with the old implementation of seek_in is that
|
||||
we have "0\n\1\n" in the channel buffer and have read "0\n" already,
|
||||
so we think we are at position 2, and the seek to position 3
|
||||
just advances by one in the buffer, pointing to "\n" instead of "1\n". *)
|
||||
let l = input_line ic in
|
||||
close_in ic;
|
||||
assert (l = "1" || l = "1\r")
|
Loading…
Reference in New Issue