Merge pull request #9872 from xavierleroy/seek-text-channels

Revised {in,out}_channel_length and seek_in for channels in text mode
master
Xavier Leroy 2020-09-08 09:52:22 +02:00 committed by GitHub
commit 1b48b5aa3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 94 additions and 23 deletions

View File

@ -408,7 +408,6 @@ Working version
cancause link-time errors with link-time optimization (LTO).
(Xavier Leroy, report by Richard Jones, review by Nicolás Ojeda Bär)
- #9753: fix build for Android
(Github user @EduardoRFS, review by Xavier Leroy)
@ -418,6 +417,11 @@ Working version
- #9860: wrong range constraint for subtract immediate on zSystems / s390x
(Xavier Leroy, review by Stephen Dolan)
- #9868, #9872: bugs in {in,out}_channel_length and seek_in
for files opened in text mode under Windows
(Xavier Leroy, report by Alain Frisch, review by Nicolás Ojeda Bär
and Alain Frisch)
OCaml 4.11.1
------------

View File

@ -56,6 +56,7 @@ enum {
CHANNEL_FLAG_BLOCKING_WRITE = 2, /* Don't release master lock when writing */
#endif
CHANNEL_FLAG_MANAGED_BY_GC = 4, /* Free and close using GC finalization */
CHANNEL_TEXT_MODE = 8, /* "Text mode" for Windows and Cygwin */
};
/* For an output channel:

View File

@ -80,6 +80,17 @@ static void check_pending(struct channel *channel)
}
}
Caml_inline int descriptor_is_in_binary_mode(int fd)
{
#if defined(_WIN32) || defined(__CYGWIN__)
int oldmode = setmode(fd, O_TEXT);
if (oldmode == O_BINARY) setmode(fd, O_BINARY);
return oldmode == O_BINARY;
#else
return 1;
#endif
}
CAMLexport struct channel * caml_open_descriptor_in(int fd)
{
struct channel * channel;
@ -95,7 +106,7 @@ CAMLexport struct channel * caml_open_descriptor_in(int fd)
channel->revealed = 0;
channel->old_revealed = 0;
channel->refcount = 0;
channel->flags = 0;
channel->flags = descriptor_is_in_binary_mode(fd) ? 0 : CHANNEL_TEXT_MODE;
channel->next = caml_all_opened_channels;
channel->prev = NULL;
channel->name = NULL;
@ -139,34 +150,32 @@ CAMLexport void caml_close_channel(struct channel *channel)
CAMLexport file_offset caml_channel_size(struct channel *channel)
{
file_offset offset;
file_offset end;
file_offset here, end;
int fd;
check_pending(channel);
check_pending(channel);
/* We extract data from [channel] before dropping the OCaml lock, in case
someone else touches the block. */
fd = channel->fd;
offset = channel->offset;
here = channel->flags & CHANNEL_TEXT_MODE ? -1 : channel->offset;
caml_enter_blocking_section_no_pending();
end = lseek(fd, 0, SEEK_END);
if (end == -1 || lseek(fd, offset, SEEK_SET) != offset) {
caml_leave_blocking_section();
caml_sys_error(NO_ARG);
if (here == -1) {
here = lseek(fd, 0, SEEK_CUR);
if (here == -1) goto error;
}
end = lseek(fd, 0, SEEK_END);
if (end == -1) goto error;
if (lseek(fd, here, SEEK_SET) != here) goto error;
caml_leave_blocking_section();
return end;
error:
caml_leave_blocking_section();
caml_sys_error(NO_ARG);
}
CAMLexport int caml_channel_binary_mode(struct channel *channel)
{
#if defined(_WIN32) || defined(__CYGWIN__)
int oldmode = setmode(channel->fd, O_BINARY);
if (oldmode == O_TEXT) setmode(channel->fd, O_TEXT);
return oldmode == O_BINARY;
#else
return 1;
#endif
return channel->flags & CHANNEL_TEXT_MODE ? 0 : 1;
}
/* Output */
@ -349,8 +358,9 @@ CAMLexport intnat caml_really_getblock(struct channel *chan, char *p, intnat n)
CAMLexport void caml_seek_in(struct channel *channel, file_offset dest)
{
if (dest >= channel->offset - (channel->max - channel->buff) &&
dest <= channel->offset) {
if (dest >= channel->offset - (channel->max - channel->buff)
&& dest <= channel->offset
&& (channel->flags & CHANNEL_TEXT_MODE) == 0) {
channel->curr = channel->max - (channel->offset - dest);
} else {
caml_enter_blocking_section_no_pending();
@ -592,13 +602,14 @@ static file_offset ml_channel_size(value vchannel)
Lock(channel);
size = caml_channel_size(Channel(vchannel));
Unlock(channel);
if (size > Max_long) { errno = EOVERFLOW; caml_sys_error(NO_ARG); }
CAMLreturnT(file_offset, size);
}
CAMLprim value caml_ml_channel_size(value vchannel)
{
return Val_long(ml_channel_size(vchannel));
file_offset size = ml_channel_size(vchannel);
if (size > Max_long) { errno = EOVERFLOW; caml_sys_error(NO_ARG); }
return Val_long(size);
}
CAMLprim value caml_ml_channel_size_64(value vchannel)
@ -621,6 +632,10 @@ CAMLprim value caml_ml_set_binary_mode(value vchannel, value mode)
#endif
if (setmode(channel->fd, Bool_val(mode) ? O_BINARY : O_TEXT) == -1)
caml_sys_error(NO_ARG);
if (Bool_val(mode))
channel->flags &= ~CHANNEL_TEXT_MODE;
else
channel->flags |= CHANNEL_TEXT_MODE;
#endif
return Val_unit;
}

View File

@ -998,7 +998,13 @@ val seek_out : out_channel -> int -> unit
val pos_out : out_channel -> int
(** Return the current writing position for the given channel. Does
not work on channels opened with the [Open_append] flag (returns
unspecified results). *)
unspecified results).
For files opened in text mode under Windows, the returned position
is approximate (owing to end-of-line conversion); in particular,
saving the current position with [pos_out], then going back to
this position using [seek_out] will not work. For this
programming idiom to work reliably and portably, the file must be
opened in binary mode. *)
val out_channel_length : out_channel -> int
(** Return the size (number of characters) of the regular file
@ -1113,7 +1119,13 @@ val seek_in : in_channel -> int -> unit
files of other kinds, the behavior is unspecified. *)
val pos_in : in_channel -> int
(** Return the current reading position for the given channel. *)
(** Return the current reading position for the given channel. For
files opened in text mode under Windows, the returned position is
approximate (owing to end-of-line conversion); in particular,
saving the current position with [pos_in], then going back to this
position using [seek_in] will not work. For this programming
idiom to work reliably and portably, the file must be opened in
binary mode. *)
val in_channel_length : in_channel -> int
(** Return the size (number of characters) of the regular file

View File

@ -0,0 +1,20 @@
(* TEST *)
let len = 15000
let rounds = 10
let () =
let oc = open_out "data.txt" in
for i = 1 to rounds do
Printf.fprintf oc "%s\n%!" (String.make len 'x');
done;
close_out oc;
let ic = open_in "data.txt" in
let l1 = in_channel_length ic in
for i = 1 to rounds do
let s = input_line ic in
assert (String.length s = len);
let l = in_channel_length ic in
assert (l = l1)
done;
close_in ic

View File

@ -0,0 +1,19 @@
(* TEST *)
let () =
let oc = open_out_bin "data.txt" in
output_string oc "0\r\n1\r\n";
close_out oc;
(* Open in text mode to trigger EOL conversion under Windows *)
let ic = open_in "data.txt" in
ignore (input_line ic);
seek_in ic 3;
(* Normally we should be looking at "1\r\n", which will be read as
"1" under Windows because of EOL conversion and "1\r" otherwise.
What goes wrong with the old implementation of seek_in is that
we have "0\n\1\n" in the channel buffer and have read "0\n" already,
so we think we are at position 2, and the seek to position 3
just advances by one in the buffer, pointing to "\n" instead of "1\n". *)
let l = input_line ic in
close_in ic;
assert (l = "1" || l = "1\r")