Ajout memory mapping. Diverses corrections

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@2865 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
master
Xavier Leroy 2000-02-25 13:45:54 +00:00
parent e448d52aa7
commit 36e77a4f4f
7 changed files with 233 additions and 17 deletions

View File

@ -6,5 +6,9 @@ bigarray_stubs.o: bigarray_stubs.c ../../byterun/alloc.h \
../../byterun/fix_code.h ../../byterun/memory.h ../../byterun/gc.h \
../../byterun/major_gc.h ../../byterun/freelist.h \
../../byterun/minor_gc.h
mmap_unix.o: mmap_unix.c bigarray.h ../../byterun/mlvalues.h \
../../byterun/config.h ../../byterun/../config/m.h \
../../byterun/../config/s.h ../../byterun/misc.h \
../../byterun/custom.h ../../byterun/fail.h ../../byterun/sys.h
bigarray.cmo: bigarray.cmi
bigarray.cmx: bigarray.cmi

View File

@ -19,7 +19,7 @@ CFLAGS=-I../../byterun -g -O $(BYTECCCOMPOPTS)
CAMLC=../../boot/ocamlrun ../../ocamlc -I ../../stdlib -I ../unix
CAMLOPT=../../boot/ocamlrun ../../ocamlopt -I ../../stdlib -I ../unix
C_OBJS=bigarray_stubs.o
C_OBJS=bigarray_stubs.o mmap_unix.o
CAML_OBJS=bigarray.cmo
@ -33,10 +33,10 @@ libbigarray.a: $(C_OBJS)
$(RANLIB) libbigarray.a
bigarray.cma: $(CAML_OBJS)
$(CAMLC) -a -o bigarray.cma $(CAML_OBJS)
$(CAMLC) -a -linkall -o bigarray.cma $(CAML_OBJS)
bigarray.cmxa: $(CAML_OBJS:.cmo=.cmx)
$(CAMLOPT) -a -o bigarray.cmxa $(CAML_OBJS:.cmo=.cmx)
$(CAMLOPT) -a -linkall -o bigarray.cmxa $(CAML_OBJS:.cmo=.cmx)
partialclean:
rm -f *.cm*

View File

@ -43,12 +43,14 @@ enum caml_bigarray_layout {
enum caml_bigarray_managed {
BIGARRAY_EXTERNAL = 0, /* Data is not allocated by Caml */
BIGARRAY_MANAGED = 0x200, /* Data is allocated by Caml */
BIGARRAY_MANAGED_MASK = 0x200 /* Mask for "managed" bit in flags field */
BIGARRAY_MAPPED_FILE = 0x400, /* Data is a memory mapped file */
BIGARRAY_MANAGED_MASK = 0x600 /* Mask for "managed" bits in flags field */
};
struct caml_bigarray_proxy {
long refcount; /* Reference count */
void * data; /* Pointer to base of actual data */
unsigned long size; /* Size of data in bytes (if mapped file) */
};
struct caml_bigarray {
@ -67,5 +69,4 @@ extern value alloc_bigarray(int flags, int num_dims, void * data, long * dim);
extern value alloc_bigarray_dims(int flags, int num_dims, void * data,
... /*dimensions, with type long */);
#endif

View File

@ -14,6 +14,10 @@
(* Module [Bigarray]: large, multi-dimensional, numerical arrays *)
external init : unit -> unit = "bigarray_init"
let _ = init()
type ('a, 'b) kind = int
type int8_signed_elt
@ -77,6 +81,9 @@ module Genarray = struct
external blit: ('a, 'b, 'c) t -> ('a, 'b, 'c) t -> unit
= "bigarray_blit"
external fill: ('a, 'b, 'c) t -> 'a -> unit = "bigarray_fill"
external map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout ->
shared:bool -> dims:int array -> ('a, 'b, 'c) t
= "bigarray_map_file"
end
module Array1 = struct
@ -94,6 +101,8 @@ module Array1 = struct
let ofs = if (Obj.magic layout : 'a layout) = c_layout then 0 else 1 in
for i = 0 to Array.length data - 1 do set ba (i + ofs) data.(i) done;
ba
let map_file fd kind layout shared dim =
Genarray.map_file fd kind layout shared [|dim|]
end
module Array2 = struct
@ -124,6 +133,8 @@ module Array2 = struct
done
done;
ba
let map_file fd kind layout shared dim1 dim2 =
Genarray.map_file fd kind layout shared [|dim1;dim2|]
end
module Array3 = struct
@ -163,6 +174,8 @@ module Array3 = struct
done
done;
ba
let map_file fd kind layout shared dim1 dim2 dim3 =
Genarray.map_file fd kind layout shared [|dim1;dim2;dim3|]
end
external genarray_of_array1: ('a, 'b, 'c) Array1.t -> ('a, 'b, 'c) Genarray.t = "%identity"

View File

@ -310,6 +310,40 @@ module Genarray: sig
the big array [a]. Setting only some elements of [a] to [v]
can be achieved by applying [Genarray.fill] to a sub-array
or a slice of [a]. *)
external map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout ->
shared:bool -> dims:int array -> ('a, 'b, 'c) t = "bigarray_map_file"
(* Memory mapping of a file as a big array.
[Genarray.map_file fd kind layout shared dims]
returns a big array of kind [kind], layout [layout],
and dimensions as specified in [dims]. The data contained in
this big array are the contents of the file referred to by
the file descriptor [fd] (as opened previously with
[Unix.openfile], for example). If [shared] is [true],
all modifications performed on the array are reflected in
the file. This requires that [fd] be opened with write permissions.
If [shared] is [false], modifications performed on the array
are done in memory only, using copy-on-write of the modified
pages; the underlying file is not affected.
[Genarray.map_file] is much more efficient than reading
the whole file in a big array, modifying that big array,
and writing it afterwards.
To adjust automatically the dimensions of the big array to
the actual size of the file, the major dimension (that is,
the first dimension for an array with C layout, and the last
dimension for an array with Fortran layout) can be given as
[-1]. [Genarray.map_file] then determines the major dimension
from the size of the file. The file must contain an integral
number of sub-arrays as determined by the non-major dimensions,
otherwise [Failure] is raised.
If all dimensions of the big array are given, the file size is
matched against the size of the big array. If the file is larger
than the big array, only the initial portion of the file is
mapped to the big array. If the file is smaller than the big
array, the file is automatically grown to the size of the big array.
This requires write permissions on [fd]. *)
end
(*** One-dimensional arrays *)
@ -359,6 +393,10 @@ module Array1: sig
val of_array: ('a, 'b) kind -> 'c layout -> 'a array -> ('a, 'b, 'c) t
(* Build a one-dimensional big array initialized from the
given array. *)
val map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout ->
shared:bool -> dim:int -> ('a, 'b, 'c) t
(* Memory mapping of a file as a one-dimensional big array.
See [Genarray.map_file] for more details. *)
end
(*** Two-dimensional arrays *)
@ -432,6 +470,10 @@ module Array2: sig
val of_array: ('a, 'b) kind -> 'c layout -> 'a array array -> ('a, 'b, 'c) t
(* Build a two-dimensional big array initialized from the
given array of arrays. *)
val map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout ->
shared:bool -> dim1:int -> dim2:int -> ('a, 'b, 'c) t
(* Memory mapping of a file as a two-dimensional big array.
See [Genarray.map_file] for more details. *)
end
(*** Three-dimensional arrays *)
@ -528,6 +570,10 @@ module Array3: sig
('a, 'b) kind -> 'c layout -> 'a array array array -> ('a, 'b, 'c) t
(* Build a three-dimensional big array initialized from the
given array of arrays of arrays. *)
val map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout ->
shared:bool -> dim1:int -> dim2:int -> dim3:int -> ('a, 'b, 'c) t
(* Memory mapping of a file as a three-dimensional big array.
See [Genarray.map_file] for more details. *)
end
(*** Coercions between generic big arrays and fixed-dimension big arrays *)

View File

@ -23,11 +23,14 @@
#include "memory.h"
#include "mlvalues.h"
extern void bigarray_unmap_file(void * addr, unsigned long len);
/* from mmap_stubs.c */
/* Compute the number of elements of a big array */
static long bigarray_num_elts(struct caml_bigarray * b)
static unsigned long bigarray_num_elts(struct caml_bigarray * b)
{
long num_elts;
unsigned long num_elts;
int i;
num_elts = 1;
for (i = 0; i < b->num_dims; i++) num_elts = num_elts * b->dim[i];
@ -36,7 +39,7 @@ static long bigarray_num_elts(struct caml_bigarray * b)
/* Size in bytes of a bigarray element, indexed by bigarray kind */
static int bigarray_element_size[] =
int bigarray_element_size[] =
{ 4 /*FLOAT32*/, 8 /*FLOAT64*/,
1 /*SINT8*/, 1 /*UINT8*/,
2 /*SINT16*/, 2 /*UINT16*/,
@ -44,6 +47,14 @@ static int bigarray_element_size[] =
sizeof(value) /*CAML_INT*/, sizeof(value) /*NATIVE_INT*/
};
/* Compute the number of bytes for the elements of a big array */
unsigned long bigarray_byte_size(struct caml_bigarray * b)
{
return bigarray_num_elts(b)
* bigarray_element_size[b->flags & BIGARRAY_KIND_MASK];
}
/* Allocation of a big array */
static struct custom_operations bigarray_ops;
@ -366,14 +377,30 @@ value bigarray_dim(value vb, value vn)
static void bigarray_finalize(value v)
{
struct caml_bigarray * b = Bigarray_val(v);
if ((b->flags & BIGARRAY_MANAGED_MASK) == BIGARRAY_EXTERNAL) return;
if (b->proxy == NULL) {
free(b->data);
} else {
if (-- b->proxy->refcount == 0) {
stat_free(b->proxy->data);
stat_free(b->proxy);
switch (b->flags & BIGARRAY_MANAGED_MASK) {
case BIGARRAY_EXTERNAL:
break;
case BIGARRAY_MANAGED:
if (b->proxy == NULL) {
free(b->data);
} else {
if (-- b->proxy->refcount == 0) {
stat_free(b->proxy->data);
stat_free(b->proxy);
}
}
break;
case BIGARRAY_MAPPED_FILE:
if (b->proxy == NULL) {
bigarray_unmap_file(b->data, bigarray_byte_size(b));
} else {
if (-- b->proxy->refcount == 0) {
bigarray_unmap_file(b->proxy->data, b->proxy->size);
stat_free(b->proxy);
}
}
break;
}
}
@ -383,7 +410,7 @@ static int bigarray_compare(value v1, value v2)
{
struct caml_bigarray * b1 = Bigarray_val(v1);
struct caml_bigarray * b2 = Bigarray_val(v2);
long n, num_elts;
unsigned long n, num_elts;
int i;
/* Compare number of dimensions */
@ -594,7 +621,7 @@ unsigned long bigarray_deserialize(void * dst)
{
struct caml_bigarray * b = dst;
int i, elt_size;
long num_elts;
unsigned long num_elts;
/* Read back header information */
b->num_dims = deserialize_uint_4();
@ -660,6 +687,8 @@ static void bigarray_update_proxy(struct caml_bigarray * b1,
proxy = stat_alloc(sizeof(struct caml_bigarray_proxy));
proxy->refcount = 2; /* original array + sub array */
proxy->data = b1->data;
proxy->size =
b1->flags & BIGARRAY_MAPPED_FILE ? bigarray_byte_size(b1) : 0;
b1->proxy = proxy;
b2->proxy = proxy;
}
@ -835,3 +864,11 @@ value bigarray_fill(value vb, value vinit)
}
return Val_unit;
}
/* Initialization */
value bigarray_init(value unit)
{
register_custom_operations(&bigarray_ops);
return Val_unit;
}

View File

@ -0,0 +1,115 @@
/***********************************************************************/
/* */
/* Objective Caml */
/* */
/* Manuel Serrano and Xavier Leroy, INRIA Rocquencourt */
/* */
/* Copyright 2000 Institut National de Recherche en Informatique et */
/* en Automatique. All rights reserved. This file is distributed */
/* under the terms of the GNU Library General Public License. */
/* */
/***********************************************************************/
/* $Id$ */
#include <stddef.h>
#include <string.h>
#include "bigarray.h"
#include "custom.h"
#include "fail.h"
#include "mlvalues.h"
#include "sys.h"
extern int bigarray_element_size[]; /* from bigarray_stubs.c */
#ifdef HAS_UNISTD
#include <unistd.h>
#endif
#ifdef HAS_MMAP
#include <sys/mman.h>
#endif
#if defined(HAS_MMAP)
value bigarray_map_file(value vfd, value vkind, value vlayout,
value vshared, value vdim)
{
int fd, flags, major_dim, shared;
long num_dims, i;
long dim[MAX_NUM_DIMS];
long currpos, file_size;
unsigned long array_size;
char c;
void * addr;
fd = Int_val(vfd);
flags = Int_val(vkind) | Int_val(vlayout);
num_dims = Wosize_val(vdim);
major_dim = flags & BIGARRAY_FORTRAN_LAYOUT ? num_dims - 1 : 0;
/* Extract dimensions from Caml array */
num_dims = Wosize_val(vdim);
if (num_dims < 1 || num_dims > MAX_NUM_DIMS)
invalid_argument("Bigarray.mmap: bad number of dimensions");
for (i = 0; i < num_dims; i++) {
dim[i] = Long_val(Field(vdim, i));
if (dim[i] == -1 && i == major_dim) continue;
if (dim[i] < 0 || dim[i] > 0x7FFFFFFFL)
invalid_argument("Bigarray.create: negative dimension");
}
/* Determine file size */
currpos = lseek(fd, 0, SEEK_CUR);
if (currpos == -1) sys_error(NO_ARG);
file_size = lseek(fd, 0, SEEK_END);
if (file_size == -1) sys_error(NO_ARG);
/* Determine array size in bytes (or size of array without the major
dimension if that dimension wasn't specified) */
array_size = bigarray_element_size[flags & BIGARRAY_KIND_MASK];
for (i = 0; i < num_dims; i++)
if (dim[i] != -1) array_size *= dim[i];
/* Check if the first/last dimension is unknown */
if (dim[major_dim] == -1) {
/* Determine first/last dimension from file size */
if ((unsigned long) file_size % array_size != 0)
failwith("Bigarray.mmap: file size doesn't match array dimensions");
dim[major_dim] = (unsigned long) file_size / array_size;
array_size = file_size;
} else {
/* Check that file is large enough, and grow it otherwise */
if (file_size < array_size) {
if (lseek(fd, array_size - 1, SEEK_SET) == -1) sys_error(NO_ARG);
c = 0;
if (write(fd, &c, 1) != 1) sys_error(NO_ARG);
}
}
/* Restore original file position */
lseek(fd, currpos, SEEK_SET);
/* Do the mmap */
shared = Bool_val(vshared) ? MAP_SHARED : MAP_PRIVATE;
addr = mmap(NULL, array_size, PROT_READ | PROT_WRITE, shared, fd, 0);
if (addr == MAP_FAILED) sys_error(NO_ARG);
/* Build and return the Caml bigarray */
return alloc_bigarray(flags | BIGARRAY_MAPPED_FILE, num_dims, addr, dim);
}
#else
value bigarray_map_file(value vfd, value vkind, value vlayout,
value vshared, value vdim)
{
invalid_argument("Bigarray.map_file: not supported");
return Val_unit;
}
#endif
void bigarray_unmap_file(void * addr, unsigned long len)
{
#if defined(HAS_MMAP)
munmap(addr, len);
#endif
}