diff --git a/otherlibs/bigarray/.depend b/otherlibs/bigarray/.depend index 5cf42696f..7c2398871 100644 --- a/otherlibs/bigarray/.depend +++ b/otherlibs/bigarray/.depend @@ -6,5 +6,9 @@ bigarray_stubs.o: bigarray_stubs.c ../../byterun/alloc.h \ ../../byterun/fix_code.h ../../byterun/memory.h ../../byterun/gc.h \ ../../byterun/major_gc.h ../../byterun/freelist.h \ ../../byterun/minor_gc.h +mmap_unix.o: mmap_unix.c bigarray.h ../../byterun/mlvalues.h \ + ../../byterun/config.h ../../byterun/../config/m.h \ + ../../byterun/../config/s.h ../../byterun/misc.h \ + ../../byterun/custom.h ../../byterun/fail.h ../../byterun/sys.h bigarray.cmo: bigarray.cmi bigarray.cmx: bigarray.cmi diff --git a/otherlibs/bigarray/Makefile b/otherlibs/bigarray/Makefile index 93a14dd55..064ff3903 100644 --- a/otherlibs/bigarray/Makefile +++ b/otherlibs/bigarray/Makefile @@ -19,7 +19,7 @@ CFLAGS=-I../../byterun -g -O $(BYTECCCOMPOPTS) CAMLC=../../boot/ocamlrun ../../ocamlc -I ../../stdlib -I ../unix CAMLOPT=../../boot/ocamlrun ../../ocamlopt -I ../../stdlib -I ../unix -C_OBJS=bigarray_stubs.o +C_OBJS=bigarray_stubs.o mmap_unix.o CAML_OBJS=bigarray.cmo @@ -33,10 +33,10 @@ libbigarray.a: $(C_OBJS) $(RANLIB) libbigarray.a bigarray.cma: $(CAML_OBJS) - $(CAMLC) -a -o bigarray.cma $(CAML_OBJS) + $(CAMLC) -a -linkall -o bigarray.cma $(CAML_OBJS) bigarray.cmxa: $(CAML_OBJS:.cmo=.cmx) - $(CAMLOPT) -a -o bigarray.cmxa $(CAML_OBJS:.cmo=.cmx) + $(CAMLOPT) -a -linkall -o bigarray.cmxa $(CAML_OBJS:.cmo=.cmx) partialclean: rm -f *.cm* diff --git a/otherlibs/bigarray/bigarray.h b/otherlibs/bigarray/bigarray.h index e9265f82d..1d1cc8630 100644 --- a/otherlibs/bigarray/bigarray.h +++ b/otherlibs/bigarray/bigarray.h @@ -43,12 +43,14 @@ enum caml_bigarray_layout { enum caml_bigarray_managed { BIGARRAY_EXTERNAL = 0, /* Data is not allocated by Caml */ BIGARRAY_MANAGED = 0x200, /* Data is allocated by Caml */ - BIGARRAY_MANAGED_MASK = 0x200 /* Mask for "managed" bit in flags field */ + BIGARRAY_MAPPED_FILE = 0x400, /* Data is a memory mapped file */ + BIGARRAY_MANAGED_MASK = 0x600 /* Mask for "managed" bits in flags field */ }; struct caml_bigarray_proxy { long refcount; /* Reference count */ void * data; /* Pointer to base of actual data */ + unsigned long size; /* Size of data in bytes (if mapped file) */ }; struct caml_bigarray { @@ -67,5 +69,4 @@ extern value alloc_bigarray(int flags, int num_dims, void * data, long * dim); extern value alloc_bigarray_dims(int flags, int num_dims, void * data, ... /*dimensions, with type long */); - #endif diff --git a/otherlibs/bigarray/bigarray.ml b/otherlibs/bigarray/bigarray.ml index e38ed006d..5b97769e9 100644 --- a/otherlibs/bigarray/bigarray.ml +++ b/otherlibs/bigarray/bigarray.ml @@ -14,6 +14,10 @@ (* Module [Bigarray]: large, multi-dimensional, numerical arrays *) +external init : unit -> unit = "bigarray_init" + +let _ = init() + type ('a, 'b) kind = int type int8_signed_elt @@ -77,6 +81,9 @@ module Genarray = struct external blit: ('a, 'b, 'c) t -> ('a, 'b, 'c) t -> unit = "bigarray_blit" external fill: ('a, 'b, 'c) t -> 'a -> unit = "bigarray_fill" + external map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout -> + shared:bool -> dims:int array -> ('a, 'b, 'c) t + = "bigarray_map_file" end module Array1 = struct @@ -94,6 +101,8 @@ module Array1 = struct let ofs = if (Obj.magic layout : 'a layout) = c_layout then 0 else 1 in for i = 0 to Array.length data - 1 do set ba (i + ofs) data.(i) done; ba + let map_file fd kind layout shared dim = + Genarray.map_file fd kind layout shared [|dim|] end module Array2 = struct @@ -124,6 +133,8 @@ module Array2 = struct done done; ba + let map_file fd kind layout shared dim1 dim2 = + Genarray.map_file fd kind layout shared [|dim1;dim2|] end module Array3 = struct @@ -163,6 +174,8 @@ module Array3 = struct done done; ba + let map_file fd kind layout shared dim1 dim2 dim3 = + Genarray.map_file fd kind layout shared [|dim1;dim2;dim3|] end external genarray_of_array1: ('a, 'b, 'c) Array1.t -> ('a, 'b, 'c) Genarray.t = "%identity" diff --git a/otherlibs/bigarray/bigarray.mli b/otherlibs/bigarray/bigarray.mli index ab54d1e44..9b522a25d 100644 --- a/otherlibs/bigarray/bigarray.mli +++ b/otherlibs/bigarray/bigarray.mli @@ -310,6 +310,40 @@ module Genarray: sig the big array [a]. Setting only some elements of [a] to [v] can be achieved by applying [Genarray.fill] to a sub-array or a slice of [a]. *) + external map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout -> + shared:bool -> dims:int array -> ('a, 'b, 'c) t = "bigarray_map_file" + (* Memory mapping of a file as a big array. + [Genarray.map_file fd kind layout shared dims] + returns a big array of kind [kind], layout [layout], + and dimensions as specified in [dims]. The data contained in + this big array are the contents of the file referred to by + the file descriptor [fd] (as opened previously with + [Unix.openfile], for example). If [shared] is [true], + all modifications performed on the array are reflected in + the file. This requires that [fd] be opened with write permissions. + If [shared] is [false], modifications performed on the array + are done in memory only, using copy-on-write of the modified + pages; the underlying file is not affected. + + [Genarray.map_file] is much more efficient than reading + the whole file in a big array, modifying that big array, + and writing it afterwards. + + To adjust automatically the dimensions of the big array to + the actual size of the file, the major dimension (that is, + the first dimension for an array with C layout, and the last + dimension for an array with Fortran layout) can be given as + [-1]. [Genarray.map_file] then determines the major dimension + from the size of the file. The file must contain an integral + number of sub-arrays as determined by the non-major dimensions, + otherwise [Failure] is raised. + + If all dimensions of the big array are given, the file size is + matched against the size of the big array. If the file is larger + than the big array, only the initial portion of the file is + mapped to the big array. If the file is smaller than the big + array, the file is automatically grown to the size of the big array. + This requires write permissions on [fd]. *) end (*** One-dimensional arrays *) @@ -359,6 +393,10 @@ module Array1: sig val of_array: ('a, 'b) kind -> 'c layout -> 'a array -> ('a, 'b, 'c) t (* Build a one-dimensional big array initialized from the given array. *) + val map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout -> + shared:bool -> dim:int -> ('a, 'b, 'c) t + (* Memory mapping of a file as a one-dimensional big array. + See [Genarray.map_file] for more details. *) end (*** Two-dimensional arrays *) @@ -432,6 +470,10 @@ module Array2: sig val of_array: ('a, 'b) kind -> 'c layout -> 'a array array -> ('a, 'b, 'c) t (* Build a two-dimensional big array initialized from the given array of arrays. *) + val map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout -> + shared:bool -> dim1:int -> dim2:int -> ('a, 'b, 'c) t + (* Memory mapping of a file as a two-dimensional big array. + See [Genarray.map_file] for more details. *) end (*** Three-dimensional arrays *) @@ -528,6 +570,10 @@ module Array3: sig ('a, 'b) kind -> 'c layout -> 'a array array array -> ('a, 'b, 'c) t (* Build a three-dimensional big array initialized from the given array of arrays of arrays. *) + val map_file: Unix.file_descr -> ('a, 'b) kind -> 'c layout -> + shared:bool -> dim1:int -> dim2:int -> dim3:int -> ('a, 'b, 'c) t + (* Memory mapping of a file as a three-dimensional big array. + See [Genarray.map_file] for more details. *) end (*** Coercions between generic big arrays and fixed-dimension big arrays *) diff --git a/otherlibs/bigarray/bigarray_stubs.c b/otherlibs/bigarray/bigarray_stubs.c index 62f8b4e39..fb57c2f50 100644 --- a/otherlibs/bigarray/bigarray_stubs.c +++ b/otherlibs/bigarray/bigarray_stubs.c @@ -23,11 +23,14 @@ #include "memory.h" #include "mlvalues.h" +extern void bigarray_unmap_file(void * addr, unsigned long len); + /* from mmap_stubs.c */ + /* Compute the number of elements of a big array */ -static long bigarray_num_elts(struct caml_bigarray * b) +static unsigned long bigarray_num_elts(struct caml_bigarray * b) { - long num_elts; + unsigned long num_elts; int i; num_elts = 1; for (i = 0; i < b->num_dims; i++) num_elts = num_elts * b->dim[i]; @@ -36,7 +39,7 @@ static long bigarray_num_elts(struct caml_bigarray * b) /* Size in bytes of a bigarray element, indexed by bigarray kind */ -static int bigarray_element_size[] = +int bigarray_element_size[] = { 4 /*FLOAT32*/, 8 /*FLOAT64*/, 1 /*SINT8*/, 1 /*UINT8*/, 2 /*SINT16*/, 2 /*UINT16*/, @@ -44,6 +47,14 @@ static int bigarray_element_size[] = sizeof(value) /*CAML_INT*/, sizeof(value) /*NATIVE_INT*/ }; +/* Compute the number of bytes for the elements of a big array */ + +unsigned long bigarray_byte_size(struct caml_bigarray * b) +{ + return bigarray_num_elts(b) + * bigarray_element_size[b->flags & BIGARRAY_KIND_MASK]; +} + /* Allocation of a big array */ static struct custom_operations bigarray_ops; @@ -366,14 +377,30 @@ value bigarray_dim(value vb, value vn) static void bigarray_finalize(value v) { struct caml_bigarray * b = Bigarray_val(v); - if ((b->flags & BIGARRAY_MANAGED_MASK) == BIGARRAY_EXTERNAL) return; - if (b->proxy == NULL) { - free(b->data); - } else { - if (-- b->proxy->refcount == 0) { - stat_free(b->proxy->data); - stat_free(b->proxy); + + switch (b->flags & BIGARRAY_MANAGED_MASK) { + case BIGARRAY_EXTERNAL: + break; + case BIGARRAY_MANAGED: + if (b->proxy == NULL) { + free(b->data); + } else { + if (-- b->proxy->refcount == 0) { + stat_free(b->proxy->data); + stat_free(b->proxy); + } } + break; + case BIGARRAY_MAPPED_FILE: + if (b->proxy == NULL) { + bigarray_unmap_file(b->data, bigarray_byte_size(b)); + } else { + if (-- b->proxy->refcount == 0) { + bigarray_unmap_file(b->proxy->data, b->proxy->size); + stat_free(b->proxy); + } + } + break; } } @@ -383,7 +410,7 @@ static int bigarray_compare(value v1, value v2) { struct caml_bigarray * b1 = Bigarray_val(v1); struct caml_bigarray * b2 = Bigarray_val(v2); - long n, num_elts; + unsigned long n, num_elts; int i; /* Compare number of dimensions */ @@ -594,7 +621,7 @@ unsigned long bigarray_deserialize(void * dst) { struct caml_bigarray * b = dst; int i, elt_size; - long num_elts; + unsigned long num_elts; /* Read back header information */ b->num_dims = deserialize_uint_4(); @@ -660,6 +687,8 @@ static void bigarray_update_proxy(struct caml_bigarray * b1, proxy = stat_alloc(sizeof(struct caml_bigarray_proxy)); proxy->refcount = 2; /* original array + sub array */ proxy->data = b1->data; + proxy->size = + b1->flags & BIGARRAY_MAPPED_FILE ? bigarray_byte_size(b1) : 0; b1->proxy = proxy; b2->proxy = proxy; } @@ -835,3 +864,11 @@ value bigarray_fill(value vb, value vinit) } return Val_unit; } + +/* Initialization */ + +value bigarray_init(value unit) +{ + register_custom_operations(&bigarray_ops); + return Val_unit; +} diff --git a/otherlibs/bigarray/mmap_unix.c b/otherlibs/bigarray/mmap_unix.c new file mode 100644 index 000000000..a78b06d3e --- /dev/null +++ b/otherlibs/bigarray/mmap_unix.c @@ -0,0 +1,115 @@ +/***********************************************************************/ +/* */ +/* Objective Caml */ +/* */ +/* Manuel Serrano and Xavier Leroy, INRIA Rocquencourt */ +/* */ +/* Copyright 2000 Institut National de Recherche en Informatique et */ +/* en Automatique. All rights reserved. This file is distributed */ +/* under the terms of the GNU Library General Public License. */ +/* */ +/***********************************************************************/ + +/* $Id$ */ + +#include +#include +#include "bigarray.h" +#include "custom.h" +#include "fail.h" +#include "mlvalues.h" +#include "sys.h" + +extern int bigarray_element_size[]; /* from bigarray_stubs.c */ + +#ifdef HAS_UNISTD +#include +#endif +#ifdef HAS_MMAP +#include +#endif + +#if defined(HAS_MMAP) + +value bigarray_map_file(value vfd, value vkind, value vlayout, + value vshared, value vdim) +{ + int fd, flags, major_dim, shared; + long num_dims, i; + long dim[MAX_NUM_DIMS]; + long currpos, file_size; + unsigned long array_size; + char c; + void * addr; + + fd = Int_val(vfd); + flags = Int_val(vkind) | Int_val(vlayout); + num_dims = Wosize_val(vdim); + major_dim = flags & BIGARRAY_FORTRAN_LAYOUT ? num_dims - 1 : 0; + /* Extract dimensions from Caml array */ + num_dims = Wosize_val(vdim); + if (num_dims < 1 || num_dims > MAX_NUM_DIMS) + invalid_argument("Bigarray.mmap: bad number of dimensions"); + for (i = 0; i < num_dims; i++) { + dim[i] = Long_val(Field(vdim, i)); + if (dim[i] == -1 && i == major_dim) continue; + if (dim[i] < 0 || dim[i] > 0x7FFFFFFFL) + invalid_argument("Bigarray.create: negative dimension"); + } + /* Determine file size */ + currpos = lseek(fd, 0, SEEK_CUR); + if (currpos == -1) sys_error(NO_ARG); + file_size = lseek(fd, 0, SEEK_END); + if (file_size == -1) sys_error(NO_ARG); + /* Determine array size in bytes (or size of array without the major + dimension if that dimension wasn't specified) */ + array_size = bigarray_element_size[flags & BIGARRAY_KIND_MASK]; + for (i = 0; i < num_dims; i++) + if (dim[i] != -1) array_size *= dim[i]; + /* Check if the first/last dimension is unknown */ + if (dim[major_dim] == -1) { + /* Determine first/last dimension from file size */ + if ((unsigned long) file_size % array_size != 0) + failwith("Bigarray.mmap: file size doesn't match array dimensions"); + dim[major_dim] = (unsigned long) file_size / array_size; + array_size = file_size; + } else { + /* Check that file is large enough, and grow it otherwise */ + if (file_size < array_size) { + if (lseek(fd, array_size - 1, SEEK_SET) == -1) sys_error(NO_ARG); + c = 0; + if (write(fd, &c, 1) != 1) sys_error(NO_ARG); + } + } + /* Restore original file position */ + lseek(fd, currpos, SEEK_SET); + /* Do the mmap */ + shared = Bool_val(vshared) ? MAP_SHARED : MAP_PRIVATE; + addr = mmap(NULL, array_size, PROT_READ | PROT_WRITE, shared, fd, 0); + if (addr == MAP_FAILED) sys_error(NO_ARG); + /* Build and return the Caml bigarray */ + return alloc_bigarray(flags | BIGARRAY_MAPPED_FILE, num_dims, addr, dim); +} + +#else + +value bigarray_map_file(value vfd, value vkind, value vlayout, + value vshared, value vdim) +{ + invalid_argument("Bigarray.map_file: not supported"); + return Val_unit; +} + +#endif + + +void bigarray_unmap_file(void * addr, unsigned long len) +{ +#if defined(HAS_MMAP) + munmap(addr, len); +#endif +} + + + +