Windows Unicode handling for ocamlyacc

This deals with the command line processing only (i.e. filenames) -
ocamlyacc continues to process .mly files as before.
master
David Allsopp 2019-04-16 17:37:17 +01:00
parent fd9e7b2ee8
commit 7f96c82fda
9 changed files with 197 additions and 97 deletions

View File

@ -140,6 +140,9 @@ Working version
(Whitequark and Jacques-Henri Jourdan, review by Gabriel Scherer
and Xavier Clerc)
- #8621: Make ocamlyacc a Windows Unicode application
(David Allsopp, review by Nicolás Ojeda Bär)
* #8834, `ocaml`: adhere to the XDG base directory specification to
locate an `.ocamlinit` file. Reads an `$XDG_CONFIG_HOME/ocaml/init.ml`
file before trying to lookup `~/.ocamlinit`. On Windows the behaviour

View File

@ -257,6 +257,9 @@ extern double caml_log1p(double);
#define strcmp_os wcscmp
#define strlen_os wcslen
#define sscanf_os swscanf
#define strcpy_os wcscpy
#define mktemp_os _wmktemp
#define fopen_os _wfopen
#define caml_stat_strdup_os caml_stat_wcsdup
#define caml_stat_strconcat_os caml_stat_wcsconcat
@ -289,6 +292,9 @@ extern double caml_log1p(double);
#define strcmp_os strcmp
#define strlen_os strlen
#define sscanf_os sscanf
#define strcpy_os strcpy
#define mktemp_os mktemp
#define fopen_os fopen
#define caml_stat_strdup_os caml_stat_strdup
#define caml_stat_strconcat_os caml_stat_strconcat

View File

@ -22,9 +22,15 @@ include $(ROOTDIR)/Makefile.common
OC_CPPFLAGS += -I$(ROOTDIR)/runtime
ifeq "$(UNIX_OR_WIN32)" "win32"
WSTR_OBJ = wstr
else
WSTR_OBJ =
endif
ocamlyacc_SOURCES := $(addsuffix .c,\
closure error lalr lr0 main mkpar output reader skeleton symtab verbose \
warshall)
$(WSTR_OBJ) closure error lalr lr0 main mkpar output reader skeleton \
symtab verbose warshall)
ocamlyacc_OBJECTS := $(ocamlyacc_SOURCES:.c=.$(O))
@ -32,12 +38,8 @@ generated_files := ocamlyacc$(EXE) $(ocamlyacc_OBJECTS) version.h
all: ocamlyacc$(EXE)
ifeq ($(TOOLCHAIN),cc)
MKEXE_ANSI=$(MKEXE)
endif
ocamlyacc$(EXE): $(ocamlyacc_OBJECTS)
$(MKEXE_ANSI) -o $@ $^ $(EXTRALIBS)
$(MKEXE) -o $@ $^ $(EXTRALIBS)
version.h : $(ROOTDIR)/VERSION
echo "#define OCAML_VERSION \"`sed -e 1q $< | tr -d '\r'`\"" > $@

View File

@ -25,7 +25,13 @@
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "caml/misc.h"
#include <string.h>
#define CAML_INTERNALS
#include "caml/config.h"
#include "caml/mlvalues.h"
#include "caml/osdeps.h"
#define caml_stat_strdup strdup
/* machine-dependent definitions */
/* the following definitions are for the Tahoe */
@ -69,9 +75,9 @@
/* defines for constructing filenames */
#define OUTPUT_SUFFIX ".ml"
#define VERBOSE_SUFFIX ".output"
#define INTERFACE_SUFFIX ".mli"
#define OUTPUT_SUFFIX T(".ml")
#define VERBOSE_SUFFIX T(".output")
#define INTERFACE_SUFFIX T(".mli")
/* keyword codes */
@ -212,21 +218,27 @@ extern char sflag;
extern char eflag;
extern char big_endian;
/* myname should be UTF-8 encoded */
extern char *myname;
extern char *cptr;
extern char *line;
extern int lineno;
/* virtual_input_file_name should be UTF-8 encoded */
extern char *virtual_input_file_name;
extern int outline;
extern char *action_file_name;
extern char *entry_file_name;
extern char *code_file_name;
extern char *input_file_name;
extern char *output_file_name;
extern char *text_file_name;
extern char *verbose_file_name;
extern char *interface_file_name;
extern char_os *action_file_name;
extern char_os *entry_file_name;
extern char_os *code_file_name;
extern char_os *input_file_name;
extern char_os *output_file_name;
extern char_os *text_file_name;
extern char_os *verbose_file_name;
extern char_os *interface_file_name;
/* UTF-8 versions of code_file_name and input_file_name */
extern char *code_file_name_disp;
extern char *input_file_name_disp;
extern FILE *action_file;
extern FILE *entry_file;
@ -318,7 +330,7 @@ extern void lr0 (void);
extern void make_parser (void);
extern void no_grammar (void) Noreturn;
extern void no_space (void) Noreturn;
extern void open_error (char *filename) Noreturn;
extern void open_error (char_os *filename) Noreturn;
extern void output (void);
extern void prec_redeclared (void);
extern void polymorphic_entry_point(char *s) Noreturn;

View File

@ -19,6 +19,9 @@
#include "defs.h"
/* String displayed if we can't malloc a buffer for the UTF-8 conversion */
static char *unknown = "<unknown; out of memory>";
void fatal(char *msg)
{
fprintf(stderr, "%s: f - %s\n", myname, msg);
@ -33,9 +36,10 @@ void no_space(void)
}
void open_error(char *filename)
void open_error(char_os *filename)
{
fprintf(stderr, "%s: f - cannot open \"%s\"\n", myname, filename);
char *u8 = caml_stat_strdup_of_os(filename);
fprintf(stderr, "%s: f - cannot open \"%s\"\n", myname, (u8 ? u8 : unknown));
done(2);
}

View File

@ -33,12 +33,14 @@ char eflag;
char sflag;
char big_endian;
char *file_prefix = 0;
char_os *file_prefix = 0;
char *myname = "yacc";
char temp_form[] = "yacc.XXXXXXX";
char_os temp_form[] = T("yacc.XXXXXXX");
#ifdef _WIN32
char dirsep = '\\';
wchar_t dirsep = L'\\';
/* mingw provides an implementation of mkstemp, but it's ANSI only */
#undef HAS_MKSTEMP
#else
char dirsep = '/';
#endif
@ -47,14 +49,16 @@ int lineno;
char *virtual_input_file_name = NULL;
int outline;
char *action_file_name;
char *entry_file_name;
char *code_file_name;
char *interface_file_name;
char *input_file_name = "";
char *output_file_name;
char *text_file_name;
char *verbose_file_name;
char_os *action_file_name;
char_os *entry_file_name;
char_os *code_file_name;
char *code_file_name_disp;
char_os *interface_file_name;
char_os *input_file_name = T("");
char *input_file_name_disp;
char_os *output_file_name;
char_os *text_file_name;
char_os *verbose_file_name;
#ifdef HAS_MKSTEMP
int action_fd = -1, entry_fd = -1, text_fd = -1;
@ -105,15 +109,15 @@ void done(int k)
if (text_fd != -1)
unlink(text_file_name);
#else
if (action_file) { fclose(action_file); unlink(action_file_name); }
if (entry_file) { fclose(entry_file); unlink(entry_file_name); }
if (text_file) { fclose(text_file); unlink(text_file_name); }
if (action_file) { fclose(action_file); unlink_os(action_file_name); }
if (entry_file) { fclose(entry_file); unlink_os(entry_file_name); }
if (text_file) { fclose(text_file); unlink_os(text_file_name); }
#endif
if (output_file && k > 0) {
fclose(output_file); unlink(output_file_name);
fclose(output_file); unlink_os(output_file_name);
}
if (interface_file && k > 0) {
fclose(interface_file); unlink(interface_file_name);
fclose(interface_file); unlink_os(interface_file_name);
}
exit(k);
}
@ -149,12 +153,13 @@ void usage(void)
exit(1);
}
void getargs(int argc, char **argv)
void getargs(int argc, char_os **argv)
{
register int i;
register char *s;
register char_os *s;
if (argc > 0) myname = argv[0];
if (argc > 0) myname = caml_stat_strdup_of_os(argv[0]);
if (!myname) no_space();
for (i = 1; i < argc; ++i)
{
s = argv[i];
@ -163,12 +168,12 @@ void getargs(int argc, char **argv)
{
case '\0':
input_file = stdin;
file_prefix = "stdin";
file_prefix = T("stdin");
if (i + 1 < argc) usage();
return;
case '-':
if (!strcmp (argv[i], "--strict")){
if (!strcmp_os (argv[i], T("--strict"))){
eflag = 1;
goto end_of_option;
}
@ -176,11 +181,11 @@ void getargs(int argc, char **argv)
goto no_more_options;
case 'v':
if (!strcmp (argv[i], "-version")){
if (!strcmp_os (argv[i], T("-version"))){
printf ("The OCaml parser generator, version "
OCAML_VERSION "\n");
exit (0);
}else if (!strcmp (argv[i], "-vnum")){
}else if (!strcmp_os (argv[i], T("-vnum"))){
printf (OCAML_VERSION "\n");
exit (0);
}else{
@ -230,12 +235,14 @@ end_of_option:;
no_more_options:;
if (i + 1 != argc) usage();
input_file_name = argv[i];
input_file_name_disp = caml_stat_strdup_of_os(input_file_name);
if (!input_file_name_disp) no_space();
if (file_prefix == 0) {
int len;
len = strlen(argv[i]);
file_prefix = malloc(len + 1);
len = strlen_os(argv[i]);
file_prefix = MALLOC((len + 1) * sizeof(char_os));
if (file_prefix == 0) no_space();
strcpy(file_prefix, argv[i]);
strcpy_os(file_prefix, argv[i]);
while (len > 0) {
len--;
if (file_prefix[len] == '.') {
@ -265,30 +272,30 @@ allocate(unsigned int n)
void create_file_names(void)
{
int i, len;
char *tmpdir;
char_os *tmpdir;
#ifdef _WIN32
tmpdir = getenv("TEMP");
if (tmpdir == 0) tmpdir = ".";
tmpdir = _wgetenv(L"TEMP");
if (tmpdir == 0) tmpdir = L".";
#else
tmpdir = getenv("TMPDIR");
if (tmpdir == 0) tmpdir = "/tmp";
#endif
len = strlen(tmpdir);
len = strlen_os(tmpdir);
i = len + sizeof(temp_form);
if (len && tmpdir[len-1] != dirsep)
++i;
action_file_name = MALLOC(i);
action_file_name = MALLOC(i * sizeof(char_os));
if (action_file_name == 0) no_space();
entry_file_name = MALLOC(i);
entry_file_name = MALLOC(i * sizeof(char_os));
if (entry_file_name == 0) no_space();
text_file_name = MALLOC(i);
text_file_name = MALLOC(i * sizeof(char_os));
if (text_file_name == 0) no_space();
strcpy(action_file_name, tmpdir);
strcpy(entry_file_name, tmpdir);
strcpy(text_file_name, tmpdir);
strcpy_os(action_file_name, tmpdir);
strcpy_os(entry_file_name, tmpdir);
strcpy_os(text_file_name, tmpdir);
if (len && tmpdir[len - 1] != dirsep)
{
@ -298,13 +305,13 @@ void create_file_names(void)
++len;
}
strcpy(action_file_name + len, temp_form);
strcpy(entry_file_name + len, temp_form);
strcpy(text_file_name + len, temp_form);
strcpy_os(action_file_name + len, temp_form);
strcpy_os(entry_file_name + len, temp_form);
strcpy_os(text_file_name + len, temp_form);
action_file_name[len + 5] = 'a';
entry_file_name[len + 5] = 'e';
text_file_name[len + 5] = 't';
action_file_name[len + 5] = L'a';
entry_file_name[len + 5] = L'e';
text_file_name[len + 5] = L't';
#ifdef HAS_MKSTEMP
action_fd = mkstemp(action_file_name);
@ -317,35 +324,37 @@ void create_file_names(void)
if (text_fd == -1)
open_error(text_file_name);
#else
mktemp(action_file_name);
mktemp(entry_file_name);
mktemp(text_file_name);
mktemp_os(action_file_name);
mktemp_os(entry_file_name);
mktemp_os(text_file_name);
#endif
len = strlen(file_prefix);
len = strlen_os(file_prefix);
output_file_name = MALLOC(len + 7);
output_file_name = MALLOC((len + 7) * sizeof(char_os));
if (output_file_name == 0)
no_space();
strcpy(output_file_name, file_prefix);
strcpy(output_file_name + len, OUTPUT_SUFFIX);
strcpy_os(output_file_name, file_prefix);
strcpy_os(output_file_name + len, OUTPUT_SUFFIX);
code_file_name = output_file_name;
code_file_name_disp = caml_stat_strdup_of_os(code_file_name);
if (!code_file_name_disp) no_space();
if (vflag)
{
verbose_file_name = MALLOC(len + 8);
verbose_file_name = MALLOC((len + 8) * sizeof(char_os));
if (verbose_file_name == 0)
no_space();
strcpy(verbose_file_name, file_prefix);
strcpy(verbose_file_name + len, VERBOSE_SUFFIX);
strcpy_os(verbose_file_name, file_prefix);
strcpy_os(verbose_file_name + len, VERBOSE_SUFFIX);
}
interface_file_name = MALLOC(len + 8);
interface_file_name = MALLOC((len + 8) * sizeof(char_os));
if (interface_file_name == 0)
no_space();
strcpy(interface_file_name, file_prefix);
strcpy(interface_file_name + len, INTERFACE_SUFFIX);
strcpy_os(interface_file_name, file_prefix);
strcpy_os(interface_file_name + len, INTERFACE_SUFFIX);
}
@ -356,7 +365,7 @@ void open_files(void)
if (input_file == 0)
{
input_file = fopen(input_file_name, "r");
input_file = fopen_os(input_file_name, T("r"));
if (input_file == 0)
open_error(input_file_name);
}
@ -364,7 +373,7 @@ void open_files(void)
#ifdef HAS_MKSTEMP
action_file = fdopen(action_fd, "w");
#else
action_file = fopen(action_file_name, "w");
action_file = fopen_os(action_file_name, T("w"));
#endif
if (action_file == 0)
open_error(action_file_name);
@ -372,7 +381,7 @@ void open_files(void)
#ifdef HAS_MKSTEMP
entry_file = fdopen(entry_fd, "w");
#else
entry_file = fopen(entry_file_name, "w");
entry_file = fopen_os(entry_file_name, T("w"));
#endif
if (entry_file == 0)
open_error(entry_file_name);
@ -380,25 +389,25 @@ void open_files(void)
#ifdef HAS_MKSTEMP
text_file = fdopen(text_fd, "w");
#else
text_file = fopen(text_file_name, "w");
text_file = fopen_os(text_file_name, T("w"));
#endif
if (text_file == 0)
open_error(text_file_name);
if (vflag)
{
verbose_file = fopen(verbose_file_name, "w");
verbose_file = fopen_os(verbose_file_name, T("w"));
if (verbose_file == 0)
open_error(verbose_file_name);
}
output_file = fopen(output_file_name, "w");
output_file = fopen_os(output_file_name, T("w"));
if (output_file == 0)
open_error(output_file_name);
if (rflag)
{
code_file = fopen(code_file_name, "w");
code_file = fopen_os(code_file_name, T("w"));
if (code_file == 0)
open_error(code_file_name);
}
@ -406,12 +415,16 @@ void open_files(void)
code_file = output_file;
interface_file = fopen(interface_file_name, "w");
interface_file = fopen_os(interface_file_name, T("w"));
if (interface_file == 0)
open_error(interface_file_name);
}
#ifdef _WIN32
int wmain(int argc, wchar_t **argv)
#else
int main(int argc, char **argv)
#endif
{
set_signals();
getargs(argc, argv);

View File

@ -785,7 +785,7 @@ void output_stored_text(void)
register FILE *in, *out;
fclose(text_file);
text_file = fopen(text_file_name, "r");
text_file = fopen_os(text_file_name, T("r"));
if (text_file == NULL)
open_error(text_file_name);
in = text_file;
@ -802,7 +802,7 @@ void output_stored_text(void)
putc(c, out);
}
if (!lflag)
fprintf(out, line_format, ++outline + 1, code_file_name);
fprintf(out, line_format, ++outline + 1, code_file_name_disp);
}
@ -855,7 +855,7 @@ void output_trailing_text(void)
if (!lflag)
{
++outline;
fprintf(out, line_format, lineno, input_file_name);
fprintf(out, line_format, lineno, input_file_name_disp);
}
if (c == '\n')
++outline;
@ -867,7 +867,7 @@ void output_trailing_text(void)
if (!lflag)
{
++outline;
fprintf(out, line_format, lineno, input_file_name);
fprintf(out, line_format, lineno, input_file_name_disp);
}
do { putc(c, out); } while ((c = *++cptr) != '\n');
++outline;
@ -890,18 +890,18 @@ void output_trailing_text(void)
putc('\n', out);
}
if (!lflag)
fprintf(out, line_format, ++outline + 1, code_file_name);
fprintf(out, line_format, ++outline + 1, code_file_name_disp);
}
void copy_file(FILE **file, char *file_name)
void copy_file(FILE **file, char_os *file_name)
{
register int c, last;
register FILE *out = code_file;
int state = 0;
fclose(*file);
*file = fopen(file_name, "r");
*file = fopen_os(file_name, T("r"));
if (*file == NULL)
open_error(file_name);
@ -915,7 +915,7 @@ void copy_file(FILE **file, char *file_name)
case ' ': state = (state == 2) ? 3 : 0; break;
case '0':
if (state == 3){
fprintf (out, "%d \"%s", outline+2, code_file_name);
fprintf (out, "%d \"%s", outline+2, code_file_name_disp);
c = '"';
}
state = 0;

View File

@ -563,7 +563,7 @@ void copy_text(void)
if (line == 0)
unterminated_text(t_lineno, t_line, t_cptr);
}
fprintf(f, line_format, lineno, input_file_name);
fprintf(f, line_format, lineno, input_file_name_disp);
loop:
c = *cptr++;
@ -1286,7 +1286,7 @@ void copy_action(void)
item->name);
}
fprintf(f, " Obj.repr(\n");
fprintf(f, line_format, lineno, input_file_name);
fprintf(f, line_format, lineno, input_file_name_disp);
for (i = 0; i < cptr - line; i++) fputc(' ', f);
fputc ('(', f);
@ -1820,8 +1820,8 @@ void print_grammar(void)
void reader(void)
{
virtual_input_file_name = substring (input_file_name, 0,
strlen (input_file_name));
virtual_input_file_name = caml_stat_strdup_of_os(input_file_name);
if (!virtual_input_file_name) no_space();
create_symbol_table();
read_declarations();
output_token_type();

60
yacc/wstr.c Normal file
View File

@ -0,0 +1,60 @@
/**************************************************************************/
/* */
/* OCaml */
/* */
/* David Allsopp, OCaml Labs, Cambridge. */
/* */
/* Copyright 2017 MetaStack Solutions Ltd. */
/* */
/* All rights reserved. This file is distributed under the terms of */
/* the GNU Lesser General Public License version 2.1, with the */
/* special exception on linking described in the file LICENSE. */
/* */
/**************************************************************************/
/* Need at least Windows Vista for WC_ERR_INVALID_CHARS */
#define _WIN32_WINNT 0x600
#define WINVER 0x600
#include <stdlib.h>
#include <windows.h>
/* See corresponding values in runtime/win32.c */
/* Non-zero: win_wide_char_to_multi_byte converts to UTF-8 (CP_UTF8);
   zero: it converts to the ANSI code page (CP_ACP).
   Initialised from WINDOWS_UNICODE, which is not defined in this file -
   presumably supplied by the build configuration; TODO confirm. */
static int windows_unicode_enabled = WINDOWS_UNICODE;
/* Non-zero: UTF-8 conversions pass WC_ERR_INVALID_CHARS to
   WideCharToMultiByte; zero: they pass no flags. Only consulted when
   windows_unicode_enabled is non-zero. */
static int windows_unicode_strict = 1;
/* Adapted from runtime/win32.c */
/* Convert slen wide characters starting at s into the multi-byte buffer out,
   which has room for outlen bytes. The arguments are forwarded unchanged to
   WideCharToMultiByte.

   The target encoding is UTF-8 when windows_unicode_enabled is non-zero
   (with WC_ERR_INVALID_CHARS when windows_unicode_strict is also non-zero),
   and the ANSI code page otherwise.

   Returns 0 when slen is 0 (no conversion is attempted), -1 when
   WideCharToMultiByte reports failure by returning 0, and otherwise the
   value WideCharToMultiByte returned. */
int win_wide_char_to_multi_byte(const wchar_t *s, int slen,
                                char *out, int outlen)
{
  unsigned int code_page;
  unsigned long flags;
  int converted;

  /* An empty input is not an error, so it must not reach the 0 -> -1
     mapping below. */
  if (slen == 0)
    return 0;

  if (windows_unicode_enabled != 0) {
    code_page = CP_UTF8;
    flags = windows_unicode_strict ? WC_ERR_INVALID_CHARS : 0;
  } else {
    code_page = CP_ACP;
    flags = 0;
  }

  converted =
    WideCharToMultiByte(code_page, flags, s, slen, out, outlen, NULL, NULL);

  return (converted == 0) ? -1 : converted;
}
/* Convert the NUL-terminated wide string s into a newly malloc'd multi-byte
   string, using the encoding selected by win_wide_char_to_multi_byte.

   Returns NULL if the string cannot be converted or if memory cannot be
   allocated; callers must check for NULL. On success the caller owns the
   buffer and releases it with free(). */
char* caml_stat_strdup_of_utf16(const wchar_t *s)
{
  char *out;
  int size;

  /* Sizing pass: a source length of -1 tells WideCharToMultiByte that s is
     NUL-terminated, and an output length of 0 makes it report the number of
     bytes required (terminator included) without writing anything. */
  size = win_wide_char_to_multi_byte(s, -1, NULL, 0);
  if (size <= 0)
    return NULL;

  out = malloc((size_t)size);
  if (out == NULL)
    return NULL;

  /* Conversion pass: if it fails the buffer contents are unspecified, so
     release the buffer rather than hand back something that callers would
     treat as a valid string. */
  if (win_wide_char_to_multi_byte(s, -1, out, size) <= 0) {
    free(out);
    return NULL;
  }

  return out;
}