New libregexp and APE ported to native

front
ben 2016-04-26 22:23:44 -05:00
parent 651d6c2bc6
commit 0a460e1722
29 changed files with 1861 additions and 1677 deletions

View File

@ -1,15 +1,29 @@
#pragma src "/sys/src/libregexp"
#pragma lib "libregexp.a"
#pragma src "/sys/src/libregexp"
#pragma lib "libregexp.a"
/*
 * Opcode constants exported by the regexp header.
 * OANY is pinned to 0; the remaining enumerators take the
 * consecutive values 1..9 in the order listed, so reordering
 * this list changes their numeric values.
 * NOTE(review): presumably these are the instruction types of
 * Reinst, which is declared incomplete in this header -- confirm
 * the meaning of each opcode against the libregexp sources.
 */
enum
{
OANY = 0,
OBOL,
OCLASS,
OEOL,
OJMP,
ONOTNL,
ORUNE,
OSAVE,
OSPLIT,
OUNSAVE,
};
typedef struct Resub Resub;
typedef struct Reclass Reclass;
typedef struct Reinst Reinst;
typedef struct Reprog Reprog;
typedef struct Resub Resub;
typedef struct Reinst Reinst;
typedef struct Reprog Reprog;
typedef struct Rethread Rethread;
/*
* Sub expression matches
*/
struct Resub{
#pragma incomplete Reinst
#pragma incomplete Rethread
struct Resub
{
union
{
char *sp;
@ -21,46 +35,22 @@ struct Resub{
Rune *rep;
};
};
/*
* character class, each pair of rune's defines a range
*/
struct Reclass{
Rune *end;
Rune spans[64];
/*
 * Compiled regular expression program: the type returned by
 * regcomp, regcomplit and regcompnl and passed to regexec and
 * rregexec.  Reinst and Rethread are incomplete types in this
 * header, so the pointer members below are opaque to callers.
 */
struct Reprog
{
Reinst *startinst;	/* presumably the first instruction to execute -- confirm */
Rethread *threads;	/* NOTE(review): looks like matcher thread storage -- confirm ownership */
Rethread **thrpool;	/* NOTE(review): looks like a pool of Rethread pointers -- confirm */
char *regstr;	/* NOTE(review): presumably the pattern text -- confirm who allocates/frees it */
int len;	/* NOTE(review): a length; what it counts is not visible here -- confirm */
int nthr;	/* NOTE(review): presumably the number of threads -- confirm */
};
/*
* Machine instructions
*/
struct Reinst{
int type;
union {
Reclass *cp; /* class pointer */
Rune r; /* character */
int subid; /* sub-expression id for RBRA and LBRA */
Reinst *right; /* right child of OR */
};
union { /* regexp relies on these two being in the same union */
Reinst *left; /* left child of OR */
Reinst *next; /* next instruction for CAT & LBRA */
};
};
/*
* Reprogram definition
*/
struct Reprog{
Reinst *startinst; /* start pc */
Reclass class[16]; /* .data */
Reinst firstinst[5]; /* .text */
};
extern Reprog *regcomp(char*);
extern Reprog *regcomplit(char*);
extern Reprog *regcompnl(char*);
extern void regerror(char*);
extern int regexec(Reprog*, char*, Resub*, int);
extern void regsub(char*, char*, int, Resub*, int);
extern int rregexec(Reprog*, Rune*, Resub*, int);
extern void rregsub(Rune*, Rune*, int, Resub*, int);
Reprog* regcomp(char*);
Reprog* regcomplit(char*);
Reprog* regcompnl(char*);
void regerror(char*);
int regexec(Reprog*, char*, Resub*, int);
void regsub(char*, char*, int, Resub*, int);
int rregexec(Reprog*, Rune*, Resub*, int);
void rregsub(Rune*, Rune*, int, Resub*, int);
int reprogfmt(Fmt *);

View File

@ -6,20 +6,20 @@ Copyright (c) Lucent Technologies 1997
typedef double Awkfloat;
/* unsigned char is more trouble than it's worth */
typedef unsigned char uschar;
#define xfree(a) { if ((a) != NULL) { free((char *) a); a = NULL; } }
/*
 * xfree(a): if a is not nil, free it and reset a to nil.
 * a is expanded three times, so it must be a side-effect-free lvalue.
 * The expansion is a bare brace block, not a single statement:
 * "if (x) xfree(p); else ..." will not parse because of the extra ';'.
 */
#define xfree(a) { if ((a) != nil) { free((a)); (a) = nil; } }
#define DEBUG
#ifdef DEBUG
/* uses have to be doubly parenthesized */
# define dprintf(x) if (dbg) printf x
# define dprint(x) if (dbg) print x
#else
# define dprintf(x)
# define dprint(x)
#endif
#define FOPEN_MAX 40 /* max number of open files */
#define EOF -1
extern char errbuf[];
extern int compile_time; /* 1 if compiling, 0 if running */
@ -28,6 +28,10 @@ extern int safe; /* 0 => unsafe, 1 => safe */
#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
extern int recsize; /* size of current record, orig RECSIZE */
extern Biobuf stdin;
extern Biobuf stdout;
extern Biobuf stderr;
extern char **FS;
extern char **RS;
extern char **ORS;
@ -56,8 +60,8 @@ extern int patlen; /* length of pattern matched. set in b.c */
/* Cell: all information about a variable or constant */
typedef struct Cell {
uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */
uschar csub; /* CCON, CTEMP, CFLD, etc. */
uchar ctype; /* OCELL, OBOOL, OJUMP, etc. */
uchar csub; /* CCON, CTEMP, CFLD, etc. */
char *nval; /* name, for variables only */
char *sval; /* string value */
Awkfloat fval; /* value as number */
@ -66,7 +70,7 @@ typedef struct Cell {
} Cell;
typedef struct Array { /* symbol table array */
int nelem; /* elements in table right now */
int nelemt; /* elements in table right now */
int size; /* size of tab */
Cell **tab; /* hash table pointers */
} Array;

View File

@ -23,8 +23,9 @@ THIS SOFTWARE.
****************************************************************/
%{
#include <stdio.h>
#include <string.h>
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "awk.h"
#define makedfa(a,b) compre(a)

View File

@ -22,10 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <u.h>
#include <libc.h>
#include <ctype.h>
#include <bio.h>
#include "awk.h"
#include "y.tab.h"
@ -90,9 +90,8 @@ Keyword keywords[] ={ /* keep sorted: binary searched */
{ "while", WHILE, WHILE },
};
#define DEBUG
#ifdef DEBUG
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
#define RET(x) { if(dbg)print("lex %s\n", tokname(x)); return(x); }
#else
#define RET(x) return(x)
#endif
@ -170,7 +169,7 @@ int yylex(void)
static char *buf = 0;
static int bufsize = 500;
if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
if (buf == 0 && (buf = (char *) malloc(bufsize)) == nil)
FATAL( "out of space in yylex" );
if (sc) {
sc = 0;
@ -353,7 +352,7 @@ int string(void)
static char *buf = 0;
static int bufsz = 500;
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
FATAL("out of space for strings");
for (bp = buf; (c = input()) != '"'; ) {
if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
@ -401,7 +400,7 @@ int string(void)
}
*px = 0;
unput(c);
sscanf(xbuf, "%x", &n);
n = strtol(xbuf, nil, 16);
*bp++ = n;
break;
}
@ -497,7 +496,7 @@ int regexpr(void)
static int bufsz = 500;
char *bp;
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
FATAL("out of space for rex expr");
bp = buf;
for ( ; (c = input()) != '/' && c != 0; ) {
@ -526,7 +525,7 @@ char ebuf[300];
char *ep = ebuf;
char yysbuf[100]; /* pushback buffer */
char *yysptr = yysbuf;
FILE *yyin = 0;
Biobuf *yyin;
int input(void) /* get next lexical input character */
{
@ -535,7 +534,7 @@ int input(void) /* get next lexical input character */
if (yysptr > yysbuf)
c = *--yysptr;
else if (lexprog != NULL) { /* awk '...' */
else if (lexprog != nil) { /* awk '...' */
if ((c = *lexprog) != 0)
lexprog++;
} else /* awk -f ... */

View File

@ -22,17 +22,14 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
#define DEBUG
#include <stdio.h>
#include <string.h>
#include <u.h>
#include <libc.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
#include <bio.h>
#include "awk.h"
#include "y.tab.h"
FILE *infile = NULL;
Biobuf *infile;
char *file = "";
char *record;
int recsize = RECSIZE;
@ -50,17 +47,17 @@ int donerec; /* 1 = record is valid (no flds have changed) */
int lastfld = 0; /* last used field */
int argno = 1; /* current input argument number */
extern Awkfloat *ARGC;
extern Awkfloat *AARGC;
static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
static Cell dollar0 = { OCELL, CFLD, nil, "", 0.0, REC|STR|DONTFREE };
static Cell dollar1 = { OCELL, CFLD, nil, "", 0.0, FLD|STR|DONTFREE };
void recinit(unsigned int n)
{
record = (char *) malloc(n);
fields = (char *) malloc(n);
fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *));
if (record == NULL || fields == NULL || fldtab == NULL)
if (record == nil || fields == nil || fldtab == nil)
FATAL("out of space for $0 and fields");
fldtab[0] = (Cell *) malloc(sizeof (Cell));
*fldtab[0] = dollar0;
@ -76,10 +73,10 @@ void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
for (i = n1; i <= n2; i++) {
fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
if (fldtab[i] == NULL)
if (fldtab[i] == nil)
FATAL("out of space in makefields %d", i);
*fldtab[i] = dollar1;
sprintf(temp, "%d", i);
sprint(temp, "%d", i);
fldtab[i]->nval = tostring(temp);
}
}
@ -89,7 +86,7 @@ void initgetrec(void)
int i;
char *p;
for (i = 1; i < *ARGC; i++) {
for (i = 1; i < *AARGC; i++) {
if (!isclvar(p = getargv(i))) { /* find 1st real filename */
setsval(lookup("FILENAME", symtab), getargv(i));
return;
@ -97,7 +94,7 @@ void initgetrec(void)
setclvar(p); /* a commandline assignment before filename */
argno++;
}
infile = stdin; /* no filenames, so use stdin */
infile = &stdin; /* no filenames, so use &stdin */
}
int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
@ -111,16 +108,16 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
firsttime = 0;
initgetrec();
}
dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
*RS, *FS, *ARGC, *FILENAME) );
dprint( ("RS=<%s>, FS=<%s>, AARGC=%g, FILENAME=%s\n",
*RS, *FS, *AARGC, *FILENAME) );
if (isrecord) {
donefld = 0;
donerec = 1;
}
buf[0] = 0;
while (argno < *ARGC || infile == stdin) {
dprintf( ("argno=%d, file=|%s|\n", argno, file) );
if (infile == NULL) { /* have to open a new file */
while (argno < *AARGC || infile == &stdin) {
dprint( ("argno=%d, file=|%s|\n", argno, file) );
if (infile == nil) { /* have to open a new file */
file = getargv(argno);
if (*file == '\0') { /* it's been zapped */
argno++;
@ -132,10 +129,10 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
continue;
}
*FILENAME = file;
dprintf( ("opening file %s\n", file) );
dprint( ("opening file %s\n", file) );
if (*file == '-' && *(file+1) == '\0')
infile = stdin;
else if ((infile = fopen(file, "r")) == NULL)
infile = &stdin;
else if ((infile = Bopen(file, OREAD)) == nil)
FATAL("can't open file %s", file);
setfval(fnrloc, 0.0);
}
@ -158,9 +155,9 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
return 1;
}
/* EOF arrived on this file; set up next */
if (infile != stdin)
fclose(infile);
infile = NULL;
if (infile != &stdin)
Bterm(infile);
infile = nil;
argno++;
}
*pbuf = buf;
@ -170,13 +167,13 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
void nextfile(void)
{
if (infile != stdin)
fclose(infile);
infile = NULL;
if (infile != &stdin)
Bterm(infile);
infile = nil;
argno++;
}
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
int readrec(char **pbuf, int *pbufsize, Biobuf *inf) /* read one record into buf */
{
int sep, c;
char *rr, *buf = *pbuf;
@ -187,13 +184,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
strcpy(inputFS, *FS); /* for subsequent field splitting */
if ((sep = **RS) == 0) {
sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
while ((c=Bgetc(inf)) == '\n' && c != EOF) /* skip leading \n's */
;
if (c != EOF)
ungetc(c, inf);
Bungetc(inf);
}
for (rr = buf; ; ) {
for (; (c=getc(inf)) != sep && c != EOF; ) {
for (; (c=Bgetc(inf)) != sep && c != EOF; ) {
if (rr-buf+1 > bufsize)
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
FATAL("input record `%.30s...' too long", buf);
@ -201,7 +198,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
}
if (**RS == sep || c == EOF)
break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
if ((c = Bgetc(inf)) == '\n' || c == EOF) /* 2 in a row */
break;
if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
FATAL("input record `%.30s...' too long", buf);
@ -211,7 +208,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
FATAL("input record `%.30s...' too long", buf);
*rr = 0;
dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
dprint( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
*pbuf = buf;
*pbufsize = bufsize;
return c == EOF && rr == buf ? 0 : 1;
@ -223,10 +220,10 @@ char *getargv(int n) /* get ARGV[n] */
char *s, temp[50];
extern Array *ARGVtab;
sprintf(temp, "%d", n);
sprint(temp, "%d", n);
x = setsymtab(temp, "", 0.0, STR, ARGVtab);
s = getsval(x);
dprintf( ("getargv(%d) returns |%s|\n", n, s) );
dprint( ("getargv(%d) returns |%s|\n", n, s) );
return s;
}
@ -245,7 +242,7 @@ void setclvar(char *s) /* set var=value from s */
q->fval = atof(q->sval);
q->tval |= NUM;
}
dprintf( ("command line set %s to |%s|\n", s, p) );
dprint( ("command line set %s to |%s|\n", s, p) );
}
@ -265,7 +262,7 @@ void fldbld(void) /* create fields from current record */
n = strlen(r);
if (n > fieldssize) {
xfree(fields);
if ((fields = (char *) malloc(n+1)) == NULL)
if ((fields = (char *) malloc(n+1)) == nil)
FATAL("out of space for fields in fldbld %d", n);
fieldssize = n;
}
@ -273,7 +270,7 @@ void fldbld(void) /* create fields from current record */
i = 0; /* number of fields accumulated here */
if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS);
} else if ((sep = *inputFS) == ' ') { /* default whitespace */
} else if (*inputFS == ' ') { /* default whitespace */
for (i = 0; ; ) {
while (*r == ' ' || *r == '\t' || *r == '\n')
r++;
@ -339,7 +336,7 @@ void fldbld(void) /* create fields from current record */
if (dbg) {
for (j = 0; j <= lastfld; j++) {
p = fldtab[j];
printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
print("field %d (%s): |%s|\n", j, p->nval, p->sval);
}
}
}
@ -383,7 +380,7 @@ void growfldtab(int n) /* make new fields up to at least $n */
if (n > nf)
nf = n;
fldtab = (Cell **) realloc(fldtab, (nf+1) * (sizeof (struct Cell *)));
if (fldtab == NULL)
if (fldtab == nil)
FATAL("out of space creating %d fields", nf);
makefields(nfields+1, nf);
nfields = nf;
@ -395,12 +392,12 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
/* the fields are all stored in this one array with \0's */
char *fr;
void *p;
int i, tempstat, n;
int i, n;
n = strlen(rec);
if (n > fieldssize) {
xfree(fields);
if ((fields = (char *) malloc(n+1)) == NULL)
if ((fields = (char *) malloc(n+1)) == nil)
FATAL("out of space for fields in refldbld %d", n);
fieldssize = n;
}
@ -409,7 +406,7 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
if (*rec == '\0')
return 0;
p = compre(fs);
dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
dprint( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
for (i = 1; ; i++) {
if (i > nfields)
growfldtab(i);
@ -417,15 +414,15 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
xfree(fldtab[i]->sval);
fldtab[i]->tval = FLD | STR | DONTFREE;
fldtab[i]->sval = fr;
dprintf( ("refldbld: i=%d\n", i) );
dprint( ("refldbld: i=%d\n", i) );
if (nematch(p, rec, rec)) {
dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
dprint( ("match %s (%d chars)\n", patbeg, patlen) );
strncpy(fr, rec, patbeg-rec);
fr += patbeg - rec + 1;
*(fr-1) = '\0';
rec = patbeg + patlen;
} else {
dprintf( ("no match %s\n", rec) );
dprint( ("no match %s\n", rec) );
strcpy(fr, rec);
break;
}
@ -457,15 +454,15 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
FATAL("built giant record `%.30s...'", record);
*r = '\0';
dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
if (freeable(fldtab[0]))
xfree(fldtab[0]->sval);
fldtab[0]->tval = REC | STR | DONTFREE;
fldtab[0]->sval = record;
dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
dprintf( ("recbld = |%s|\n", record) );
dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
dprint( ("recbld = |%s|\n", record) );
donerec = 1;
}
@ -484,24 +481,26 @@ void SYNTAX(char *fmt, ...)
if (been_here++ > 2)
return;
fprintf(stderr, "%s: ", cmdname);
Bprint(&stderr, "%s: ", cmdname);
va_start(varg, fmt);
vfprintf(stderr, fmt, varg);
Bvprint(&stderr, fmt, varg);
va_end(varg);
if(compile_time == 1 && cursource() != NULL)
fprintf(stderr, " at %s:%d", cursource(), lineno);
if(compile_time == 1 && cursource() != nil)
Bprint(&stderr, " at %s:%d", cursource(), lineno);
else
fprintf(stderr, " at line %d", lineno);
if (curfname != NULL)
fprintf(stderr, " in function %s", curfname);
fprintf(stderr, "\n");
Bprint(&stderr, " at line %d", lineno);
if (curfname != nil)
Bprint(&stderr, " in function %s", curfname);
Bprint(&stderr, "\n");
errorflag = 2;
eprint();
}
void fpecatch(int n)
int handler(void *, char *err)
{
FATAL("floating point exception %d", n);
Bflush(&stdout);
fprint(2, "%s\n", err);
return 0;
}
extern int bracecnt, brackcnt, parencnt;
@ -520,16 +519,16 @@ void bracecheck(void)
bcheck2(parencnt, '(', ')');
}
void bcheck2(int n, int c1, int c2)
void bcheck2(int n, int, int c2)
{
if (n == 1)
fprintf(stderr, "\tmissing %c\n", c2);
Bprint(&stderr, "\tmissing %c\n", c2);
else if (n > 1)
fprintf(stderr, "\t%d missing %c's\n", n, c2);
Bprint(&stderr, "\t%d missing %c's\n", n, c2);
else if (n == -1)
fprintf(stderr, "\textra %c\n", c2);
Bprint(&stderr, "\textra %c\n", c2);
else if (n < -1)
fprintf(stderr, "\t%d extra %c's\n", -n, c2);
Bprint(&stderr, "\t%d extra %c's\n", -n, c2);
}
void FATAL(char *fmt, ...)
@ -537,15 +536,15 @@ void FATAL(char *fmt, ...)
extern char *cmdname;
va_list varg;
fflush(stdout);
fprintf(stderr, "%s: ", cmdname);
Bflush(&stdout);
Bprint(&stderr, "%s: ", cmdname);
va_start(varg, fmt);
vfprintf(stderr, fmt, varg);
Bvprint(&stderr, fmt, varg);
va_end(varg);
error();
if (dbg > 1) /* core dump if serious debugging on */
abort();
exit(2);
exits("FATAL");
}
void WARNING(char *fmt, ...)
@ -553,10 +552,10 @@ void WARNING(char *fmt, ...)
extern char *cmdname;
va_list varg;
fflush(stdout);
fprintf(stderr, "%s: ", cmdname);
Bflush(&stdout);
Bprint(&stderr, "%s: ", cmdname);
va_start(varg, fmt);
vfprintf(stderr, fmt, varg);
Bvprint(&stderr, fmt, varg);
va_end(varg);
error();
}
@ -566,13 +565,13 @@ void error()
extern Node *curnode;
int line;
fprintf(stderr, "\n");
Bprint(&stderr, "\n");
if (compile_time != 2 && NR && *NR > 0) {
if (strcmp(*FILENAME, "-") != 0)
fprintf(stderr, " input record %s:%d", *FILENAME, (int) (*FNR));
Bprint(&stderr, " input record %s:%d", *FILENAME, (int) (*FNR));
else
fprintf(stderr, " input record number %d", (int) (*FNR));
fprintf(stderr, "\n");
Bprint(&stderr, " input record number %d", (int) (*FNR));
Bprint(&stderr, "\n");
}
if (compile_time != 2 && curnode)
line = curnode->lineno;
@ -580,14 +579,14 @@ void error()
line = lineno;
else
line = -1;
if (compile_time == 1 && cursource() != NULL){
if (compile_time == 1 && cursource() != nil){
if(line >= 0)
fprintf(stderr, " source %s:%d", cursource(), line);
Bprint(&stderr, " source %s:%d", cursource(), line);
else
fprintf(stderr, " source file %s", cursource());
Bprint(&stderr, " source file %s", cursource());
}else if(line >= 0)
fprintf(stderr, " source line %d", line);
fprintf(stderr, "\n");
Bprint(&stderr, " source line %d", line);
Bprint(&stderr, "\n");
eprint();
}
@ -607,23 +606,23 @@ void eprint(void) /* try to print context around error */
;
while (*p == '\n')
p++;
fprintf(stderr, " context is\n\t");
Bprint(&stderr, " context is\n\t");
for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
;
for ( ; p < q; p++)
if (*p)
putc(*p, stderr);
fprintf(stderr, " >>> ");
Bputc(&stderr, *p);
Bprint(&stderr, " >>> ");
for ( ; p < ep; p++)
if (*p)
putc(*p, stderr);
fprintf(stderr, " <<< ");
Bputc(&stderr, *p);
Bprint(&stderr, " <<< ");
if (*ep)
while ((c = input()) != '\n' && c != '\0' && c != EOF) {
putc(c, stderr);
Bputc(&stderr, c);
bclass(c);
}
putc('\n', stderr);
Bputc(&stderr, '\n');
ep = ebuf;
}
@ -642,12 +641,10 @@ void bclass(int c)
double errcheck(double x, char *s)
{
if (errno == EDOM) {
errno = 0;
if (isNaN(x)) {
WARNING("%s argument out of domain", s);
x = 1;
} else if (errno == ERANGE) {
errno = 0;
} else if (isInf(x, 1) || isInf(x, -1)) {
WARNING("%s result out of range", s);
x = 1;
}
@ -668,7 +665,6 @@ int isclvar(char *s) /* is s of form var=something ? */
/* strtod is supposed to be a proper test of what's a valid number */
#include <math.h>
int is_number(char *s)
{
double r;
@ -699,9 +695,8 @@ int is_number(char *s)
return 0; /* can't be a number */
}
errno = 0;
r = strtod(s, &ep);
if (ep == s || r == HUGE_VAL || errno == ERANGE)
if (ep == s || isInf(r, 1) || isInf(r, -1))
return 0;
while (*ep == ' ' || *ep == '\t' || *ep == '\n')
ep++;

View File

@ -24,21 +24,21 @@ THIS SOFTWARE.
char *version = "version 19990602";
#define DEBUG
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "awk.h"
#include "y.tab.h"
extern char **environ;
extern int nfields;
Biobuf stdin;
Biobuf stdout;
Biobuf stderr;
int dbg = 0;
char *cmdname; /* gets argv[0] for error messages */
extern FILE *yyin; /* lex input file */
extern Biobuf *yyin; /* lex input file */
char *lexprog; /* points to program argument if it exists */
extern int errorflag; /* non-zero if any syntax errors; set by yyerror */
int compile_time = 2; /* for error printing: */
@ -50,18 +50,23 @@ int curpfile = 0; /* current filename */
int safe = 0; /* 1 => "safe" mode */
int main(int argc, char *argv[])
void main(int argc, char *argv[])
{
char *fs = NULL, *marg;
char *fs = nil, *marg;
int temp;
Binit(&stdin, 0, OREAD);
Binit(&stdout, 1, OWRITE);
Binit(&stderr, 2, OWRITE);
cmdname = argv[0];
if (argc == 1) {
fprintf(stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname);
exit(1);
Bprint(&stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname);
exits("usage");
}
signal(SIGFPE, fpecatch);
yyin = NULL;
atnotify(handler, 1);
yyin = nil;
symtab = makesymtab(NSYMTAB);
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
if (strcmp(argv[1], "--") == 0) { /* explicit end of args */
@ -94,7 +99,7 @@ int main(int argc, char *argv[])
else if (argc > 1 && argv[1][0] != 0)
fs = &argv[1][0];
}
if (fs == NULL || *fs == '\0')
if (fs == nil || *fs == '\0')
WARNING("field separator FS is empty");
break;
case 'v': /* -v a=1 to be done NOW. one -v for each */
@ -120,11 +125,11 @@ int main(int argc, char *argv[])
dbg = atoi(&argv[1][2]);
if (dbg == 0)
dbg = 1;
printf("awk %s\n", version);
print("awk %s\n", version);
break;
case 'V': /* added for exptools "standard" */
printf("awk %s\n", version);
exit(0);
print("awk %s\n", version);
exits(0);
break;
default:
WARNING("unknown option %s ignored", argv[1]);
@ -137,10 +142,10 @@ int main(int argc, char *argv[])
if (npfile == 0) { /* no -f; first argument is program */
if (argc <= 1) {
if (dbg)
exit(0);
exits(0);
FATAL("no program given");
}
dprintf( ("program = |%s|\n", argv[1]) );
dprint( ("program = |%s|\n", argv[1]) );
lexprog = argv[1];
argc--;
argv++;
@ -149,20 +154,20 @@ int main(int argc, char *argv[])
syminit();
compile_time = 1;
argv[0] = cmdname; /* put prog name at front of arglist */
dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
dprint( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
arginit(argc, argv);
if (!safe)
envinit(environ);
yyparse();
if (fs)
*FS = qstring(fs, '\0');
dprintf( ("errorflag=%d\n", errorflag) );
dprint( ("errorflag=%d\n", errorflag) );
if (errorflag == 0) {
compile_time = 0;
run(winner);
} else
bracecheck();
return(errorflag);
if(errorflag)
exits("error");
exits(0);
}
int pgetc(void) /* get 1 character from awk program */
@ -170,20 +175,20 @@ int pgetc(void) /* get 1 character from awk program */
int c;
for (;;) {
if (yyin == NULL) {
if (yyin == nil) {
if (curpfile >= npfile)
return EOF;
if (strcmp(pfile[curpfile], "-") == 0)
yyin = stdin;
else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
yyin = &stdin;
else if ((yyin = Bopen(pfile[curpfile], OREAD)) == nil)
FATAL("can't open file %s", pfile[curpfile]);
lineno = 1;
}
if ((c = getc(yyin)) != EOF)
if ((c = Bgetc(yyin)) != EOF)
return c;
if (yyin != stdin)
fclose(yyin);
yyin = NULL;
if (yyin != &stdin)
Bterm(yyin);
yyin = nil;
curpfile++;
}
}
@ -193,5 +198,5 @@ char *cursource(void) /* current source file name */
if (npfile > 0)
return pfile[curpfile];
else
return NULL;
return nil;
}

View File

@ -28,9 +28,9 @@ THIS SOFTWARE.
* it finds the indices in y.tab.h, produced by yacc.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "awk.h"
#include "y.tab.h"
@ -39,7 +39,7 @@ struct xx
char *name;
char *pname;
} proc[] = {
{ PROGRAM, "program", NULL },
{ PROGRAM, "program", nil },
{ BOR, "boolop", " || " },
{ AND, "boolop", " && " },
{ NOT, "boolop", " !" },
@ -49,13 +49,13 @@ struct xx
{ LT, "relop", " < " },
{ GE, "relop", " >= " },
{ GT, "relop", " > " },
{ ARRAY, "array", NULL },
{ ARRAY, "array", nil },
{ INDIRECT, "indirect", "$(" },
{ SUBSTR, "substr", "substr" },
{ SUB, "sub", "sub" },
{ GSUB, "gsub", "gsub" },
{ INDEX, "sindex", "sindex" },
{ SPRINTF, "awksprintf", "sprintf " },
{ SPRINTF, "awksprintf", "sprintf" },
{ ADD, "arith", " + " },
{ MINUS, "arith", " - " },
{ MULT, "arith", " * " },
@ -68,8 +68,8 @@ struct xx
{ PREDECR, "incrdecr", "--" },
{ POSTDECR, "incrdecr", "--" },
{ CAT, "cat", " " },
{ PASTAT, "pastat", NULL },
{ PASTAT2, "dopa2", NULL },
{ PASTAT, "pastat", nil },
{ PASTAT2, "dopa2", nil },
{ MATCH, "matchop", " ~ " },
{ NOTMATCH, "matchop", " !~ " },
{ MATCHFCN, "matchop", "matchop" },
@ -110,59 +110,62 @@ struct xx
char *table[SIZE];
char *names[SIZE];
int main(int argc, char *argv[])
void main(int, char**)
{
struct xx *p;
int i, n, tok;
char c;
FILE *fp;
char buf[200], name[200], def[200];
int i, tok;
Biobuf *fp;
char *buf, *toks[3];
printf("#include <stdio.h>\n");
printf("#include \"awk.h\"\n");
printf("#include \"y.tab.h\"\n\n");
print("#include <u.h>\n");
print("#include <libc.h>\n");
print("#include <bio.h>\n");
print("#include \"awk.h\"\n");
print("#include \"y.tab.h\"\n\n");
for (i = SIZE; --i >= 0; )
names[i] = "";
if ((fp = fopen("y.tab.h", "r")) == NULL) {
fprintf(stderr, "maketab can't open y.tab.h!\n");
exit(1);
if ((fp = Bopen("y.tab.h", OREAD)) == nil) {
fprint(2, "maketab can't open y.tab.h!\n");
exits("can't open y.tab.h");
}
printf("static char *printname[%d] = {\n", SIZE);
print("static char *printname[%d] = {\n", SIZE);
i = 0;
while (fgets(buf, sizeof buf, fp) != NULL) {
n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */
while ((buf = Brdline(fp, '\n')) != nil) {
buf[Blinelen(fp)-1] = '\0';
tokenize(buf, toks, 3);
if (toks[0] == nil || strcmp("#define", toks[0]) != 0) /* not a valid #define */
continue;
tok = strtol(toks[2], nil, 10);
if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
fprintf(stderr, "maketab funny token %d %s\n", tok, buf);
exit(1);
fprint(2, "maketab funny token %d %s\n", tok, buf);
exits("funny token");
}
names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1);
strcpy(names[tok-FIRSTTOKEN], name);
printf("\t(char *) \"%s\",\t/* %d */\n", name, tok);
names[tok-FIRSTTOKEN] = (char *) malloc(strlen(toks[1])+1);
strcpy(names[tok-FIRSTTOKEN], toks[1]);
print("\t(char *) \"%s\",\t/* %d */\n", toks[1], tok);
i++;
}
printf("};\n\n");
print("};\n\n");
for (p=proc; p->token!=0; p++)
table[p->token-FIRSTTOKEN] = p->name;
printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
print("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
for (i=0; i<SIZE; i++)
if (table[i]==0)
printf("\tnullproc,\t/* %s */\n", names[i]);
print("\tnullproc,\t/* %s */\n", names[i]);
else
printf("\t%s,\t/* %s */\n", table[i], names[i]);
printf("};\n\n");
print("\t%s,\t/* %s */\n", table[i], names[i]);
print("};\n\n");
printf("char *tokname(int n)\n"); /* print a tokname() function */
printf("{\n");
printf(" static char buf[100];\n\n");
printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
printf(" sprintf(buf, \"token %%d\", n);\n");
printf(" return buf;\n");
printf(" }\n");
printf(" return printname[n-FIRSTTOKEN];\n");
printf("}\n");
return 0;
print("char *tokname(int n)\n"); /* print a tokname() function */
print("{\n");
print(" static char buf[100];\n\n");
print(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
print(" sprint(buf, \"token %%d\", n);\n");
print(" return buf;\n");
print(" }\n");
print(" return printname[n-FIRSTTOKEN];\n");
print("}\n");
exits(0);
}

View File

@ -6,6 +6,7 @@ OFILES=re.$O\
main.$O\
parse.$O\
proctab.$O\
popen.$O\
tran.$O\
lib.$O\
run.$O\
@ -28,11 +29,6 @@ UPDATE=\
${TARG:%=/386/bin/%}\
</sys/src/cmd/mkone
CFLAGS=-c -D_REGEXP_EXTENSION -D_RESEARCH_SOURCE -D_BSD_EXTENSION -DUTF
YFLAGS=-S -d -v
CC=pcc
LD=pcc
cpuobjtype=`{sed -n 's/^O=//p' /$cputype/mkfile}
y.tab.h awkgram.c: $YFILES
$YACC -o awkgram.c $YFLAGS $prereq
@ -43,10 +39,10 @@ clean:V:
nuke:V:
rm -f *.[$OS] [$OS].out [$OS].maketab y.tab.? y.debug y.output awkgram.c proctab.c $TARG
proctab.c: $cpuobjtype.maketab
./$cpuobjtype.maketab >proctab.c
proctab.c: $O.maketab
./$O.maketab >proctab.c
$cpuobjtype.maketab: y.tab.h maketab.c
$O.maketab: y.tab.h maketab.c
objtype=$cputype
mk maketab.$cputype

View File

@ -22,10 +22,9 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
#define DEBUG
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "awk.h"
#include "y.tab.h"
@ -34,9 +33,9 @@ Node *nodealloc(int n)
Node *x;
x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *));
if (x == NULL)
if (x == nil)
FATAL("out of space in nodealloc");
x->nnext = NULL;
x->nnext = nil;
x->lineno = lineno;
return(x);
}
@ -220,11 +219,11 @@ Node *linkum(Node *a, Node *b)
if (errorflag) /* don't link things that are wrong */
return a;
if (a == NULL)
if (a == nil)
return(b);
else if (b == NULL)
else if (b == nil)
return(a);
for (c = a; c->nnext != NULL; c = c->nnext)
for (c = a; c->nnext != nil; c = c->nnext)
;
c->nnext = b;
return(a);
@ -245,7 +244,7 @@ void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */
for (p = vl; p; p = p->nnext)
n++;
v->fval = n;
dprintf( ("defining func %s (%d args)\n", v->nval, n) );
dprint( ("defining func %s (%d args)\n", v->nval, n) );
}
int isarg(char *s) /* is s in argument list for current function? */
@ -262,7 +261,7 @@ int isarg(char *s) /* is s in argument list for current function? */
int ptoi(void *p) /* convert pointer to integer */
{
return (int) (long) p; /* swearing that p fits, of course */
return (int) (vlong) p; /* swearing that p fits, of course */
}
Node *itonp(int i) /* and vice versa */

View File

@ -44,7 +44,6 @@ extern void quoted(char **, char **, char *);
extern int match(void *, char *, char *);
extern int pmatch(void *, char *, char *);
extern int nematch(void *, char *, char *);
extern int countposn(char *, int);
extern void overflow(void);
extern int pgetc(void);
@ -100,7 +99,7 @@ extern void makefields(int, int);
extern void growfldtab(int n);
extern int getrec(char **, int *, int);
extern void nextfile(void);
extern int readrec(char **buf, int *bufsize, FILE *inf);
extern int readrec(char **buf, int *bufsize, Biobuf *inf);
extern char *getargv(int);
extern void setclvar(char *);
extern void fldbld(void);
@ -110,7 +109,7 @@ extern int refldbld(char *, char *);
extern void recbld(void);
extern Cell *fieldadr(int);
extern void yyerror(char *);
extern void fpecatch(int);
extern int handler(void*, char*);
extern void bracecheck(void);
extern void bcheck2(int, int, int);
extern void SYNTAX(char *, ...);
@ -165,13 +164,13 @@ extern Cell *instat(Node **, int);
extern Cell *bltin(Node **, int);
extern Cell *printstat(Node **, int);
extern Cell *nullproc(Node **, int);
extern FILE *redirect(int, Node *);
extern FILE *openfile(int, char *);
extern char *filename(FILE *);
extern Biobuf *redirect(int, Node *);
extern Biobuf *openfile(int, char *);
extern char *filename(Biobuf *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
extern Cell *sub(Node **, int);
extern Cell *gsub(Node **, int);
extern FILE *popen(const char *, const char *);
extern int pclose(FILE *);
extern Biobuf *popen(char *, int);
extern int pclose(Biobuf *);

View File

@ -22,18 +22,13 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
#define DEBUG
#include <stdio.h>
#include <u.h>
#include <libc.h>
#include <ctype.h>
#include <setjmp.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <bio.h>
#include <regexp.h>
#include "awk.h"
#include "y.tab.h"
#include "regexp.h"
/* This file provides the interface between the main body of
* awk and the pattern matching package. It preprocesses
@ -198,11 +193,11 @@ pmatch(void *p, char *s, char *start)
{
Resub m;
m.s.sp = start;
m.e.ep = 0;
m.sp = start;
m.ep = 0;
if (regexec((Reprog *) p, (char *) s, &m, 1)) {
patbeg = m.s.sp;
patlen = m.e.ep-m.s.sp;
patbeg = m.sp;
patlen = m.ep-m.sp;
return 1;
}
patlen = -1;
@ -250,7 +245,7 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */
{
char *p = *s;
char *t = *to;
wchar_t c;
Rune c;
switch(c = *p++) {
case 't':
@ -273,8 +268,8 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */
*t++ = '\\';
if (c == 'x') { /* hexadecimal goo follows */
c = hexstr(&p);
if (t < end-MB_CUR_MAX)
t += wctomb(t, c);
if (t < end-UTFmax)
t += runelen(c);
else overflow();
*to = t;
*s = p;
@ -294,21 +289,6 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */
*s = p;
*to = t;
}
/* count rune positions */
int
countposn(char *s, int n)
{
int i, j;
char *end;
for (i = 0, end = s+n; *s && s < end; i++){
j = mblen(s, n);
if(j <= 0)
j = 1;
s += j;
}
return(i);
}
/* pattern package error handler */

File diff suppressed because it is too large Load Diff

View File

@ -22,12 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
#define DEBUG
#include <stdio.h>
#include <math.h>
#include <u.h>
#include <libc.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <bio.h>
#include "awk.h"
#include "y.tab.h"
@ -46,7 +44,7 @@ Awkfloat *NF; /* number of fields in current record */
Awkfloat *NR; /* number of current record */
Awkfloat *FNR; /* number of current record in current file */
char **FILENAME; /* current filename argument */
Awkfloat *ARGC; /* number of arguments from command line */
Awkfloat *AARGC; /* number of arguments from command line */
char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH; /* length of same */
@ -101,12 +99,12 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */
int i;
char temp[50];
ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
AARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
cp->sval = (char *) ARGVtab;
for (i = 0; i < ac; i++) {
sprintf(temp, "%d", i);
sprint(temp, "%d", i);
if (is_number(*av))
setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
else
@ -124,7 +122,7 @@ void envinit(char **envp) /* set up ENVIRON variable */
ENVtab = makesymtab(NSYMTAB);
cp->sval = (char *) ENVtab;
for ( ; *envp; envp++) {
if ((p = strchr(*envp, '=')) == NULL)
if ((p = strchr(*envp, '=')) == nil)
continue;
*p++ = 0; /* split into two strings at = */
if (is_number(p))
@ -142,9 +140,9 @@ Array *makesymtab(int n) /* make a new symbol table */
ap = (Array *) malloc(sizeof(Array));
tp = (Cell **) calloc(n, sizeof(Cell *));
if (ap == NULL || tp == NULL)
if (ap == nil || tp == nil)
FATAL("out of space in makesymtab");
ap->nelem = 0;
ap->nelemt = 0;
ap->size = n;
ap->tab = tp;
return(ap);
@ -159,10 +157,10 @@ void freesymtab(Cell *ap) /* free a symbol table */
if (!isarr(ap))
return;
tp = (Array *) ap->sval;
if (tp == NULL)
if (tp == nil)
return;
for (i = 0; i < tp->size; i++) {
for (cp = tp->tab[i]; cp != NULL; cp = temp) {
for (cp = tp->tab[i]; cp != nil; cp = temp) {
xfree(cp->nval);
if (freeable(cp))
xfree(cp->sval);
@ -178,14 +176,14 @@ void freesymtab(Cell *ap) /* free a symbol table */
void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */
{
Array *tp;
Cell *p, *prev = NULL;
Cell *p, *prev = nil;
int h;
tp = (Array *) ap->sval;
h = hash(s, tp->size);
for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
for (p = tp->tab[h]; p != nil; prev = p, p = p->cnext)
if (strcmp(s, p->nval) == 0) {
if (prev == NULL) /* 1st one */
if (prev == nil) /* 1st one */
tp->tab[h] = p->cnext;
else /* middle somewhere */
prev->cnext = p->cnext;
@ -193,7 +191,7 @@ void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */
xfree(p->sval);
free(p->nval);
free(p);
tp->nelem--;
tp->nelemt--;
return;
}
}
@ -203,13 +201,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
int h;
Cell *p;
if (n != NULL && (p = lookup(n, tp)) != NULL) {
dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
if (n != nil && (p = lookup(n, tp)) != nil) {
dprint( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
p, p->nval, p->sval, p->fval, p->tval) );
return(p);
}
p = (Cell *) malloc(sizeof(Cell));
if (p == NULL)
if (p == nil)
FATAL("out of space for symbol table at %s", n);
p->nval = tostring(n);
p->sval = s ? tostring(s) : tostring("");
@ -217,13 +215,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
p->tval = t;
p->csub = CUNK;
p->ctype = OCELL;
tp->nelem++;
if (tp->nelem > FULLTAB * tp->size)
tp->nelemt++;
if (tp->nelemt > FULLTAB * tp->size)
rehash(tp);
h = hash(n, tp->size);
p->cnext = tp->tab[h];
tp->tab[h] = p;
dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
dprint( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
p, p->nval, p->sval, p->fval, p->tval) );
return(p);
}
@ -244,7 +242,7 @@ void rehash(Array *tp) /* rehash items in small table into big one */
nsz = GROWTAB * tp->size;
np = (Cell **) calloc(nsz, sizeof(Cell *));
if (np == NULL) /* can't do it, but can keep running. */
if (np == nil) /* can't do it, but can keep running. */
return; /* someone else will run out later. */
for (i = 0; i < tp->size; i++) {
for (cp = tp->tab[i]; cp; cp = op) {
@ -265,10 +263,10 @@ Cell *lookup(char *s, Array *tp) /* look for s in tp */
int h;
h = hash(s, tp->size);
for (p = tp->tab[h]; p != NULL; p = p->cnext)
for (p = tp->tab[h]; p != nil; p = p->cnext)
if (strcmp(s, p->nval) == 0)
return(p); /* found it */
return(NULL); /* not found */
return(nil); /* not found */
}
Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
@ -282,7 +280,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
dprintf( ("setting field %d to %g\n", fldno, f) );
dprint( ("setting field %d to %g\n", fldno, f) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
@ -291,7 +289,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
xfree(vp->sval); /* free any previous string */
vp->tval &= ~STR; /* mark string invalid */
vp->tval |= NUM; /* mark number ok */
dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
dprint( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
return vp->fval = f;
}
@ -310,7 +308,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */
char *t;
int fldno;
dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
dprint( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
if (isfld(vp)) {
@ -318,7 +316,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
dprint( ("setting field %d to %s (%p)\n", fldno, s, s) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
@ -329,7 +327,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */
if (freeable(vp))
xfree(vp->sval);
vp->tval &= ~DONTFREE;
dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
dprint( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
return(vp->sval = t);
}
@ -346,7 +344,7 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */
if (is_number(vp->sval) && !(vp->tval&CON))
vp->tval |= NUM; /* make NUM only sparingly */
}
dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
dprint( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
return(vp->fval);
}
@ -365,14 +363,14 @@ char *getsval(Cell *vp) /* get string val of a Cell */
if (freeable(vp))
xfree(vp->sval);
if (modf(vp->fval, &dtemp) == 0) /* it's integral */
sprintf(s, "%.30g", vp->fval);
sprint(s, "%.30g", vp->fval);
else
sprintf(s, *CONVFMT, vp->fval);
sprint(s, *CONVFMT, vp->fval);
vp->sval = tostring(s);
vp->tval &= ~DONTFREE;
vp->tval |= STR;
}
dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
dprint( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
return(vp->sval);
}
@ -381,7 +379,7 @@ char *tostring(char *s) /* make a copy of string s */
char *p;
p = (char *) malloc(strlen(s)+1);
if (p == NULL)
if (p == nil)
FATAL("out of space in tostring on %s", s);
strcpy(p, s);
return(p);
@ -393,7 +391,7 @@ char *qstring(char *s, int delim) /* collect string up to next delim */
int c, n;
char *buf, *bp;
if ((buf = (char *) malloc(strlen(s)+3)) == NULL)
if ((buf = (char *) malloc(strlen(s)+3)) == nil)
FATAL( "out of space in qstring(%s)", s);
for (bp = buf; (c = *s) != delim; s++) {
if (c == '\n')
@ -429,6 +427,6 @@ char *qstring(char *s, int delim) /* collect string up to next delim */
}
}
}
*bp++ = 0;
*bp = 0;
return buf;
}

View File

@ -1,7 +1,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "regexp.h"
#include "hash.h"
Hash hash;

View File

@ -1,7 +1,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "regexp.h"
#include "hash.h"
enum

View File

@ -2,8 +2,8 @@
#include <libc.h>
#include <bin.h>
#include <bio.h>
#include <regexp.h>
#include "/sys/src/libregexp/regcomp.h"
#include "regexp.h"
#include "regcomp.h"
#include "dfa.h"
void rdump(Reprog*);

View File

@ -1,7 +1,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "regexp.h"
#include "/sys/src/libregexp/regcomp.h"
#include "dfa.h"

View File

@ -7,7 +7,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "regexp.h"
#include <ctype.h>
#include "dfa.h"

View File

@ -4,7 +4,7 @@
#include <u.h>
#include <libc.h>
#include "regexp.h"
#include "/sys/src/libregexp/regcomp.h"
#include "regcomp.h"
#define TRUE 1
#define FALSE 0

View File

@ -1,7 +1,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "regexp.h"
#include "dfa.h"
/***

View File

@ -6,12 +6,12 @@ OFILES=\
regerror.$O\
regexec.$O\
regsub.$O\
regaux.$O\
rregexec.$O\
rregsub.$O\
regprint.$O\
HFILES=/sys/include/regexp.h\
regcomp.h\
regimpl.h\
UPDATE=\
mkfile\
@ -21,8 +21,8 @@ UPDATE=\
</sys/src/cmd/mksyslib
test: test.$O $OFILES
$LD -o test $prereq
$O.regextest: tests/regextest.$O $LIB
$LD -o $target regextest.$O
test2: test2.$O $OFILES
$LD -o test2 $prereq
$O.sysregextest: tests/sysregextest.$O
$LD -o $target sysregextest.$O

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
#include <u.h>
#include <libc.h>
#include "regexp.h"
#include <regexp.h>
void
regerror(char *s)

View File

@ -1,232 +1,190 @@
#include <u.h>
#include <libc.h>
#include "regexp.h"
#include "regcomp.h"
#include <regexp.h>
#include "regimpl.h"
/*
* return 0 if no match
* >0 if a match
* <0 if we ran out of _relist space
*/
static int
regexec1(Reprog *progp, /* program to run */
char *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */
Reljunk *j
)
typedef struct RethreadQ RethreadQ;
struct RethreadQ
{
int flag=0;
Reinst *inst;
Relist *tlp;
char *s;
int i, checkstart;
Rune r, *rp, *ep;
int n;
Relist* tl; /* This list, next list */
Relist* nl;
Relist* tle; /* ends of this and next list */
Relist* nle;
int match;
char *p;
Rethread *head;
Rethread **tail;
};
match = 0;
checkstart = j->starttype;
if(mp)
for(i=0; i<ms; i++) {
mp[i].sp = 0;
mp[i].ep = 0;
int
regexec(Reprog *prog, char *str, Resub *sem, int msize)
{
RethreadQ lists[2], *clist, *nlist, *tmp;
Rethread *t, *nextthr, **availthr;
Reinst *curinst;
Rune r;
char *sp, *ep, endc;
int i, match, first, gen, matchpri, pri;
if(msize > NSUBEXPM)
msize = NSUBEXPM;
if(prog->startinst->gen != 0) {
for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
curinst->gen = 0;
}
clist = lists;
clist->head = nil;
clist->tail = &clist->head;
nlist = lists + 1;
nlist->head = nil;
nlist->tail = &nlist->head;
for(i = 0; i < prog->nthr; i++)
prog->thrpool[i] = prog->threads + i;
availthr = prog->thrpool + prog->nthr;
pri = matchpri = gen = match = 0;
sp = str;
ep = nil;
endc = '\0';
if(sem != nil && msize > 0) {
if(sem->sp != nil)
sp = sem->sp;
if(sem->ep != nil && *sem->ep != '\0') {
ep = sem->ep;
endc = *sem->ep;
*sem->ep = '\0';
}
j->relist[0][0].inst = 0;
j->relist[1][0].inst = 0;
/* Execute machine once for each character, including terminal NUL */
s = j->starts;
do{
/* fast check for first char */
if(checkstart) {
switch(j->starttype) {
case RUNE:
p = utfrune(s, j->startchar);
if(p == 0 || s == j->eol)
return match;
s = p;
break;
case BOL:
if(s == bol)
break;
p = utfrune(s, '\n');
if(p == 0 || s == j->eol)
return match;
s = p+1;
}
r = Runemax + 1;
for(; r != L'\0'; sp += i) {
gen++;
i = chartorune(&r, sp);
first = 1;
t = clist->head;
if(t == nil)
goto Start;
curinst = t->pc;
Again:
if(curinst->gen == gen)
goto Done;
curinst->gen = gen;
switch(curinst->op) {
case ORUNE:
if(r != curinst->r)
goto Done;
case OANY: /* fallthrough */
Any:
nextthr = t->next;
t->pc = curinst + 1;
t->next = nil;
*nlist->tail = t;
nlist->tail = &t->next;
if(nextthr == nil)
break;
t = nextthr;
curinst = t->pc;
goto Again;
case OCLASS:
Class:
if(r < curinst->r)
goto Done;
if(r > curinst->r1) {
curinst++;
goto Class;
}
}
r = *(uchar*)s;
if(r < Runeself)
n = 1;
else
n = chartorune(&r, s);
/* switch run lists */
tl = j->relist[flag];
tle = j->reliste[flag];
nl = j->relist[flag^=1];
nle = j->reliste[flag];
nl->inst = 0;
/* Add first instruction to current list */
if(match == 0)
_renewemptythread(tl, progp->startinst, ms, s);
/* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
for(inst = tlp->inst; ; inst = inst->next){
switch(inst->type){
case RUNE: /* regular character */
if(inst->r == r){
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
}
break;
case LBRA:
tlp->se.m[inst->subid].sp = s;
continue;
case RBRA:
tlp->se.m[inst->subid].ep = s;
continue;
case ANY:
if(r != '\n')
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
case ANYNL:
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
case BOL:
if(s == bol || *(s-1) == '\n')
continue;
break;
case EOL:
if(s == j->eol || r == 0 || r == '\n')
continue;
break;
case CCLASS:
ep = inst->cp->end;
for(rp = inst->cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
}
break;
case NCCLASS:
ep = inst->cp->end;
for(rp = inst->cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1])
break;
if(rp == ep)
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
case OR:
/* evaluate right choice later */
if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
return -1;
/* efficiency: advance and re-evaluate */
continue;
case END: /* Match! */
match = 1;
tlp->se.m[0].ep = s;
if(mp != 0)
_renewmatch(mp, ms, &tlp->se);
break;
nextthr = t->next;
t->pc = curinst->a;
t->next = nil;
*nlist->tail = t;
nlist->tail = &t->next;
if(nextthr == nil)
break;
t = nextthr;
curinst = t->pc;
goto Again;
case ONOTNL:
if(r != L'\n') {
curinst++;
goto Again;
}
goto Done;
case OBOL:
if(sp == str || sp[-1] == '\n') {
curinst++;
goto Again;
}
goto Done;
case OEOL:
if(r == L'\0' && ep == nil) {
curinst++;
goto Again;
}
if(r == L'\n')
goto Any;
goto Done;
case OJMP:
curinst = curinst->a;
goto Again;
case OSPLIT:
nextthr = *--availthr;
nextthr->pc = curinst->b;
if(msize > 0)
memcpy(nextthr->sem, t->sem, sizeof(Resub)*msize);
nextthr->pri = t->pri;
nextthr->next = t->next;
t->next = nextthr;
curinst = curinst->a;
goto Again;
case OSAVE:
if(curinst->sub < msize)
t->sem[curinst->sub].sp = sp;
curinst++;
goto Again;
case OUNSAVE:
if(curinst->sub == 0) {
/* "Highest" priority is the left-most longest. */
if (t->pri > matchpri)
goto Done;
match = 1;
matchpri = t->pri;
if(sem != nil && msize > 0) {
memcpy(sem, t->sem, sizeof(Resub)*msize);
sem->ep = sp;
}
break;
goto Done;
}
if(curinst->sub < msize)
t->sem[curinst->sub].ep = sp;
curinst++;
goto Again;
Done:
*availthr++ = t;
t = t->next;
if(t == nil)
break;
curinst = t->pc;
goto Again;
}
if(s == j->eol)
Start:
/* Start again once if we haven't found anything. */
if(first == 1 && match == 0) {
first = 0;
t = *--availthr;
if(msize > 0)
memset(t->sem, 0, sizeof(Resub)*msize);
/* "Lower" priority thread */
t->pri = matchpri = pri++;
t->next = nil;
curinst = prog->startinst;
goto Again;
}
/* If we have a match and no extant threads, we are done. */
if(match == 1 && nlist->head == nil)
break;
checkstart = j->starttype && nl->inst==0;
s += n;
}while(r);
tmp = clist;
clist = nlist;
nlist = tmp;
nlist->head = nil;
nlist->tail = &nlist->head;
}
if(ep != nil)
*ep = endc;
return match;
}
static int
regexec2(Reprog *progp, /* program to run */
char *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */
Reljunk *j
)
{
int rv;
Relist *relist0, *relist1;
/* mark space */
relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
if(relist0 == nil)
return -1;
relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
if(relist1 == nil){
free(relist1);
return -1;
}
j->relist[0] = relist0;
j->relist[1] = relist1;
j->reliste[0] = relist0 + BIGLISTSIZE - 2;
j->reliste[1] = relist1 + BIGLISTSIZE - 2;
rv = regexec1(progp, bol, mp, ms, j);
free(relist0);
free(relist1);
return rv;
}
extern int
regexec(Reprog *progp, /* program to run */
char *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms) /* number of elements at mp */
{
Reljunk j;
Relist relist0[LISTSIZE], relist1[LISTSIZE];
int rv;
/*
* use user-specified starting/ending location if specified
*/
j.starts = bol;
j.eol = 0;
if(mp && ms>0){
if(mp->sp)
j.starts = mp->sp;
if(mp->ep)
j.eol = mp->ep;
}
j.starttype = 0;
j.startchar = 0;
if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
j.starttype = RUNE;
j.startchar = progp->startinst->r;
}
if(progp->startinst->type == BOL)
j.starttype = BOL;
/* mark space */
j.relist[0] = relist0;
j.relist[1] = relist1;
j.reliste[0] = relist0 + nelem(relist0) - 2;
j.reliste[1] = relist1 + nelem(relist1) - 2;
rv = regexec1(progp, bol, mp, ms, &j);
if(rv >= 0)
return rv;
rv = regexec2(progp, bol, mp, ms, &j);
if(rv >= 0)
return rv;
return -1;
}

104
sys/src/libregexp/regimpl.h Normal file
View File

@ -0,0 +1,104 @@
/*
 * Private constants for the regexp implementation.
 *
 * Two independently numbered groups (L* and T*) share this one enum;
 * both start at 0, so their values overlap and a constant is only
 * meaningful together with the field it was stored in.
 * NOTE(review): the L* names look like lexer token codes (cf. lex(),
 * Parselex.peeklex) and the T* names like parse-tree node types
 * (cf. Renode.op) -- confirm against regcomp.c, which is not visible here.
 */
enum
{
LANY = 0,
LBOL,
LCLASS,
LEND,
LEOL,
LLPAR,
LOR,
LREP,
LRPAR,
LRUNE,
TANY = 0,	/* numbering restarts at 0 for the T* group */
TBOL,
TCAT,
TCLASS,
TEOL,
TNOTNL,
TOR,
TPLUS,
TQUES,
TRUNE,
TSTAR,
TSUB,
NSUBEXPM = 32	/* length of Rethread.sem; regexec/rregexec clamp msize to this */
};
typedef struct Parselex Parselex;
typedef struct Renode Renode;
/*
 * Combined parser and lexer state, passed by pointer to every
 * parsing/lexing routine declared at the bottom of this header.
 * NOTE(review): the per-field notes below are inferred from names and
 * types only; the code that fills these fields (regcomp.c) is not
 * visible here -- confirm before relying on them.
 */
struct Parselex
{
/* Parse */
Renode *next;	/* presumably the next unused slot in nodes -- TODO confirm */
Renode *nodes;	/* presumably the Renode array being filled -- TODO confirm */
int sub;	/* presumably a sub-expression counter -- TODO confirm */
int instrs;	/* presumably a running instruction count -- TODO confirm */
jmp_buf exitenv;	/* presumably the longjmp target for bailing out on error -- TODO confirm */
/* Lex */
void (*getnextr)(Parselex*);	/* same signature as getnextr() and getnextrlit() below */
char *rawexp;
char *orig;
Rune rune;
Rune peek;
int peeklex;
int done;
int literal;
Rune cpairs[400+2];	/* presumably character-class range pairs, nc of them in use -- TODO confirm */
int nc;
};
/*
 * Node type returned by the e0..e3 and buildclass* routines and
 * consumed by compile()/compile1().
 * NOTE(review): presumably one node of the parse tree, with op taking
 * the T* values from the enum above -- confirm in regcomp.c.
 */
struct Renode
{
int op;
Renode *left;
Rune r;
/* r1, sub and right share storage: only one of them is valid for a given node */
union
{
Rune r1;
int sub;
Renode *right;
};
int nclass;
};
/*
 * One thread of the matching machine run by regexec()/rregexec().
 * Threads are taken from and returned to Reprog.thrpool and are
 * chained into the current/next run queues through next.
 */
struct Rethread
{
Reinst *pc;	/* instruction this thread resumes at on the next input character */
Resub sem[NSUBEXPM];	/* this thread's own sub-expression positions; copied to the forked thread on OSPLIT */
int pri;	/* start-order priority; a thread with pri greater than the recorded match's is dropped at the final OUNSAVE */
Rethread *next;	/* next thread in the queue this thread is on */
};
/*
 * One instruction of a compiled regular expression.  Instructions are
 * stored contiguously: the matcher steps to the following one with
 * curinst++ and iterates startinst .. startinst+len.
 */
struct Reinst
{
char op;	/* opcode: one of the O* values (ORUNE, OSPLIT, ...) switched on by the matcher */
int gen;	/* last matcher step that executed this instruction; a repeat visit in the same step is skipped */
Reinst *a;	/* OJMP target, first OSPLIT branch, or where an OCLASS hit continues */
union
{
Rune r;	/* ORUNE: rune to match; OCLASS: low end of the range */
int sub;	/* OSAVE/OUNSAVE: sub-expression index (0 is the whole match) */
};
union
{
Rune r1;	/* OCLASS: high end of the range */
Reinst *b;	/* OSPLIT: second branch, run by the newly forked thread */
};
};
/*
 * Forward declarations only; none of these functions is defined in
 * this header.  Because they are static, every file that includes the
 * header gets its own file-local declaration.
 * NOTE(review): presumably all of them are defined in regcomp.c --
 * confirm, since files that include this header without defining them
 * (e.g. the matchers) carry unused static declarations.
 */
static int lex(Parselex*);
static void getnextr(Parselex*);
static void getnextrlit(Parselex*);
static void getclass(Parselex*);
static Renode *e0(Parselex*);
static Renode *e1(Parselex*);
static Renode *e2(Parselex*);
static Renode *e3(Parselex*);
static Renode *buildclass(Parselex*);
static Renode *buildclassn(Parselex*);
static int pcmp(void*, void*);
static Reprog *regcomp1(char*, int, int);
static Reinst *compile(Renode*, Reprog*, int);
static Reinst *compile1(Renode*, Reinst*, int*, int);
static void prtree(Renode*, int, int);

View File

@ -0,0 +1,66 @@
#include <u.h>
#include <libc.h>
#include <regexp.h>
#include <regimpl.h>
/*
 * Print a single compiled instruction to f.
 *
 * The instruction's address is written first; a recognised opcode then
 * adds its mnemonic, the operands that opcode uses, and a newline.
 * An opcode not listed below contributes nothing beyond the address.
 * The result is the sum of the fmtprint return values.
 */
static int
fmtprinst(Fmt *f, Reinst *inst)
{
	int total;

	total = fmtprint(f, "%p ", inst);
	switch(inst->op) {
	case OANY:
		return total + fmtprint(f, "OANY \t.\n");
	case OBOL:
		return total + fmtprint(f, "OBOL \t^\n");
	case OEOL:
		return total + fmtprint(f, "OEOL \t$\n");
	case ONOTNL:
		return total + fmtprint(f, "ONOTNL\n");
	case ORUNE:
		return total + fmtprint(f, "ORUNE\t%C\n", inst->r);
	case OCLASS:
		return total + fmtprint(f, "OCLASS\t%C-%C %p\n", inst->r, inst->r1, inst->a);
	case OJMP:
		return total + fmtprint(f, "OJMP \t%p\n", inst->a);
	case OSPLIT:
		return total + fmtprint(f, "OSPLIT\t%p %p\n", inst->a, inst->b);
	case OSAVE:
		return total + fmtprint(f, "OSAVE\t%d\n", inst->sub);
	case OUNSAVE:
		return total + fmtprint(f, "OUNSAVE\t%d\n", inst->sub);
	}
	return total;
}
/*
 * Print all reprog->len instructions of reprog, beginning at
 * reprog->startinst, by handing each one to fmtprinst.
 * The result is the sum of the fmtprinst return values.
 */
static int
fmtprprog(Fmt *f, Reprog *reprog)
{
	Reinst *pc, *end;
	int total;

	total = 0;
	pc = reprog->startinst;
	end = reprog->startinst + reprog->len;
	while(pc < end)
		total += fmtprinst(f, pc++);
	return total;
}
/*
 * Take the next argument from f->args as a Reprog* and print the
 * program's instruction listing to f.
 * NOTE(review): the int (Fmt*) signature suggests this is meant to be
 * registered as a custom verb with fmtinstall -- confirm at the caller.
 */
int
reprogfmt(Fmt *f)
{
	return fmtprprog(f, va_arg(f->args, Reprog*));
}

View File

@ -1,63 +1,66 @@
#include <u.h>
#include <libc.h>
#include "regexp.h"
#include <regexp.h>
/* substitute into one string using the matches from the last regexec() */
extern void
regsub(char *sp, /* source string */
char *dp, /* destination string */
int dlen,
Resub *mp, /* subexpression elements */
int ms) /* number of elements pointed to by mp */
void
regsub(char *src, char *dst, int dlen, Resub *match, int msize)
{
char *ssp, *ep;
int i;
char *ep, c;
ep = dp+dlen-1;
while(*sp != '\0'){
if(*sp == '\\'){
switch(*++sp){
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
i = *sp-'0';
if(mp!=0 && mp[i].sp != 0 && ms>i)
for(ssp = mp[i].sp;
ssp < mp[i].ep;
ssp++)
if(dp < ep)
*dp++ = *ssp;
break;
case '\\':
if(dp < ep)
*dp++ = '\\';
break;
case '\0':
sp--;
break;
default:
if(dp < ep)
*dp++ = *sp;
break;
ep = dst + dlen-1;
for(;*src != '\0'; src++) switch(*src) {
case '\\':
switch(*++src) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
i = *src - '0';
if(match != nil && i < msize && match[i].ep != nil) {
c = *match[i].ep;
*match[i].ep = '\0';
dst = strecpy(dst, ep+1, match[i].sp);
*match[i].ep = c;
}
}else if(*sp == '&'){
if(mp!=0 && mp[0].sp != 0 && ms>0)
for(ssp = mp[0].sp;
ssp < mp[0].ep; ssp++)
if(dp < ep)
*dp++ = *ssp;
}else{
if(dp < ep)
*dp++ = *sp;
break;
case '\\':
if(dst < ep)
*dst++ = '\\';
else
goto End;
break;
case '\0':
goto End;
default:
if(dst < ep)
*dst++ = *src;
else
goto End;
break;
}
sp++;
break;
case '&':
if(match != nil && msize > 0 && match[0].sp != nil) {
c = *match[0].ep;
*match[0].ep = '\0';
dst = strecpy(dst, ep+1, match[0].sp);
*match[0].ep = c;
}
break;
default:
if(dst < ep)
*dst++ = *src;
else
goto End;
break;
}
*dp = '\0';
End:
*dst = '\0';
}

View File

@ -1,212 +1,189 @@
#include <u.h>
#include <libc.h>
#include "regexp.h"
#include "regcomp.h"
#include <regexp.h>
#include "regimpl.h"
/*
* return 0 if no match
* >0 if a match
* <0 if we ran out of _relist space
*/
static int
rregexec1(Reprog *progp, /* program to run */
Rune *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */
Reljunk *j)
typedef struct RethreadQ RethreadQ;
struct RethreadQ
{
int flag=0;
Reinst *inst;
Relist *tlp;
Rune *s;
int i, checkstart;
Rune r, *rp, *ep;
Relist* tl; /* This list, next list */
Relist* nl;
Relist* tle; /* ends of this and next list */
Relist* nle;
int match;
Rune *p;
Rethread *head;
Rethread **tail;
};
match = 0;
checkstart = j->startchar;
if(mp)
for(i=0; i<ms; i++) {
mp[i].rsp = 0;
mp[i].rep = 0;
int
rregexec(Reprog *prog, Rune *str, Resub *sem, int msize)
{
RethreadQ lists[2], *clist, *nlist, *tmp;
Rethread *t, *nextthr, **availthr;
Reinst *curinst;
Rune *rsp, *rep, endr, last;
int i, match, first, gen, pri, matchpri;
if(msize > NSUBEXPM)
msize = NSUBEXPM;
if(prog->startinst->gen != 0) {
for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
curinst->gen = 0;
}
clist = lists;
clist->head = nil;
clist->tail = &clist->head;
nlist = lists + 1;
nlist->head = nil;
nlist->tail = &nlist->head;
for(i = 0; i < prog->nthr; i++)
prog->thrpool[i] = prog->threads + i;
availthr = prog->thrpool + prog->nthr;
pri = matchpri = gen = match = 0;
rsp = str;
rep = nil;
endr = L'\0';
if(sem != nil && msize > 0) {
if(sem->rsp != nil)
rsp = sem->rsp;
if(sem->rep != nil && *sem->rep != L'\0') {
rep = sem->rep;
endr = *sem->rep;
*sem->rep = '\0';
}
j->relist[0][0].inst = 0;
j->relist[1][0].inst = 0;
/* Execute machine once for each character, including terminal NUL */
s = j->rstarts;
do{
/* fast check for first char */
if(checkstart) {
switch(j->starttype) {
case RUNE:
p = runestrchr(s, j->startchar);
if(p == 0 || s == j->reol)
return match;
s = p;
break;
case BOL:
if(s == bol)
break;
p = runestrchr(s, '\n');
if(p == 0 || s == j->reol)
return match;
s = p+1;
}
last = 1;
for(; last != L'\0'; rsp++) {
gen++;
last = *rsp;
first = 1;
t = clist->head;
if(t == nil)
goto Start;
curinst = t->pc;
Again:
if(curinst->gen == gen)
goto Done;
curinst->gen = gen;
switch(curinst->op) {
case ORUNE:
if(*rsp != curinst->r)
goto Done;
case OANY: /* fallthrough */
Any:
nextthr = t->next;
t->pc = curinst + 1;
t->next = nil;
*nlist->tail = t;
nlist->tail = &t->next;
if(nextthr == nil)
break;
t = nextthr;
curinst = t->pc;
goto Again;
case OCLASS:
Class:
if(*rsp < curinst->r)
goto Done;
if(*rsp > curinst->r1) {
curinst++;
goto Class;
}
}
r = *s;
/* switch run lists */
tl = j->relist[flag];
tle = j->reliste[flag];
nl = j->relist[flag^=1];
nle = j->reliste[flag];
nl->inst = 0;
/* Add first instruction to current list */
_rrenewemptythread(tl, progp->startinst, ms, s);
/* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){
for(inst=tlp->inst; ; inst = inst->next){
switch(inst->type){
case RUNE: /* regular character */
if(inst->r == r)
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
case LBRA:
tlp->se.m[inst->subid].rsp = s;
continue;
case RBRA:
tlp->se.m[inst->subid].rep = s;
continue;
case ANY:
if(r != '\n')
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
case ANYNL:
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
case BOL:
if(s == bol || *(s-1) == '\n')
continue;
break;
case EOL:
if(s == j->reol || r == 0 || r == '\n')
continue;
break;
case CCLASS:
ep = inst->cp->end;
for(rp = inst->cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
}
break;
case NCCLASS:
ep = inst->cp->end;
for(rp = inst->cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1])
break;
if(rp == ep)
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
return -1;
break;
case OR:
/* evaluate right choice later */
if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
return -1;
/* efficiency: advance and re-evaluate */
continue;
case END: /* Match! */
match = 1;
tlp->se.m[0].rep = s;
if(mp != 0)
_renewmatch(mp, ms, &tlp->se);
break;
nextthr = t->next;
t->pc = curinst->a;
t->next = nil;
*nlist->tail = t;
nlist->tail = &t->next;
if(nextthr == nil)
break;
t = nextthr;
curinst = t->pc;
goto Again;
case ONOTNL:
if(*rsp != L'\n') {
curinst++;
goto Again;
}
goto Done;
case OBOL:
if(rsp == str || rsp[-1] == L'\n') {
curinst++;
goto Again;
}
goto Done;
case OEOL:
if(*rsp == L'\0' && rep == nil) {
curinst++;
goto Again;
}
if(*rsp == '\n')
goto Any;
goto Done;
case OJMP:
curinst = curinst->a;
goto Again;
case OSPLIT:
nextthr = *--availthr;
nextthr->pc = curinst->b;
if(msize > 0)
memcpy(nextthr->sem, t->sem, sizeof(Resub)*msize);
nextthr->pri = t->pri;
nextthr->next = t->next;
t->next = nextthr;
curinst = curinst->a;
goto Again;
case OSAVE:
if(curinst->sub < msize)
t->sem[curinst->sub].rsp = rsp;
curinst++;
goto Again;
case OUNSAVE:
if(curinst->sub == 0) {
/* "Highest" priority is the left-most longest. */
if (t->pri > matchpri)
goto Done;
match = 1;
matchpri = t->pri;
if(sem != nil && msize > 0) {
memcpy(sem, t->sem, sizeof(Resub)*msize);
sem->rep = rsp;
}
break;
goto Done;
}
if(curinst->sub < msize)
t->sem[curinst->sub].rep = rsp;
curinst++;
goto Again;
Done:
*availthr++ = t;
t = t->next;
if(t == nil)
break;
curinst = t->pc;
goto Again;
}
if(s == j->reol)
Start:
/* Start again once if we haven't found anything. */
if(first == 1 && match == 0) {
first = 0;
t = *--availthr;
if(msize > 0)
memset(t->sem, 0, sizeof(Resub)*msize);
/* "Lower" priority thread */
t->pri = matchpri = pri++;
t->next = nil;
curinst = prog->startinst;
goto Again;
}
/* If we have a match and no extant threads, we are done. */
if(match == 1 && nlist->head == nil)
break;
checkstart = j->startchar && nl->inst==0;
s++;
}while(r);
tmp = clist;
clist = nlist;
nlist = tmp;
nlist->head = nil;
nlist->tail = &nlist->head;
}
if(rep != nil)
*rep = endr;
return match;
}
static int
rregexec2(Reprog *progp, /* program to run */
Rune *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */
Reljunk *j
)
{
Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
/* mark space */
j->relist[0] = relist0;
j->relist[1] = relist1;
j->reliste[0] = relist0 + nelem(relist0) - 2;
j->reliste[1] = relist1 + nelem(relist1) - 2;
return rregexec1(progp, bol, mp, ms, j);
}
extern int
rregexec(Reprog *progp, /* program to run */
Rune *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms) /* number of elements at mp */
{
Reljunk j;
Relist relist0[LISTSIZE], relist1[LISTSIZE];
int rv;
/*
* use user-specified starting/ending location if specified
*/
j.rstarts = bol;
j.reol = 0;
if(mp && ms>0){
if(mp->sp)
j.rstarts = mp->rsp;
if(mp->ep)
j.reol = mp->rep;
}
j.starttype = 0;
j.startchar = 0;
if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
j.starttype = RUNE;
j.startchar = progp->startinst->r;
}
if(progp->startinst->type == BOL)
j.starttype = BOL;
/* mark space */
j.relist[0] = relist0;
j.relist[1] = relist1;
j.reliste[0] = relist0 + nelem(relist0) - 2;
j.reliste[1] = relist1 + nelem(relist1) - 2;
rv = rregexec1(progp, bol, mp, ms, &j);
if(rv >= 0)
return rv;
rv = rregexec2(progp, bol, mp, ms, &j);
if(rv >= 0)
return rv;
return -1;
}

View File

@ -1,64 +1,66 @@
#include <u.h>
#include <libc.h>
#include "regexp.h"
#include <regexp.h>
/* substitute into one string using the matches from the last regexec() */
extern void
rregsub(Rune *sp, /* source string */
Rune *dp, /* destination string */
int dlen,
Resub *mp, /* subexpression elements */
int ms) /* number of elements pointed to by mp */
void
rregsub(Rune *src, Rune *dst, int dlen, Resub *match, int msize)
{
Rune *ssp, *ep;
int i;
Rune *ep, r;
ep = dp+(dlen/sizeof(Rune))-1;
while(*sp != '\0'){
if(*sp == '\\'){
switch(*++sp){
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
i = *sp-'0';
if(mp[i].rsp != 0 && mp!=0 && ms>i)
for(ssp = mp[i].rsp;
ssp < mp[i].rep;
ssp++)
if(dp < ep)
*dp++ = *ssp;
break;
case '\\':
if(dp < ep)
*dp++ = '\\';
break;
case '\0':
sp--;
break;
default:
if(dp < ep)
*dp++ = *sp;
break;
ep = dst + dlen-1;
for(;*src != L'\0'; src++) switch(*src) {
case L'\\':
switch(*++src) {
case L'0':
case L'1':
case L'2':
case L'3':
case L'4':
case L'5':
case L'6':
case L'7':
case L'8':
case L'9':
i = *src - L'0';
if(match != nil && i < msize && match[i].rsp != nil) {
r = *match[i].rep;
*match[i].rep = L'\0';
dst = runestrecpy(dst, ep+1, match[i].rsp);
*match[i].rep = r;
}
}else if(*sp == '&'){
if(mp[0].rsp != 0 && mp!=0 && ms>0)
if(mp[0].rsp != 0)
for(ssp = mp[0].rsp;
ssp < mp[0].rep; ssp++)
if(dp < ep)
*dp++ = *ssp;
}else{
if(dp < ep)
*dp++ = *sp;
break;
case L'\\':
if(dst < ep)
*dst++ = L'\\';
else
goto End;
break;
case L'\0':
goto End;
default:
if(dst < ep)
*dst++ = *src;
else
goto End;
break;
}
sp++;
break;
case L'&':
if(match != nil && msize > 0 && match[0].rsp != nil) {
r = *match[0].rep;
*match[0].rep = L'\0';
dst = runestrecpy(dst, ep+1, match[0].rsp);
*match[0].rep = r;
}
break;
default:
if(dst < ep)
*dst++ = *src;
else
goto End;
break;
}
*dp = '\0';
End:
*dst = L'\0';
}