Merging pcre-6 branch into trunk
parent
d2d4230815
commit
c07279dd40
|
@ -14,6 +14,7 @@ AM_CFLAGS = \
|
||||||
-I$(top_srcdir) \
|
-I$(top_srcdir) \
|
||||||
-I$(top_srcdir)/$(MOO_SRC_PREFIX) \
|
-I$(top_srcdir)/$(MOO_SRC_PREFIX) \
|
||||||
-I$(top_builddir)/$(MOO_SRC_PREFIX) \
|
-I$(top_builddir)/$(MOO_SRC_PREFIX) \
|
||||||
|
-I$(top_builddir)/$(MOO_SRC_PREFIX)/mooutils/pcre \
|
||||||
$(MOO_CFLAGS) \
|
$(MOO_CFLAGS) \
|
||||||
$(PYTHON_INCLUDES) \
|
$(PYTHON_INCLUDES) \
|
||||||
$(PYGTK_CFLAGS) \
|
$(PYGTK_CFLAGS) \
|
||||||
|
|
|
@ -65,6 +65,9 @@ MOO_AC_XDGMIME
|
||||||
# Python stuff
|
# Python stuff
|
||||||
MOO_AC_PYGTK
|
MOO_AC_PYGTK
|
||||||
|
|
||||||
|
# pcre
|
||||||
|
MOO_AC_PCRE
|
||||||
|
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
# Python module
|
# Python module
|
||||||
|
@ -224,6 +227,7 @@ AC_OUTPUT([
|
||||||
Makefile
|
Makefile
|
||||||
moo/moo-config.h
|
moo/moo-config.h
|
||||||
moo/mooterm/termhelper_res.rc
|
moo/mooterm/termhelper_res.rc
|
||||||
|
moo/mooutils/pcre/pcre.h
|
||||||
tests/pyapp.py
|
tests/pyapp.py
|
||||||
m4/Makefile
|
m4/Makefile
|
||||||
moo.pc
|
moo.pc
|
||||||
|
|
|
@ -4,6 +4,7 @@ EXTRA_DIST = \
|
||||||
moo-funcs.m4 \
|
moo-funcs.m4 \
|
||||||
moo-gtk.m4 \
|
moo-gtk.m4 \
|
||||||
moo-os.m4 \
|
moo-os.m4 \
|
||||||
|
moo-pcre.m4 \
|
||||||
moo-pygtk.m4 \
|
moo-pygtk.m4 \
|
||||||
moo-python.m4 \
|
moo-python.m4 \
|
||||||
moo-windres.m4 \
|
moo-windres.m4 \
|
||||||
|
|
|
@ -0,0 +1,122 @@
|
||||||
|
##############################################################################
|
||||||
|
# MOO_AC_PCRE
|
||||||
|
# This is essentially pcre's configure.in, contains checks and defines
|
||||||
|
# needed for pcre
|
||||||
|
#
|
||||||
|
AC_DEFUN([MOO_AC_PCRE],[
|
||||||
|
|
||||||
|
dnl Provide the current PCRE version information. Do not use numbers
|
||||||
|
dnl with leading zeros for the minor version, as they end up in a C
|
||||||
|
dnl macro, and may be treated as octal constants. Stick to single
|
||||||
|
dnl digits for minor numbers less than 10. There are unlikely to be
|
||||||
|
dnl that many releases anyway.
|
||||||
|
|
||||||
|
PCRE_MAJOR=6
|
||||||
|
PCRE_MINOR=4
|
||||||
|
PCRE_DATE=05-Sep-2005
|
||||||
|
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
|
||||||
|
|
||||||
|
dnl Default values for miscellaneous macros
|
||||||
|
|
||||||
|
AC_DEFINE(POSIX_MALLOC_THRESHOLD, 10, [POSIX_MALLOC_THRESHOLD])
|
||||||
|
|
||||||
|
AC_HEADER_STDC
|
||||||
|
AC_CHECK_HEADERS(limits.h)
|
||||||
|
|
||||||
|
dnl Checks for typedefs, structures, and compiler characteristics.
|
||||||
|
|
||||||
|
AC_C_CONST
|
||||||
|
AC_TYPE_SIZE_T
|
||||||
|
|
||||||
|
AC_CHECK_TYPES([long long], [pcre_have_long_long="1"], [pcre_have_long_long="0"])
|
||||||
|
AC_CHECK_TYPES([unsigned long long], [pcre_have_ulong_long="1"], [pcre_have_ulong_long="0"])
|
||||||
|
AC_SUBST(pcre_have_long_long)
|
||||||
|
AC_SUBST(pcre_have_ulong_long)
|
||||||
|
|
||||||
|
dnl Checks for library functions.
|
||||||
|
|
||||||
|
AC_CHECK_FUNCS(bcopy memmove strerror strtoq strtoll)
|
||||||
|
|
||||||
|
dnl Handle --enable-utf8
|
||||||
|
AC_DEFINE(SUPPORT_UTF8, , [SUPPORT_UTF8])
|
||||||
|
|
||||||
|
# XXX
|
||||||
|
# dnl Handle --enable-unicode-properties
|
||||||
|
AC_DEFINE(SUPPORT_UCP, , [SUPPORT_UCP])
|
||||||
|
|
||||||
|
|
||||||
|
AC_DEFINE(PCRE_EXPORT, , [PCRE_EXPORT - empty since we do not need pcre api be exported])
|
||||||
|
|
||||||
|
# dnl Handle --enable-newline-is-cr
|
||||||
|
#
|
||||||
|
# AC_ARG_ENABLE(newline-is-cr,
|
||||||
|
# [ --enable-newline-is-cr use CR as the newline character],
|
||||||
|
# if test "$enableval" = "yes"; then
|
||||||
|
# NEWLINE=-DNEWLINE=13
|
||||||
|
# fi
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# dnl Handle --enable-newline-is-lf
|
||||||
|
#
|
||||||
|
# AC_ARG_ENABLE(newline-is-lf,
|
||||||
|
# [ --enable-newline-is-lf use LF as the newline character],
|
||||||
|
# if test "$enableval" = "yes"; then
|
||||||
|
# NEWLINE=-DNEWLINE=10
|
||||||
|
# fi
|
||||||
|
# )
|
||||||
|
AC_DEFINE(NEWLINE, '\n', [The value of NEWLINE determines the newline character])
|
||||||
|
|
||||||
|
# XXX
|
||||||
|
# dnl Handle --enable-ebcdic
|
||||||
|
#
|
||||||
|
# AC_ARG_ENABLE(ebcdic,
|
||||||
|
# [ --enable-ebcdic assume EBCDIC coding rather than ASCII],
|
||||||
|
# if test "$enableval" == "yes"; then
|
||||||
|
# EBCDIC=-DEBCDIC=1
|
||||||
|
# fi
|
||||||
|
# )
|
||||||
|
AC_DEFINE(EBCDIC, 0, [If you are compiling for a system that uses EBCDIC instead of ASCII dnl
|
||||||
|
character codes, define this macro as 1.])
|
||||||
|
|
||||||
|
|
||||||
|
# dnl Handle --disable-stack-for-recursion
|
||||||
|
#
|
||||||
|
# AC_ARG_ENABLE(stack-for-recursion,
|
||||||
|
# [ --disable-stack-for-recursion disable use of stack recursion when matching],
|
||||||
|
# if test "$enableval" = "no"; then
|
||||||
|
# NO_RECURSE=-DNO_RECURSE
|
||||||
|
# fi
|
||||||
|
# )
|
||||||
|
|
||||||
|
# dnl Handle --with-link-size=n
|
||||||
|
# AC_ARG_WITH(link-size,
|
||||||
|
# [ --with-link-size=2 internal link size (2, 3, or 4 allowed)],
|
||||||
|
# LINK_SIZE=-DLINK_SIZE=$withval
|
||||||
|
# )
|
||||||
|
AC_DEFINE(LINK_SIZE, 2, [The value of LINK_SIZE determines the number of bytes used to store dnl
|
||||||
|
links as offsets within the compiled regex. The default is 2, which allows for dnl
|
||||||
|
compiled patterns up to 64K long. This covers the vast majority of cases. dnl
|
||||||
|
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for dnl
|
||||||
|
longer patterns in extreme cases.])
|
||||||
|
|
||||||
|
|
||||||
|
# dnl Handle --with-match_limit=n
|
||||||
|
#
|
||||||
|
# AC_ARG_WITH(match-limit,
|
||||||
|
# [ --with-match-limit=10000000 default limit on internal looping)],
|
||||||
|
# MATCH_LIMIT=-DMATCH_LIMIT=$withval
|
||||||
|
# )
|
||||||
|
AC_DEFINE(MATCH_LIMIT, 10000000, [The value of MATCH_LIMIT determines the default number of times the match() dnl
|
||||||
|
function can be called during a single execution of pcre_exec(). (There is a dnl
|
||||||
|
runtime method of setting a different limit.) The limit exists in order to dnl
|
||||||
|
catch runaway regular expressions that take for ever to determine that they do dnl
|
||||||
|
not match. The default is set very large so that it does not accidentally catch dnl
|
||||||
|
legitimate cases.])
|
||||||
|
|
||||||
|
|
||||||
|
AC_SUBST(PCRE_MAJOR)
|
||||||
|
AC_SUBST(PCRE_MINOR)
|
||||||
|
AC_SUBST(PCRE_DATE)
|
||||||
|
AC_SUBST(PCRE_VERSION)
|
||||||
|
|
||||||
|
]) # end of MOO_AC_PCRE
|
|
@ -43,11 +43,11 @@ nodist_moopython_sources =
|
||||||
moopython_cleanfiles =
|
moopython_cleanfiles =
|
||||||
|
|
||||||
mooedit_defs_files = \
|
mooedit_defs_files = \
|
||||||
$(moopython_srcdir)/mooeditor.defs \
|
$(moopython)/mooeditor.defs \
|
||||||
$(moopython_srcdir)/mooplugin.defs
|
$(moopython)/mooplugin.defs
|
||||||
|
|
||||||
# mooutils_override_files = \
|
# mooutils_override_files = \
|
||||||
# $(moopython_srcdir)/moowindow.override
|
# $(moopython)/moowindow.override
|
||||||
|
|
||||||
moopython_extra_dist = \
|
moopython_extra_dist = \
|
||||||
$(moopython_plugins) \
|
$(moopython_plugins) \
|
||||||
|
|
|
@ -155,16 +155,40 @@ moo_extra_dist += \
|
||||||
moopcre = $(mooutils_prefix)/pcre
|
moopcre = $(mooutils_prefix)/pcre
|
||||||
|
|
||||||
moopcre_sources = \
|
moopcre_sources = \
|
||||||
$(moopcre)/get.c \
|
$(moopcre)/pcre_chartables.c \
|
||||||
$(moopcre)/internal.h \
|
$(moopcre)/pcre_compile.c \
|
||||||
$(moopcre)/pcre-config.h \
|
$(moopcre)/pcre_config.c \
|
||||||
$(moopcre)/maketables.c \
|
$(moopcre)/pcre_exec.c \
|
||||||
$(moopcre)/pcre.c \
|
$(moopcre)/pcre_fullinfo.c \
|
||||||
$(moopcre)/pcre.h \
|
$(moopcre)/pcre_get.c \
|
||||||
$(moopcre)/study.c
|
$(moopcre)/pcre_globals.c \
|
||||||
|
$(moopcre)/pcre_info.c \
|
||||||
|
$(moopcre)/pcre_internal.h \
|
||||||
|
$(moopcre)/pcre_maketables.c \
|
||||||
|
$(moopcre)/pcre_ord2utf8.c \
|
||||||
|
$(moopcre)/pcre_refcount.c \
|
||||||
|
$(moopcre)/pcre_study.c \
|
||||||
|
$(moopcre)/pcre_tables.c \
|
||||||
|
$(moopcre)/pcre_try_flipped.c \
|
||||||
|
$(moopcre)/pcre_ucp_findchar.c \
|
||||||
|
$(moopcre)/pcre_valid_utf8.c \
|
||||||
|
$(moopcre)/pcre_version.c \
|
||||||
|
$(moopcre)/pcre_xclass.c \
|
||||||
|
$(moopcre)/ucp.h \
|
||||||
|
$(moopcre)/ucpinternal.h
|
||||||
|
|
||||||
|
$(moopcre)/pcre_internal.h: $(moopcre)/pcre.h
|
||||||
|
|
||||||
moo_sources += $(moopcre_sources)
|
moo_sources += $(moopcre_sources)
|
||||||
moo_extra_dist += $(moopcre)/chartables.c
|
moo_extra_dist += \
|
||||||
|
$(moopcre)/AUTHORS \
|
||||||
|
$(moopcre)/COPYING \
|
||||||
|
$(moopcre)/ChangeLog \
|
||||||
|
$(moopcre)/NEWS \
|
||||||
|
$(moopcre)/dftables.c \
|
||||||
|
$(moopcre)/pcre_printint.src \
|
||||||
|
$(moopcre)/ucptable.c \
|
||||||
|
$(moopcre)/pcre.h.in
|
||||||
|
|
||||||
|
|
||||||
###########################################################################
|
###########################################################################
|
||||||
|
|
|
@ -53,7 +53,7 @@
|
||||||
|
|
||||||
#include "eggregex.h"
|
#include "eggregex.h"
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
#include "pcre/pcre.h"
|
#include "pcre.h"
|
||||||
|
|
||||||
/* FIXME when this is in glib */
|
/* FIXME when this is in glib */
|
||||||
#define _(s) s
|
#define _(s) s
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
THE MAIN PCRE LIBRARY
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Written by: Philip Hazel
|
||||||
|
Email local part: ph10
|
||||||
|
Email domain: cam.ac.uk
|
||||||
|
|
||||||
|
University of Cambridge Computing Service,
|
||||||
|
Cambridge, England. Phone: +44 1223 334714.
|
||||||
|
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
All rights reserved
|
||||||
|
|
||||||
|
|
||||||
|
THE C++ WRAPPER LIBRARY
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
Written by: Google Inc.
|
||||||
|
|
||||||
|
Copyright (c) 2005 Google Inc
|
||||||
|
All rights reserved
|
||||||
|
|
||||||
|
####
|
|
@ -0,0 +1,68 @@
|
||||||
|
PCRE LICENCE
|
||||||
|
------------
|
||||||
|
|
||||||
|
PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Release 6 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||||
|
specified below. The documentation for PCRE, supplied in the "doc"
|
||||||
|
directory, is distributed under the same terms as the software itself.
|
||||||
|
|
||||||
|
The basic library functions are written in C and are freestanding. Also
|
||||||
|
included in the distribution is a set of C++ wrapper functions.
|
||||||
|
|
||||||
|
|
||||||
|
THE BASIC LIBRARY FUNCTIONS
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
Written by: Philip Hazel
|
||||||
|
Email local part: ph10
|
||||||
|
Email domain: cam.ac.uk
|
||||||
|
|
||||||
|
University of Cambridge Computing Service,
|
||||||
|
Cambridge, England. Phone: +44 1223 334714.
|
||||||
|
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
THE C++ WRAPPER FUNCTIONS
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
Contributed by: Google Inc.
|
||||||
|
|
||||||
|
Copyright (c) 2005, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
THE "BSD" LICENCE
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the name of Google
|
||||||
|
Inc. nor the names of their contributors may be used to endorse or
|
||||||
|
promote products derived from this software without specific prior
|
||||||
|
written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
End
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,233 @@
|
||||||
|
News about PCRE releases
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
Release 6.0 07-Jun-05
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The release number has been increased to 6.0 because of the addition of several
|
||||||
|
major new pieces of functionality.
|
||||||
|
|
||||||
|
A new function, pcre_dfa_exec(), which implements pattern matching using a DFA
|
||||||
|
algorithm, has been added. This has a number of advantages for certain cases,
|
||||||
|
though it does run more slowly, and lacks the ability to capture substrings. On
|
||||||
|
the other hand, it does find all matches, not just the first, and it works
|
||||||
|
better for partial matching. The pcrematching man page discusses the
|
||||||
|
differences.
|
||||||
|
|
||||||
|
The pcretest program has been enhanced so that it can make use of the new
|
||||||
|
pcre_dfa_exec() matching function and the extra features it provides.
|
||||||
|
|
||||||
|
The distribution now includes a C++ wrapper library. This is built
|
||||||
|
automatically if a C++ compiler is found. The pcrecpp man page discusses this
|
||||||
|
interface.
|
||||||
|
|
||||||
|
The code itself has been re-organized into many more files, one for each
|
||||||
|
function, so it no longer requires everything to be linked in when static
|
||||||
|
linkage is used. As a consequence, some internal functions have had to have
|
||||||
|
their names exposed. These functions all have names starting with _pcre_. They
|
||||||
|
are undocumented, and are not intended for use by outside callers.
|
||||||
|
|
||||||
|
The pcregrep program has been enhanced with new functionality such as
|
||||||
|
multiline-matching and options for outputting more matching context. See the
|
||||||
|
ChangeLog for a complete list of changes to the library and the utility
|
||||||
|
programs.
|
||||||
|
|
||||||
|
|
||||||
|
Release 5.0 13-Sep-04
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The licence under which PCRE is released has been changed to the more
|
||||||
|
conventional "BSD" licence.
|
||||||
|
|
||||||
|
In the code, some bugs have been fixed, and there are also some major changes
|
||||||
|
in this release (which is why I've increased the number to 5.0). Some changes
|
||||||
|
are internal rearrangements, and some provide a number of new facilities. The
|
||||||
|
new features are:
|
||||||
|
|
||||||
|
1. There's an "automatic callout" feature that inserts callouts before every
|
||||||
|
item in the regex, and there's a new callout field that gives the position
|
||||||
|
in the pattern - useful for debugging and tracing.
|
||||||
|
|
||||||
|
2. The extra_data structure can now be used to pass in a set of character
|
||||||
|
tables at exec time. This is useful if compiled regex are saved and re-used
|
||||||
|
at a later time when the tables may not be at the same address. If the
|
||||||
|
default internal tables are used, the pointer saved with the compiled
|
||||||
|
pattern is now set to NULL, which means that you don't need to do anything
|
||||||
|
special unless you are using custom tables.
|
||||||
|
|
||||||
|
3. It is possible, with some restrictions on the content of the regex, to
|
||||||
|
request "partial" matching. A special return code is given if all of the
|
||||||
|
subject string matched part of the regex. This could be useful for testing
|
||||||
|
an input field as it is being typed.
|
||||||
|
|
||||||
|
4. There is now some optional support for Unicode character properties, which
|
||||||
|
means that pattern items such as \p{Lu} and \X can now be used. Only
|
||||||
|
the general category properties are supported. If PCRE is compiled with this
|
||||||
|
support, an additional 90K data structure is included, which increases the
|
||||||
|
size of the library dramatically.
|
||||||
|
|
||||||
|
5. There is support for saving compiled patterns and re-using them later.
|
||||||
|
|
||||||
|
6. There is support for running regular expressions that were compiled on a
|
||||||
|
different host with the opposite endianness.
|
||||||
|
|
||||||
|
7. The pcretest program has been extended to accommodate the new features.
|
||||||
|
|
||||||
|
The main internal rearrangement is that sequences of literal characters are no
|
||||||
|
longer handled as strings. Instead, each character is handled on its own. This
|
||||||
|
makes some UTF-8 handling easier, and makes the support of partial matching
|
||||||
|
possible. Compiled patterns containing long literal strings will be larger as a
|
||||||
|
result of this change; I hope that performance will not be much affected.
|
||||||
|
|
||||||
|
|
||||||
|
Release 4.5 01-Dec-03
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Again mainly a bug-fix and tidying release, with only a couple of new features:
|
||||||
|
|
||||||
|
1. It's possible now to compile PCRE so that it does not use recursive
|
||||||
|
function calls when matching. Instead it gets memory from the heap. This slows
|
||||||
|
things down, but may be necessary on systems with limited stacks.
|
||||||
|
|
||||||
|
2. UTF-8 string checking has been tightened to reject overlong sequences and to
|
||||||
|
check that a starting offset points to the start of a character. Failure of the
|
||||||
|
latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET.
|
||||||
|
|
||||||
|
3. PCRE can now be compiled for systems that use EBCDIC code.
|
||||||
|
|
||||||
|
|
||||||
|
Release 4.4 21-Aug-03
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
This is mainly a bug-fix and tidying release. The only new feature is that PCRE
|
||||||
|
checks UTF-8 strings for validity by default. There is an option to suppress
|
||||||
|
this, just in case anybody wants that teeny extra bit of performance.
|
||||||
|
|
||||||
|
|
||||||
|
Releases 4.1 - 4.3
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Sorry, I forgot about updating the NEWS file for these releases. Please take a
|
||||||
|
look at ChangeLog.
|
||||||
|
|
||||||
|
|
||||||
|
Release 4.0 17-Feb-03
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
There have been a lot of changes for the 4.0 release, adding additional
|
||||||
|
functionality and mending bugs. Below is a list of the highlights of the new
|
||||||
|
functionality. For full details of these features, please consult the
|
||||||
|
documentation. For a complete list of changes, see the ChangeLog file.
|
||||||
|
|
||||||
|
1. Support for Perl's \Q...\E escapes.
|
||||||
|
|
||||||
|
2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java
|
||||||
|
package. They provide some syntactic sugar for simple cases of "atomic
|
||||||
|
grouping".
|
||||||
|
|
||||||
|
3. Support for the \G assertion. It is true when the current matching position
|
||||||
|
is at the start point of the match.
|
||||||
|
|
||||||
|
4. A new feature that provides some of the functionality that Perl provides
|
||||||
|
with (?{...}). The facility is termed a "callout". The way it is done in PCRE
|
||||||
|
is for the caller to provide an optional function, by setting pcre_callout to
|
||||||
|
its entry point. To get the function called, the regex must include (?C) at
|
||||||
|
appropriate points.
|
||||||
|
|
||||||
|
5. Support for recursive calls to individual subpatterns. This makes it really
|
||||||
|
easy to get totally confused.
|
||||||
|
|
||||||
|
6. Support for named subpatterns. The Python syntax (?P<name>...) is used to
|
||||||
|
name a group.
|
||||||
|
|
||||||
|
7. Several extensions to UTF-8 support; it is now fairly complete. There is an
|
||||||
|
option for pcregrep to make it operate in UTF-8 mode.
|
||||||
|
|
||||||
|
8. The single man page has been split into a number of separate man pages.
|
||||||
|
These also give rise to individual HTML pages which are put in a separate
|
||||||
|
directory. There is an index.html page that lists them all. Some hyperlinking
|
||||||
|
between the pages has been installed.
|
||||||
|
|
||||||
|
|
||||||
|
Release 3.5 15-Aug-01
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
1. The configuring system has been upgraded to use later versions of autoconf
|
||||||
|
and libtool. By default it builds both a shared and a static library if the OS
|
||||||
|
supports it. You can use --disable-shared or --disable-static on the configure
|
||||||
|
command if you want only one of them.
|
||||||
|
|
||||||
|
2. The pcretest utility is now installed along with pcregrep because it is
|
||||||
|
useful for users (to test regexs) and by doing this, it automatically gets
|
||||||
|
relinked by libtool. The documentation has been turned into a man page, so
|
||||||
|
there are now .1, .txt, and .html versions in /doc.
|
||||||
|
|
||||||
|
3. Upgrades to pcregrep:
|
||||||
|
(i) Added long-form option names like gnu grep.
|
||||||
|
(ii) Added --help to list all options with an explanatory phrase.
|
||||||
|
(iii) Added -r, --recursive to recurse into sub-directories.
|
||||||
|
(iv) Added -f, --file to read patterns from a file.
|
||||||
|
|
||||||
|
4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure
|
||||||
|
script, to force use of CR or LF instead of \n in the source. On non-Unix
|
||||||
|
systems, the value can be set in config.h.
|
||||||
|
|
||||||
|
5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an
|
||||||
|
absolute limit. Changed the text of the error message to make this clear, and
|
||||||
|
likewise updated the man page.
|
||||||
|
|
||||||
|
6. The limit of 99 on the number of capturing subpatterns has been removed.
|
||||||
|
The new limit is 65535, which I hope will not be a "real" limit.
|
||||||
|
|
||||||
|
|
||||||
|
Release 3.3 01-Aug-00
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
There is some support for UTF-8 character strings. This is incomplete and
|
||||||
|
experimental. The documentation describes what is and what is not implemented.
|
||||||
|
Otherwise, this is just a bug-fixing release.
|
||||||
|
|
||||||
|
|
||||||
|
Release 3.0 01-Feb-00
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
1. A "configure" script is now used to configure PCRE for Unix systems. It
|
||||||
|
builds a Makefile, a config.h file, and the pcre-config script.
|
||||||
|
|
||||||
|
2. PCRE is built as a shared library by default.
|
||||||
|
|
||||||
|
3. There is support for POSIX classes such as [:alpha:].
|
||||||
|
|
||||||
|
5. There is an experimental recursion feature.
|
||||||
|
|
||||||
|
----------------------------------------------------------------------------
|
||||||
|
IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00
|
||||||
|
|
||||||
|
Please note that there has been a change in the API such that a larger
|
||||||
|
ovector is required at matching time, to provide some additional workspace.
|
||||||
|
The new man page has details. This change was necessary in order to support
|
||||||
|
some of the new functionality in Perl 5.005.
|
||||||
|
|
||||||
|
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00
|
||||||
|
|
||||||
|
Another (I hope this is the last!) change has been made to the API for the
|
||||||
|
pcre_compile() function. An additional argument has been added to make it
|
||||||
|
possible to pass over a pointer to character tables built in the current
|
||||||
|
locale by pcre_maketables(). To use the default tables, this new argument
|
||||||
|
should be passed as NULL.
|
||||||
|
|
||||||
|
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
|
||||||
|
|
||||||
|
Yet another (and again I hope this really is the last) change has been made
|
||||||
|
to the API for the pcre_exec() function. An additional argument has been
|
||||||
|
added to make it possible to start the match other than at the start of the
|
||||||
|
subject string. This is important if there are lookbehinds. The new man
|
||||||
|
page has the details, but if you just want to convert existing programs, all
|
||||||
|
you need to do is to stick in a new fifth argument to pcre_exec(), with a
|
||||||
|
value of zero. For example, change
|
||||||
|
|
||||||
|
pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize)
|
||||||
|
to
|
||||||
|
pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize)
|
||||||
|
|
||||||
|
****
|
|
@ -0,0 +1,172 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This is a freestanding support program to generate a file containing default
|
||||||
|
character tables for PCRE. The tables are built according to the default C
|
||||||
|
locale. Now that pcre_maketables is a function visible to the outside world, we
|
||||||
|
make use of its code from here in order to be consistent. */
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
#define DFTABLES /* pcre_maketables.c notices this */
|
||||||
|
#include "pcre_maketables.c"
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
FILE *f;
|
||||||
|
const unsigned char *tables = pcre_maketables();
|
||||||
|
const unsigned char *base_of_tables = tables;
|
||||||
|
|
||||||
|
if (argc != 2)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "dftables: one filename argument is required\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
f = fopen(argv[1], "wb");
|
||||||
|
if (f == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* There are two fprintf() calls here, because gcc in pedantic mode complains
|
||||||
|
about the very long string otherwise. */
|
||||||
|
|
||||||
|
fprintf(f,
|
||||||
|
"/*************************************************\n"
|
||||||
|
"* Perl-Compatible Regular Expressions *\n"
|
||||||
|
"*************************************************/\n\n"
|
||||||
|
"/* This file is automatically written by the dftables auxiliary \n"
|
||||||
|
"program. If you edit it by hand, you might like to edit the Makefile to \n"
|
||||||
|
"prevent its ever being regenerated.\n\n");
|
||||||
|
fprintf(f,
|
||||||
|
"This file contains the default tables for characters with codes less than\n"
|
||||||
|
"128 (ASCII characters). These tables are used when no external tables are\n"
|
||||||
|
"passed to PCRE. */\n\n"
|
||||||
|
"const unsigned char _pcre_default_tables[] = {\n\n"
|
||||||
|
"/* This table is a lower casing table. */\n\n");
|
||||||
|
|
||||||
|
fprintf(f, " ");
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||||
|
fprintf(f, "%3d", *tables++);
|
||||||
|
if (i != 255) fprintf(f, ",");
|
||||||
|
}
|
||||||
|
fprintf(f, ",\n\n");
|
||||||
|
|
||||||
|
fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||||
|
|
||||||
|
fprintf(f, " ");
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||||
|
fprintf(f, "%3d", *tables++);
|
||||||
|
if (i != 255) fprintf(f, ",");
|
||||||
|
}
|
||||||
|
fprintf(f, ",\n\n");
|
||||||
|
|
||||||
|
fprintf(f,
|
||||||
|
"/* This table contains bit maps for various character classes.\n"
|
||||||
|
"Each map is 32 bytes long and the bits run from the least\n"
|
||||||
|
"significant end of each byte. The classes that have their own\n"
|
||||||
|
"maps are: space, xdigit, digit, upper, lower, word, graph\n"
|
||||||
|
"print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||||
|
|
||||||
|
fprintf(f, " ");
|
||||||
|
for (i = 0; i < cbit_length; i++)
|
||||||
|
{
|
||||||
|
if ((i & 7) == 0 && i != 0)
|
||||||
|
{
|
||||||
|
if ((i & 31) == 0) fprintf(f, "\n");
|
||||||
|
fprintf(f, "\n ");
|
||||||
|
}
|
||||||
|
fprintf(f, "0x%02x", *tables++);
|
||||||
|
if (i != cbit_length - 1) fprintf(f, ",");
|
||||||
|
}
|
||||||
|
fprintf(f, ",\n\n");
|
||||||
|
|
||||||
|
fprintf(f,
|
||||||
|
"/* This table identifies various classes of character by individual bits:\n"
|
||||||
|
" 0x%02x white space character\n"
|
||||||
|
" 0x%02x letter\n"
|
||||||
|
" 0x%02x decimal digit\n"
|
||||||
|
" 0x%02x hexadecimal digit\n"
|
||||||
|
" 0x%02x alphanumeric or '_'\n"
|
||||||
|
" 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
|
||||||
|
ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
|
||||||
|
ctype_meta);
|
||||||
|
|
||||||
|
fprintf(f, " ");
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
if ((i & 7) == 0 && i != 0)
|
||||||
|
{
|
||||||
|
fprintf(f, " /* ");
|
||||||
|
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||||
|
else fprintf(f, "%3d-", i-8);
|
||||||
|
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||||
|
else fprintf(f, "%3d", i-1);
|
||||||
|
fprintf(f, " */\n ");
|
||||||
|
}
|
||||||
|
fprintf(f, "0x%02x", *tables++);
|
||||||
|
if (i != 255) fprintf(f, ",");
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "};/* ");
|
||||||
|
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||||
|
else fprintf(f, "%3d-", i-8);
|
||||||
|
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||||
|
else fprintf(f, "%3d", i-1);
|
||||||
|
fprintf(f, " */\n\n/* End of chartables.c */\n");
|
||||||
|
|
||||||
|
fclose(f);
|
||||||
|
free((void *)base_of_tables);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of dftables.c */
|
|
@ -1,122 +0,0 @@
|
||||||
/* Muntyan 04/21/2005: changed some HAVE_* */
|
|
||||||
#include "config.h"
|
|
||||||
|
|
||||||
/* On Unix systems config.in is converted by configure into config.h. PCRE is
|
|
||||||
written in Standard C, but there are a few non-standard things it can cope
|
|
||||||
with, allowing it to run on SunOS4 and other "close to standard" systems.
|
|
||||||
|
|
||||||
On a non-Unix system you should just copy this file into config.h, and set up
|
|
||||||
the macros the way you need them. You should normally change the definitions of
|
|
||||||
HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way autoconf
|
|
||||||
works, these cannot be made the defaults. If your system has bcopy() and not
|
|
||||||
memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your
|
|
||||||
system has neither bcopy() nor memmove(), leave them both as 0; an emulation
|
|
||||||
function will be used. */
|
|
||||||
|
|
||||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
|
||||||
character codes, define this macro as 1. On systems that can use "configure",
|
|
||||||
this can be done via --enable-ebcdic. */
|
|
||||||
|
|
||||||
#ifndef EBCDIC
|
|
||||||
#define EBCDIC 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* If you are compiling for a system that needs some magic to be inserted
|
|
||||||
before the definition of an exported function, define this macro to contain the
|
|
||||||
relevant magic. It appears at the start of every exported function. */
|
|
||||||
|
|
||||||
#ifndef EXPORT /* muntyan */
|
|
||||||
#define EXPORT
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Define to empty if the "const" keyword does not work. */
|
|
||||||
|
|
||||||
#undef const
|
|
||||||
|
|
||||||
/* Define to `unsigned' if <sys/types.h> does not define. */
|
|
||||||
/* #undef size_t */ /* muntyan */
|
|
||||||
|
|
||||||
|
|
||||||
/* The following two definitions are mainly for the benefit of SunOS4, which
|
|
||||||
doesn't have the strerror() or memmove() functions that should be present in
|
|
||||||
all Standard C libraries. The macros HAVE_STRERROR and HAVE_MEMMOVE should
|
|
||||||
normally be defined with the value 1 for other systems, but unfortunately we
|
|
||||||
can't make this the default because "configure" files generated by autoconf
|
|
||||||
will only change 0 to 1; they won't change 1 to 0 if the functions are not
|
|
||||||
found. */
|
|
||||||
|
|
||||||
#ifndef HAVE_STRERROR
|
|
||||||
#define HAVE_STRERROR 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef HAVE_MEMMOVE /* muntyan */
|
|
||||||
#define HAVE_MEMMOVE 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* There are some non-Unix systems that don't even have bcopy(). If this macro
|
|
||||||
is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
|
|
||||||
HAVE_BCOPY is not relevant. */
|
|
||||||
|
|
||||||
#ifndef HAVE_BCOPY /* muntyan */
|
|
||||||
#define HAVE_BCOPY 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The value of NEWLINE determines the newline character. The default is to
|
|
||||||
leave it up to the compiler, but some sites want to force a particular value.
|
|
||||||
On Unix systems, "configure" can be used to override this default. */
|
|
||||||
|
|
||||||
#ifndef NEWLINE
|
|
||||||
#define NEWLINE '\n'
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The value of LINK_SIZE determines the number of bytes used to store
|
|
||||||
links as offsets within the compiled regex. The default is 2, which allows for
|
|
||||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
|
||||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for
|
|
||||||
longer patterns in extreme cases. On Unix systems, "configure" can be used to
|
|
||||||
override this default. */
|
|
||||||
|
|
||||||
#ifndef LINK_SIZE
|
|
||||||
#define LINK_SIZE 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The value of MATCH_LIMIT determines the default number of times the match()
|
|
||||||
function can be called during a single execution of pcre_exec(). (There is a
|
|
||||||
runtime method of setting a different limit.) The limit exists in order to
|
|
||||||
catch runaway regular expressions that take for ever to determine that they do
|
|
||||||
not match. The default is set very large so that it does not accidentally catch
|
|
||||||
legitimate cases. On Unix systems, "configure" can be used to override this
|
|
||||||
default. */
|
|
||||||
|
|
||||||
#ifndef MATCH_LIMIT
|
|
||||||
#define MATCH_LIMIT 10000000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
|
||||||
required for holding the pointers to capturing substrings because PCRE requires
|
|
||||||
three integers per substring, whereas the POSIX interface provides only two. If
|
|
||||||
the number of expected substrings is small, the wrapper function uses space on
|
|
||||||
the stack, because this is faster than using malloc() for each call. The
|
|
||||||
threshold above which the stack is no longer used is defined by POSIX_MALLOC_
|
|
||||||
THRESHOLD. On Unix systems, "configure" can be used to override this default.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef POSIX_MALLOC_THRESHOLD
|
|
||||||
#define POSIX_MALLOC_THRESHOLD 10
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
|
||||||
This can sometimes be a problem on systems that have stacks of limited size.
|
|
||||||
Define NO_RECURSE to get a version that doesn't use recursion in the match()
|
|
||||||
function; instead it creates its own stack by steam using pcre_recurse_malloc
|
|
||||||
to get memory. For more detail, see comments and other stuff just above the
|
|
||||||
match() function. On Unix systems, "configure" can be used to set this in the
|
|
||||||
Makefile (use --disable-recursion). */
|
|
||||||
|
|
||||||
/* #define NO_RECURSE */
|
|
||||||
|
|
||||||
#ifndef SUPPORT_UTF8
|
|
||||||
#define SUPPORT_UTF8 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* End */
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,172 +0,0 @@
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* Copyright (c) 1997-2003 University of Cambridge */
|
|
||||||
|
|
||||||
#ifndef _PCRE_H
|
|
||||||
#define _PCRE_H
|
|
||||||
|
|
||||||
/* The file pcre.h is built by "configure". Do not edit it; instead
|
|
||||||
make changes to pcre.in. */
|
|
||||||
|
|
||||||
#define PCRE_MAJOR 4
|
|
||||||
#define PCRE_MINOR 5
|
|
||||||
#define PCRE_DATE 01-December-2003
|
|
||||||
|
|
||||||
/* Win32 uses DLL by default */
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
# ifdef PCRE_DEFINITION
|
|
||||||
# ifdef DLL_EXPORT
|
|
||||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
|
||||||
# endif
|
|
||||||
# else
|
|
||||||
# ifndef PCRE_STATIC
|
|
||||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
#ifndef PCRE_DATA_SCOPE
|
|
||||||
# define PCRE_DATA_SCOPE extern
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
|
||||||
it is needed here for malloc. */
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
/* Allow for C++ users */
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Options */
|
|
||||||
|
|
||||||
#define PCRE_CASELESS 0x0001
|
|
||||||
#define PCRE_MULTILINE 0x0002
|
|
||||||
#define PCRE_DOTALL 0x0004
|
|
||||||
#define PCRE_EXTENDED 0x0008
|
|
||||||
#define PCRE_ANCHORED 0x0010
|
|
||||||
#define PCRE_DOLLAR_ENDONLY 0x0020
|
|
||||||
#define PCRE_EXTRA 0x0040
|
|
||||||
#define PCRE_NOTBOL 0x0080
|
|
||||||
#define PCRE_NOTEOL 0x0100
|
|
||||||
#define PCRE_UNGREEDY 0x0200
|
|
||||||
#define PCRE_NOTEMPTY 0x0400
|
|
||||||
#define PCRE_UTF8 0x0800
|
|
||||||
#define PCRE_NO_AUTO_CAPTURE 0x1000
|
|
||||||
#define PCRE_NO_UTF8_CHECK 0x2000
|
|
||||||
|
|
||||||
/* Exec-time and get/set-time error codes */
|
|
||||||
|
|
||||||
#define PCRE_ERROR_NOMATCH (-1)
|
|
||||||
#define PCRE_ERROR_NULL (-2)
|
|
||||||
#define PCRE_ERROR_BADOPTION (-3)
|
|
||||||
#define PCRE_ERROR_BADMAGIC (-4)
|
|
||||||
#define PCRE_ERROR_UNKNOWN_NODE (-5)
|
|
||||||
#define PCRE_ERROR_NOMEMORY (-6)
|
|
||||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
|
||||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
|
||||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
|
||||||
#define PCRE_ERROR_BADUTF8 (-10)
|
|
||||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
|
||||||
|
|
||||||
/* Request types for pcre_fullinfo() */
|
|
||||||
|
|
||||||
#define PCRE_INFO_OPTIONS 0
|
|
||||||
#define PCRE_INFO_SIZE 1
|
|
||||||
#define PCRE_INFO_CAPTURECOUNT 2
|
|
||||||
#define PCRE_INFO_BACKREFMAX 3
|
|
||||||
#define PCRE_INFO_FIRSTBYTE 4
|
|
||||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
|
||||||
#define PCRE_INFO_FIRSTTABLE 5
|
|
||||||
#define PCRE_INFO_LASTLITERAL 6
|
|
||||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
|
||||||
#define PCRE_INFO_NAMECOUNT 8
|
|
||||||
#define PCRE_INFO_NAMETABLE 9
|
|
||||||
#define PCRE_INFO_STUDYSIZE 10
|
|
||||||
|
|
||||||
/* Request types for pcre_config() */
|
|
||||||
|
|
||||||
#define PCRE_CONFIG_UTF8 0
|
|
||||||
#define PCRE_CONFIG_NEWLINE 1
|
|
||||||
#define PCRE_CONFIG_LINK_SIZE 2
|
|
||||||
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
|
||||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
|
||||||
#define PCRE_CONFIG_STACKRECURSE 5
|
|
||||||
|
|
||||||
/* Bit flags for the pcre_extra structure */
|
|
||||||
|
|
||||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
|
||||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
|
||||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
|
||||||
|
|
||||||
/* Types */
|
|
||||||
|
|
||||||
struct real_pcre; /* declaration; the definition is private */
|
|
||||||
typedef struct real_pcre pcre;
|
|
||||||
|
|
||||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
|
||||||
such a way as to be extensible. */
|
|
||||||
|
|
||||||
typedef struct pcre_extra {
|
|
||||||
unsigned long int flags; /* Bits for which fields are set */
|
|
||||||
void *study_data; /* Opaque data from pcre_study() */
|
|
||||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
|
||||||
void *callout_data; /* Data passed back in callouts */
|
|
||||||
} pcre_extra;
|
|
||||||
|
|
||||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
|
||||||
structure so that new fields can be added on the end in future versions,
|
|
||||||
without changing the API of the function, thereby allowing old clients to work
|
|
||||||
without modification. */
|
|
||||||
|
|
||||||
typedef struct pcre_callout_block {
|
|
||||||
int version; /* Identifies version of block */
|
|
||||||
/* ------------------------ Version 0 ------------------------------- */
|
|
||||||
int callout_number; /* Number compiled into pattern */
|
|
||||||
int *offset_vector; /* The offset vector */
|
|
||||||
const char *subject; /* The subject being matched */
|
|
||||||
int subject_length; /* The length of the subject */
|
|
||||||
int start_match; /* Offset to start of this match attempt */
|
|
||||||
int current_position; /* Where we currently are */
|
|
||||||
int capture_top; /* Max current capture */
|
|
||||||
int capture_last; /* Most recently closed capture */
|
|
||||||
void *callout_data; /* Data passed in with the call */
|
|
||||||
/* ------------------------------------------------------------------ */
|
|
||||||
} pcre_callout_block;
|
|
||||||
|
|
||||||
/* Exported PCRE functions */
|
|
||||||
|
|
||||||
extern pcre *_pcre_compile(const char *, int, const char **,
|
|
||||||
int *, const unsigned char *);
|
|
||||||
extern int _pcre_config(int, void *);
|
|
||||||
extern int _pcre_copy_named_substring(const pcre *, const char *,
|
|
||||||
int *, int, const char *, char *, int);
|
|
||||||
extern int _pcre_copy_substring(const char *, int *, int, int,
|
|
||||||
char *, int);
|
|
||||||
extern int _pcre_exec(const pcre *, const pcre_extra *,
|
|
||||||
const char *, int, int, int, int *, int);
|
|
||||||
extern void _pcre_free_substring(const char *);
|
|
||||||
extern void _pcre_free_substring_list(const char **);
|
|
||||||
extern int _pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
|
||||||
void *);
|
|
||||||
extern int _pcre_get_named_substring(const pcre *, const char *,
|
|
||||||
int *, int, const char *, const char **);
|
|
||||||
extern int _pcre_get_stringnumber(const pcre *, const char *);
|
|
||||||
extern int _pcre_get_substring(const char *, int *, int, int,
|
|
||||||
const char **);
|
|
||||||
extern int _pcre_get_substring_list(const char *, int *, int,
|
|
||||||
const char ***);
|
|
||||||
extern int _pcre_info(const pcre *, int *, int *);
|
|
||||||
extern const unsigned char *_pcre_maketables(void);
|
|
||||||
extern pcre_extra *_pcre_study(const pcre *, int, const char **);
|
|
||||||
extern const char *_pcre_version(void);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* End of pcre.h */
|
|
|
@ -0,0 +1,286 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* In its original form, this is the .in file that is transformed by
|
||||||
|
"configure" into pcre.h.
|
||||||
|
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _PCRE_H
|
||||||
|
#define _PCRE_H
|
||||||
|
|
||||||
|
/* The file pcre.h is built by "configure". Do not edit it; instead
|
||||||
|
make changes to pcre.in. */
|
||||||
|
|
||||||
|
#define PCRE_MAJOR @PCRE_MAJOR@
|
||||||
|
#define PCRE_MINOR @PCRE_MINOR@
|
||||||
|
#define PCRE_DATE @PCRE_DATE@
|
||||||
|
|
||||||
|
/* Win32 uses DLL by default; it needs special stuff for exported functions. */
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# ifdef PCRE_DEFINITION
|
||||||
|
# ifdef DLL_EXPORT
|
||||||
|
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||||
|
# endif
|
||||||
|
# else
|
||||||
|
# ifndef PCRE_STATIC
|
||||||
|
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* For other operating systems, we use the standard "extern". */
|
||||||
|
|
||||||
|
#ifndef PCRE_DATA_SCOPE
|
||||||
|
# ifdef __cplusplus
|
||||||
|
# define PCRE_DATA_SCOPE extern "C"
|
||||||
|
# else
|
||||||
|
# define PCRE_DATA_SCOPE extern
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||||
|
it is needed here for malloc. */
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* Allow for C++ users */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Options */
|
||||||
|
|
||||||
|
#define PCRE_CASELESS 0x00000001
|
||||||
|
#define PCRE_MULTILINE 0x00000002
|
||||||
|
#define PCRE_DOTALL 0x00000004
|
||||||
|
#define PCRE_EXTENDED 0x00000008
|
||||||
|
#define PCRE_ANCHORED 0x00000010
|
||||||
|
#define PCRE_DOLLAR_ENDONLY 0x00000020
|
||||||
|
#define PCRE_EXTRA 0x00000040
|
||||||
|
#define PCRE_NOTBOL 0x00000080
|
||||||
|
#define PCRE_NOTEOL 0x00000100
|
||||||
|
#define PCRE_UNGREEDY 0x00000200
|
||||||
|
#define PCRE_NOTEMPTY 0x00000400
|
||||||
|
#define PCRE_UTF8 0x00000800
|
||||||
|
#define PCRE_NO_AUTO_CAPTURE 0x00001000
|
||||||
|
#define PCRE_NO_UTF8_CHECK 0x00002000
|
||||||
|
#define PCRE_AUTO_CALLOUT 0x00004000
|
||||||
|
#define PCRE_PARTIAL 0x00008000
|
||||||
|
#define PCRE_DFA_SHORTEST 0x00010000
|
||||||
|
#define PCRE_DFA_RESTART 0x00020000
|
||||||
|
#define PCRE_FIRSTLINE 0x00040000
|
||||||
|
|
||||||
|
/* Exec-time and get/set-time error codes */
|
||||||
|
|
||||||
|
#define PCRE_ERROR_NOMATCH (-1)
|
||||||
|
#define PCRE_ERROR_NULL (-2)
|
||||||
|
#define PCRE_ERROR_BADOPTION (-3)
|
||||||
|
#define PCRE_ERROR_BADMAGIC (-4)
|
||||||
|
#define PCRE_ERROR_UNKNOWN_NODE (-5)
|
||||||
|
#define PCRE_ERROR_NOMEMORY (-6)
|
||||||
|
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||||
|
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||||
|
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||||
|
#define PCRE_ERROR_BADUTF8 (-10)
|
||||||
|
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
||||||
|
#define PCRE_ERROR_PARTIAL (-12)
|
||||||
|
#define PCRE_ERROR_BADPARTIAL (-13)
|
||||||
|
#define PCRE_ERROR_INTERNAL (-14)
|
||||||
|
#define PCRE_ERROR_BADCOUNT (-15)
|
||||||
|
#define PCRE_ERROR_DFA_UITEM (-16)
|
||||||
|
#define PCRE_ERROR_DFA_UCOND (-17)
|
||||||
|
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||||
|
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||||
|
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||||
|
|
||||||
|
/* Request types for pcre_fullinfo() */
|
||||||
|
|
||||||
|
#define PCRE_INFO_OPTIONS 0
|
||||||
|
#define PCRE_INFO_SIZE 1
|
||||||
|
#define PCRE_INFO_CAPTURECOUNT 2
|
||||||
|
#define PCRE_INFO_BACKREFMAX 3
|
||||||
|
#define PCRE_INFO_FIRSTBYTE 4
|
||||||
|
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||||
|
#define PCRE_INFO_FIRSTTABLE 5
|
||||||
|
#define PCRE_INFO_LASTLITERAL 6
|
||||||
|
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||||
|
#define PCRE_INFO_NAMECOUNT 8
|
||||||
|
#define PCRE_INFO_NAMETABLE 9
|
||||||
|
#define PCRE_INFO_STUDYSIZE 10
|
||||||
|
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||||
|
|
||||||
|
/* Request types for pcre_config() */
|
||||||
|
|
||||||
|
#define PCRE_CONFIG_UTF8 0
|
||||||
|
#define PCRE_CONFIG_NEWLINE 1
|
||||||
|
#define PCRE_CONFIG_LINK_SIZE 2
|
||||||
|
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
||||||
|
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||||
|
#define PCRE_CONFIG_STACKRECURSE 5
|
||||||
|
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||||
|
|
||||||
|
/* Bit flags for the pcre_extra structure */
|
||||||
|
|
||||||
|
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||||
|
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||||
|
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||||
|
#define PCRE_EXTRA_TABLES 0x0008
|
||||||
|
|
||||||
|
/* Types */
|
||||||
|
|
||||||
|
struct real_pcre; /* declaration; the definition is private */
|
||||||
|
typedef struct real_pcre pcre;
|
||||||
|
|
||||||
|
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||||
|
such a way as to be extensible. Always add new fields at the end, in order to
|
||||||
|
remain compatible. */
|
||||||
|
|
||||||
|
typedef struct pcre_extra {
|
||||||
|
unsigned long int flags; /* Bits for which fields are set */
|
||||||
|
void *study_data; /* Opaque data from pcre_study() */
|
||||||
|
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||||
|
void *callout_data; /* Data passed back in callouts */
|
||||||
|
const unsigned char *tables; /* Pointer to character tables */
|
||||||
|
} pcre_extra;
|
||||||
|
|
||||||
|
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||||
|
structure so that new fields can be added on the end in future versions,
|
||||||
|
without changing the API of the function, thereby allowing old clients to work
|
||||||
|
without modification. */
|
||||||
|
|
||||||
|
typedef struct pcre_callout_block {
|
||||||
|
int version; /* Identifies version of block */
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */
|
||||||
|
int callout_number; /* Number compiled into pattern */
|
||||||
|
int *offset_vector; /* The offset vector */
|
||||||
|
const char *subject; /* The subject being matched */
|
||||||
|
int subject_length; /* The length of the subject */
|
||||||
|
int start_match; /* Offset to start of this match attempt */
|
||||||
|
int current_position; /* Where we currently are in the subject */
|
||||||
|
int capture_top; /* Max current capture */
|
||||||
|
int capture_last; /* Most recently closed capture */
|
||||||
|
void *callout_data; /* Data passed in with the call */
|
||||||
|
/* ------------------- Added for Version 1 -------------------------- */
|
||||||
|
int pattern_position; /* Offset to next item in the pattern */
|
||||||
|
int next_item_length; /* Length of next item in the pattern */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
} pcre_callout_block;
|
||||||
|
|
||||||
|
|
||||||
|
/* Prefix all pcre api with underscore - Muntyan */
|
||||||
|
#define pcre_malloc _pcre_malloc
|
||||||
|
#define pcre_free _pcre_free
|
||||||
|
#define pcre_stack_malloc _pcre_stack_malloc
|
||||||
|
#define pcre_stack_free _pcre_stack_free
|
||||||
|
#define pcre_callout _pcre_callout
|
||||||
|
#define pcre_compile _pcre_compile
|
||||||
|
#define pcre_compile2 _pcre_compile2
|
||||||
|
#define pcre_config _pcre_config
|
||||||
|
#define pcre_copy_named_substring _pcre_copy_named_substring
|
||||||
|
#define pcre_copy_substring _pcre_copy_substring
|
||||||
|
#define pcre_dfa_exec _pcre_dfa_exec
|
||||||
|
#define pcre_exec _pcre_exec
|
||||||
|
#define pcre_free_substring _pcre_free_substring
|
||||||
|
#define pcre_free_substring_list _pcre_free_substring_list
|
||||||
|
#define pcre_fullinfo _pcre_fullinfo
|
||||||
|
#define pcre_get_named_substring _pcre_get_named_substring
|
||||||
|
#define pcre_get_stringnumber _pcre_get_stringnumber
|
||||||
|
#define pcre_get_substring _pcre_get_substring
|
||||||
|
#define pcre_get_substring_list _pcre_get_substring_list
|
||||||
|
#define pcre_info _pcre_info
|
||||||
|
#define pcre_maketables _pcre_maketables
|
||||||
|
#define pcre_refcount _pcre_refcount
|
||||||
|
#define pcre_study _pcre_study
|
||||||
|
#define pcre_version _pcre_version
|
||||||
|
|
||||||
|
|
||||||
|
/* Indirection for store get and free functions. These can be set to
|
||||||
|
alternative malloc/free functions if required. Special ones are used in the
|
||||||
|
non-recursive case for "frames". There is also an optional callout function
|
||||||
|
that is triggered by the (?C) regex item. For Virtual Pascal, these definitions
|
||||||
|
have to take another form. */
|
||||||
|
|
||||||
|
#ifndef VPCOMPAT
|
||||||
|
PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
|
||||||
|
PCRE_DATA_SCOPE void (*pcre_free)(void *);
|
||||||
|
PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
|
||||||
|
PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
|
||||||
|
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
||||||
|
#else /* VPCOMPAT */
|
||||||
|
PCRE_DATA_SCOPE void *pcre_malloc(size_t);
|
||||||
|
PCRE_DATA_SCOPE void pcre_free(void *);
|
||||||
|
PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t);
|
||||||
|
PCRE_DATA_SCOPE void pcre_stack_free(void *);
|
||||||
|
PCRE_DATA_SCOPE int pcre_callout(pcre_callout_block *);
|
||||||
|
#endif /* VPCOMPAT */
|
||||||
|
|
||||||
|
/* Exported PCRE functions */
|
||||||
|
|
||||||
|
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
|
||||||
|
const unsigned char *);
|
||||||
|
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||||
|
int *, const unsigned char *);
|
||||||
|
PCRE_DATA_SCOPE int pcre_config(int, void *);
|
||||||
|
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
|
||||||
|
int *, int, const char *, char *, int);
|
||||||
|
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||||
|
int);
|
||||||
|
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||||
|
const char *, int, int, int, int *, int , int *, int);
|
||||||
|
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, const char *,
|
||||||
|
int, int, int, int *, int);
|
||||||
|
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
|
||||||
|
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
|
||||||
|
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||||
|
void *);
|
||||||
|
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
||||||
|
int *, int, const char *, const char **);
|
||||||
|
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
||||||
|
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
||||||
|
const char **);
|
||||||
|
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
||||||
|
const char ***);
|
||||||
|
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
|
||||||
|
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
|
||||||
|
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
|
||||||
|
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||||
|
PCRE_DATA_SCOPE const char *pcre_version(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} /* extern "C" */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* End of pcre.h */
|
|
@ -6,11 +6,11 @@
|
||||||
program. If you edit it by hand, you might like to edit the Makefile to
|
program. If you edit it by hand, you might like to edit the Makefile to
|
||||||
prevent its ever being regenerated.
|
prevent its ever being regenerated.
|
||||||
|
|
||||||
This file is #included in the compilation of pcre.c to build the default
|
This file contains the default tables for characters with codes less than
|
||||||
character tables which are used when no tables are passed to the compile
|
128 (ASCII characters). These tables are used when no external tables are
|
||||||
function. */
|
passed to PCRE. */
|
||||||
|
|
||||||
static unsigned char _pcre_default_tables[] = {
|
const unsigned char _pcre_default_tables[] = {
|
||||||
|
|
||||||
/* This table is a lower casing table. */
|
/* This table is a lower casing table. */
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,112 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_config(). */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Return info about what features are configured *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function has an extensible interface so that additional items can be
|
||||||
|
added compatibly.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
what what information is required
|
||||||
|
where where to put the information
|
||||||
|
|
||||||
|
Returns: 0 if data returned, negative on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_EXPORT int
|
||||||
|
pcre_config(int what, void *where)   /* what: PCRE_CONFIG_* selector; where: caller-supplied output location */
|
||||||
|
{
|
||||||
|
switch (what)   /* each case stores one build-time setting through 'where' */
|
||||||
|
{
|
||||||
|
case PCRE_CONFIG_UTF8:   /* int result: 1 if SUPPORT_UTF8 was defined at build time, else 0 */
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
*((int *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((int *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_UNICODE_PROPERTIES:   /* int result: 1 if SUPPORT_UCP was defined at build time, else 0 */
|
||||||
|
#ifdef SUPPORT_UCP
|
||||||
|
*((int *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((int *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_NEWLINE:   /* int result: the value of the NEWLINE macro */
|
||||||
|
*((int *)where) = NEWLINE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_LINK_SIZE:   /* int result: the value of the LINK_SIZE macro */
|
||||||
|
*((int *)where) = LINK_SIZE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:   /* int result: the value of POSIX_MALLOC_THRESHOLD */
|
||||||
|
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_MATCH_LIMIT:   /* NOTE: unlike the other cases, this one writes through an unsigned int pointer */
|
||||||
|
*((unsigned int *)where) = MATCH_LIMIT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_STACKRECURSE:   /* int result: 0 if NO_RECURSE was defined at build time, else 1 */
|
||||||
|
#ifdef NO_RECURSE
|
||||||
|
*((int *)where) = 0;
|
||||||
|
#else
|
||||||
|
*((int *)where) = 1;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: return PCRE_ERROR_BADOPTION;   /* unrecognised selector: nothing is written to 'where' */
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;   /* success: the requested value has been stored */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_config.c */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,149 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_fullinfo(), which returns
|
||||||
|
information about a compiled pattern. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Return info about compiled pattern *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is a newer "info" function which has an extensible interface so
|
||||||
|
that additional items can be added compatibly.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
argument_re points to compiled code
|
||||||
|
extra_data points to extra data, or NULL
|
||||||
|
what what information is required
|
||||||
|
where where to put the information
|
||||||
|
|
||||||
|
Returns: 0 if data returned, negative on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_EXPORT int
|
||||||
|
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||||
|
void *where)
|
||||||
|
{
|
||||||
|
real_pcre internal_re;
|
||||||
|
pcre_study_data internal_study;
|
||||||
|
const real_pcre *re = (const real_pcre *)argument_re;
|
||||||
|
const pcre_study_data *study = NULL;
|
||||||
|
|
||||||
|
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
||||||
|
|
||||||
|
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||||
|
study = (const pcre_study_data *)extra_data->study_data;
|
||||||
|
|
||||||
|
if (re->magic_number != MAGIC_NUMBER)
|
||||||
|
{
|
||||||
|
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
|
||||||
|
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||||
|
if (study != NULL) study = &internal_study;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (what)
|
||||||
|
{
|
||||||
|
case PCRE_INFO_OPTIONS:
|
||||||
|
*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_SIZE:
|
||||||
|
*((size_t *)where) = re->size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_STUDYSIZE:
|
||||||
|
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_CAPTURECOUNT:
|
||||||
|
*((int *)where) = re->top_bracket;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_BACKREFMAX:
|
||||||
|
*((int *)where) = re->top_backref;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_FIRSTBYTE:
|
||||||
|
*((int *)where) =
|
||||||
|
((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||||
|
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Make sure we pass back the pointer to the bit vector in the external
|
||||||
|
block, not the internal copy (with flipped integer fields). */
|
||||||
|
|
||||||
|
case PCRE_INFO_FIRSTTABLE:
|
||||||
|
*((const uschar **)where) =
|
||||||
|
(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
|
||||||
|
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_LASTLITERAL:
|
||||||
|
*((int *)where) =
|
||||||
|
((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_NAMEENTRYSIZE:
|
||||||
|
*((int *)where) = re->name_entry_size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_NAMECOUNT:
|
||||||
|
*((int *)where) = re->name_count;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_NAMETABLE:
|
||||||
|
*((const uschar **)where) = (const uschar *)re + re->name_table_offset;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_DEFAULT_TABLES:
|
||||||
|
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: return PCRE_ERROR_BADOPTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_fullinfo.c */
|
|
@ -2,45 +2,48 @@
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/*
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
This is a library of functions to support regular expressions whose syntax
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
and semantics are as close as possible to those of the Perl 5 language. See
|
|
||||||
the file Tech.Notes for some information on the internals.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
Redistribution and use in source and binary forms, with or without
|
||||||
computer system, and to redistribute it freely, subject to the following
|
modification, are permitted provided that the following conditions are met:
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
this list of conditions and the following disclaimer.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
explicit claim or by omission.
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
misrepresented as being the original software.
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
supersede any condition above with which it is incompatible.
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/* This module contains some convenience functions for extracting substrings
|
/* This module contains some convenience functions for extracting substrings
|
||||||
from the subject string after a regex match has succeeded. The original idea
|
from the subject string after a regex match has succeeded. The original idea
|
||||||
for these functions came from Scott Wimer <scottw@cgibuilder.com>. */
|
for these functions came from Scott Wimer. */
|
||||||
|
|
||||||
|
|
||||||
/* Include the internals header, which itself includes Standard C headers plus
|
#include "pcre_internal.h"
|
||||||
the external pcre header. */
|
|
||||||
|
|
||||||
#include "internal.h"
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
|
@ -59,20 +62,20 @@ Returns: the number of the named parentheses, or a negative number
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
_pcre_get_stringnumber(const pcre *code, const char *stringname)
|
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
int entrysize;
|
int entrysize;
|
||||||
int top, bot;
|
int top, bot;
|
||||||
uschar *nametable;
|
uschar *nametable;
|
||||||
|
|
||||||
if ((rc = _pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||||
return rc;
|
return rc;
|
||||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
|
||||||
if ((rc = _pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||||
return rc;
|
return rc;
|
||||||
if ((rc = _pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
bot = 0;
|
bot = 0;
|
||||||
|
@ -118,7 +121,7 @@ Returns: if successful:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
_pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||||
int stringnumber, char *buffer, int size)
|
int stringnumber, char *buffer, int size)
|
||||||
{
|
{
|
||||||
int yield;
|
int yield;
|
||||||
|
@ -162,12 +165,12 @@ Returns: if successful:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
_pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||||
int stringcount, const char *stringname, char *buffer, int size)
|
int stringcount, const char *stringname, char *buffer, int size)
|
||||||
{
|
{
|
||||||
int n = _pcre_get_stringnumber(code, stringname);
|
int n = pcre_get_stringnumber(code, stringname);
|
||||||
if (n <= 0) return n;
|
if (n <= 0) return n;
|
||||||
return _pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -194,7 +197,7 @@ Returns: if successful: 0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
_pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||||
const char ***listptr)
|
const char ***listptr)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -206,7 +209,7 @@ char *p;
|
||||||
for (i = 0; i < double_count; i += 2)
|
for (i = 0; i < double_count; i += 2)
|
||||||
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
||||||
|
|
||||||
stringlist = (char **)(g_malloc)(size);
|
stringlist = (char **)(pcre_malloc)(size);
|
||||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||||
|
|
||||||
*listptr = (const char **)stringlist;
|
*listptr = (const char **)stringlist;
|
||||||
|
@ -227,6 +230,25 @@ return 0;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Free store obtained by get_substring_list *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function exists for the benefit of people calling PCRE from non-C
|
||||||
|
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||||
|
|
||||||
|
Argument: the result of a previous pcre_get_substring_list()
|
||||||
|
Returns: nothing
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
pcre_free_substring_list(const char **pointer)
|
||||||
|
{
|
||||||
|
(pcre_free)((void *)pointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Copy captured string to new store *
|
* Copy captured string to new store *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -253,7 +275,7 @@ Returns: if successful:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
_pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||||
int stringnumber, const char **stringptr)
|
int stringnumber, const char **stringptr)
|
||||||
{
|
{
|
||||||
int yield;
|
int yield;
|
||||||
|
@ -262,7 +284,7 @@ if (stringnumber < 0 || stringnumber >= stringcount)
|
||||||
return PCRE_ERROR_NOSUBSTRING;
|
return PCRE_ERROR_NOSUBSTRING;
|
||||||
stringnumber *= 2;
|
stringnumber *= 2;
|
||||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||||
substring = (char *)(g_malloc)(yield + 1);
|
substring = (char *)(pcre_malloc)(yield + 1);
|
||||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||||
memcpy(substring, subject + ovector[stringnumber], yield);
|
memcpy(substring, subject + ovector[stringnumber], yield);
|
||||||
substring[yield] = 0;
|
substring[yield] = 0;
|
||||||
|
@ -299,15 +321,32 @@ Returns: if successful:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
_pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||||
int stringcount, const char *stringname, const char **stringptr)
|
int stringcount, const char *stringname, const char **stringptr)
|
||||||
{
|
{
|
||||||
int n = _pcre_get_stringnumber(code, stringname);
|
int n = pcre_get_stringnumber(code, stringname);
|
||||||
if (n <= 0) return n;
|
if (n <= 0) return n;
|
||||||
return _pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* End of get.c */
|
/*************************************************
|
||||||
|
* Free store obtained by get_substring *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function exists for the benefit of people calling PCRE from non-C
|
||||||
|
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||||
|
|
||||||
|
Argument: the result of a previous pcre_get_substring()
|
||||||
|
Returns: nothing
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
pcre_free_substring(const char *pointer)
|
||||||
|
{
|
||||||
|
(pcre_free)((void *)pointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_get.c */
|
|
@ -0,0 +1,69 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains global variables that are exported by the PCRE library.
|
||||||
|
PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
||||||
|
However, it calls memory allocation and freeing functions via the four
|
||||||
|
indirections below, and it can optionally do callouts, using the fifth
|
||||||
|
indirection. These values can be changed by the caller, but are shared between
|
||||||
|
all threads. However, when compiling for Virtual Pascal, things are done
|
||||||
|
differently, and global variables are not used (see pcre.in). */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef VPCOMPAT
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" void *(*pcre_malloc)(size_t) = malloc;
|
||||||
|
extern "C" void (*pcre_free)(void *) = free;
|
||||||
|
extern "C" void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||||
|
extern "C" void (*pcre_stack_free)(void *) = free;
|
||||||
|
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||||
|
#else
|
||||||
|
void *(*pcre_malloc)(size_t) = malloc;
|
||||||
|
void (*pcre_free)(void *) = free;
|
||||||
|
void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||||
|
void (*pcre_stack_free)(void *) = free;
|
||||||
|
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* End of pcre_globals.c */
|
|
@ -0,0 +1,89 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_info(), which gives some
|
||||||
|
information about a compiled pattern. However, use of this function is now
|
||||||
|
deprecated, as it has been superseded by pcre_fullinfo(). */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* (Obsolete) Return info about compiled pattern *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is the original "info" function. It picks potentially useful data out
|
||||||
|
of the private structure, but its interface was too rigid. It remains for
|
||||||
|
backwards compatibility. The public options are passed back in an int - though
|
||||||
|
the re->options field has been expanded to a long int, all the public options
|
||||||
|
are at the low end of it, and so even on 16-bit systems this will still be OK.
|
||||||
|
Therefore, I haven't changed the API for pcre_info().
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
argument_re points to compiled code
|
||||||
|
optptr where to pass back the options
|
||||||
|
first_byte where to pass back the first character,
|
||||||
|
or -1 if multiline and all branches start ^,
|
||||||
|
or -2 otherwise
|
||||||
|
|
||||||
|
Returns: number of capturing subpatterns
|
||||||
|
or negative values on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_EXPORT int
|
||||||
|
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||||
|
{
|
||||||
|
real_pcre internal_re;
|
||||||
|
const real_pcre *re = (const real_pcre *)argument_re;
|
||||||
|
if (re == NULL) return PCRE_ERROR_NULL;
|
||||||
|
if (re->magic_number != MAGIC_NUMBER)
|
||||||
|
{
|
||||||
|
re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
|
||||||
|
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||||
|
}
|
||||||
|
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
|
||||||
|
if (first_byte != NULL)
|
||||||
|
*first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||||
|
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||||
|
return re->top_bracket;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_info.c */
|
|
@ -3,44 +3,73 @@
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
|
|
||||||
/* This is a library of functions to support regular expressions whose syntax
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
and semantics are as close as possible to those of the Perl 5 language. See
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
the file Tech.Notes for some information on the internals.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
Redistribution and use in source and binary forms, with or without
|
||||||
computer system, and to redistribute it freely, subject to the following
|
modification, are permitted provided that the following conditions are met:
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
this list of conditions and the following disclaimer.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
explicit claim or by omission.
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
misrepresented as being the original software.
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
supersede any condition above with which it is incompatible.
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* This header contains definitions that are shared between the different
|
/* This header contains definitions that are shared between the different
|
||||||
modules, but which are not relevant to the outside. */
|
modules, but which are not relevant to the exported API. This includes some
|
||||||
|
functions whose names all begin with "_pcre_". */
|
||||||
|
|
||||||
|
#ifndef PCRE_INTERNAL_H
|
||||||
|
#define PCRE_INTERNAL_H
|
||||||
|
|
||||||
|
/* Define DEBUG to get debugging output on stdout. */
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
#define DEBUG
|
||||||
|
#endif
|
||||||
|
#ifdef DEBUG
|
||||||
|
#undef DEBUG
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
|
||||||
|
inline, and there are *still* stupid compilers about that don't like indented
|
||||||
|
pre-processor statements, or at least there were when I first wrote this. After
|
||||||
|
all, it had only been about 10 years then... */
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
#define DPRINTF(p) printf p
|
||||||
|
#else
|
||||||
|
#define DPRINTF(p) /*nothing*/
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* Get the definitions provided by running "configure" */
|
/* Get the definitions provided by running "configure" */
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
#include "pcre-config.h"
|
|
||||||
|
|
||||||
/* Standard C headers plus the external interface definition. The only time
|
/* Standard C headers plus the external interface definition. The only time
|
||||||
setjmp and stdarg are used is when NO_RECURSE is set. */
|
setjmp and stdarg are used is when NO_RECURSE is set. */
|
||||||
|
@ -58,7 +87,44 @@ setjmp and stdarg are used is when NO_RECURSE is set. */
|
||||||
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
|
||||||
|
cannot determine these outside the compilation (e.g. by running a program as
|
||||||
|
part of "configure") because PCRE is often cross-compiled for use on other
|
||||||
|
systems. Instead we make use of the maximum sizes that are available at
|
||||||
|
preprocessor time in standard C environments. */
|
||||||
|
|
||||||
|
// #if USHRT_MAX == 65535
|
||||||
|
// typedef unsigned short pcre_uint16;
|
||||||
|
// #elif UINT_MAX == 65535
|
||||||
|
// typedef unsigned int pcre_uint16;
|
||||||
|
// #else
|
||||||
|
// #error Cannot determine a type for 16-bit unsigned integers
|
||||||
|
// #endif
|
||||||
|
typedef guint16 pcre_uint16;
|
||||||
|
|
||||||
|
// #if UINT_MAX == 4294967295
|
||||||
|
// typedef unsigned int pcre_uint32;
|
||||||
|
// #elif ULONG_MAX == 4294967295
|
||||||
|
// typedef unsigned long int pcre_uint32;
|
||||||
|
// #else
|
||||||
|
// #error Cannot determine a type for 32-bit unsigned integers
|
||||||
|
// #endif
|
||||||
|
typedef guint32 pcre_uint32;
|
||||||
|
|
||||||
|
/* All character handling must be done as unsigned characters. Otherwise there
|
||||||
|
are problems with top-bit-set characters and functions such as isspace().
|
||||||
|
However, we leave the interface to the outside world as char *, because that
|
||||||
|
should make things easier for callers. We define a short type for unsigned char
|
||||||
|
to save lots of typing. I tried "uchar", but it causes problems on Digital
|
||||||
|
Unix, where it is defined in sys/types, so use "uschar" instead. */
|
||||||
|
|
||||||
|
typedef unsigned char uschar;
|
||||||
|
|
||||||
|
/* Include the public PCRE header and the definitions of UCP character property
|
||||||
|
values. */
|
||||||
|
|
||||||
#include "pcre.h"
|
#include "pcre.h"
|
||||||
|
#include "ucp.h"
|
||||||
|
|
||||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||||
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
||||||
|
@ -84,27 +150,27 @@ case in PCRE. */
|
||||||
#define memmove(a, b, c) bcopy(b, a, c)
|
#define memmove(a, b, c) bcopy(b, a, c)
|
||||||
#else /* HAVE_BCOPY */
|
#else /* HAVE_BCOPY */
|
||||||
void *
|
void *
|
||||||
_pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
|
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
dest += n;
|
dest += n;
|
||||||
src += n;
|
src += n;
|
||||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||||
return dest;
|
|
||||||
}
|
}
|
||||||
#define memmove(a, b, c) _pcre_memmove(a, b, c)
|
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
||||||
#endif /* not HAVE_BCOPY */
|
#endif /* not HAVE_BCOPY */
|
||||||
#endif /* not HAVE_MEMMOVE */
|
#endif /* not HAVE_MEMMOVE */
|
||||||
#endif /* not VPCOMPAT */
|
#endif /* not VPCOMPAT */
|
||||||
|
|
||||||
|
|
||||||
/* PCRE keeps offsets in its compiled code as 2-byte quantities by default.
|
/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
|
||||||
These are used, for example, to link from the start of a subpattern to its
|
in big-endian order) by default. These are used, for example, to link from the
|
||||||
alternatives and its end. The use of 2 bytes per offset limits the size of the
|
start of a subpattern to its alternatives and its end. The use of 2 bytes per
|
||||||
compiled regex to around 64K, which is big enough for almost everybody.
|
offset limits the size of the compiled regex to around 64K, which is big enough
|
||||||
However, I received a request for an even bigger limit. For this reason, and
|
for almost everybody. However, I received a request for an even bigger limit.
|
||||||
also to make the code easier to maintain, the storing and loading of offsets
|
For this reason, and also to make the code easier to maintain, the storing and
|
||||||
from the byte string is now handled by the macros that are defined here.
|
loading of offsets from the byte string is now handled by the macros that are
|
||||||
|
defined here.
|
||||||
|
|
||||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
|
The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
|
||||||
the config.h file, but can be overridden by using -D on the command line. This
|
the config.h file, but can be overridden by using -D on the command line. This
|
||||||
|
@ -173,6 +239,116 @@ capturing parenthesis numbers in back references. */
|
||||||
#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2
|
#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2
|
||||||
|
|
||||||
|
|
||||||
|
/* When UTF-8 encoding is being used, a character is no longer just a single
|
||||||
|
byte. The macros for character handling generate simple sequences when used in
|
||||||
|
byte-mode, and more complicated ones for UTF-8 characters. */
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UTF8
|
||||||
|
#define GETCHAR(c, eptr) c = *eptr;
|
||||||
|
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||||
|
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||||
|
#define GETCHARINCTEST(c, eptr) c = *eptr++;
|
||||||
|
#define GETCHARLEN(c, eptr, len) c = *eptr;
|
||||||
|
#define BACKCHAR(eptr)
|
||||||
|
|
||||||
|
#else /* SUPPORT_UTF8 */
|
||||||
|
|
||||||
|
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||||
|
we know we are in UTF-8 mode. */
|
||||||
|
|
||||||
|
#define GETCHAR(c, eptr) \
|
||||||
|
c = *eptr; \
|
||||||
|
if ((c & 0xc0) == 0xc0) \
|
||||||
|
{ \
|
||||||
|
int gcii; \
|
||||||
|
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||||
|
int gcss = 6*gcaa; \
|
||||||
|
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||||
|
for (gcii = 1; gcii <= gcaa; gcii++) \
|
||||||
|
{ \
|
||||||
|
gcss -= 6; \
|
||||||
|
c |= (eptr[gcii] & 0x3f) << gcss; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
|
||||||
|
pointer. */
|
||||||
|
|
||||||
|
#define GETCHARTEST(c, eptr) \
|
||||||
|
c = *eptr; \
|
||||||
|
if (utf8 && (c & 0xc0) == 0xc0) \
|
||||||
|
{ \
|
||||||
|
int gcii; \
|
||||||
|
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||||
|
int gcss = 6*gcaa; \
|
||||||
|
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||||
|
for (gcii = 1; gcii <= gcaa; gcii++) \
|
||||||
|
{ \
|
||||||
|
gcss -= 6; \
|
||||||
|
c |= (eptr[gcii] & 0x3f) << gcss; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
||||||
|
know we are in UTF-8 mode. */
|
||||||
|
|
||||||
|
#define GETCHARINC(c, eptr) \
|
||||||
|
c = *eptr++; \
|
||||||
|
if ((c & 0xc0) == 0xc0) \
|
||||||
|
{ \
|
||||||
|
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||||
|
int gcss = 6*gcaa; \
|
||||||
|
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||||
|
while (gcaa-- > 0) \
|
||||||
|
{ \
|
||||||
|
gcss -= 6; \
|
||||||
|
c |= (*eptr++ & 0x3f) << gcss; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the next character, testing for UTF-8 mode, and advancing the pointer */
|
||||||
|
|
||||||
|
#define GETCHARINCTEST(c, eptr) \
|
||||||
|
c = *eptr++; \
|
||||||
|
if (utf8 && (c & 0xc0) == 0xc0) \
|
||||||
|
{ \
|
||||||
|
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||||
|
int gcss = 6*gcaa; \
|
||||||
|
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||||
|
while (gcaa-- > 0) \
|
||||||
|
{ \
|
||||||
|
gcss -= 6; \
|
||||||
|
c |= (*eptr++ & 0x3f) << gcss; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the next UTF-8 character, not advancing the pointer, incrementing length
|
||||||
|
if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
||||||
|
|
||||||
|
#define GETCHARLEN(c, eptr, len) \
|
||||||
|
c = *eptr; \
|
||||||
|
if ((c & 0xc0) == 0xc0) \
|
||||||
|
{ \
|
||||||
|
int gcii; \
|
||||||
|
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||||
|
int gcss = 6*gcaa; \
|
||||||
|
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||||
|
for (gcii = 1; gcii <= gcaa; gcii++) \
|
||||||
|
{ \
|
||||||
|
gcss -= 6; \
|
||||||
|
c |= (eptr[gcii] & 0x3f) << gcss; \
|
||||||
|
} \
|
||||||
|
len += gcaa; \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the pointer is not at the start of a character, move it back until
|
||||||
|
it is. Called only in UTF-8 mode. */
|
||||||
|
|
||||||
|
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* In case there is no definition of offsetof() provided - though any proper
|
/* In case there is no definition of offsetof() provided - though any proper
|
||||||
Standard C system should have one. */
|
Standard C system should have one. */
|
||||||
|
|
||||||
|
@ -180,6 +356,7 @@ Standard C system should have one. */
|
||||||
#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
|
#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* These are the public options that can change during matching. */
|
/* These are the public options that can change during matching. */
|
||||||
|
|
||||||
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
|
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
|
||||||
|
@ -187,32 +364,38 @@ Standard C system should have one. */
|
||||||
/* Private options flags start at the most significant end of the four bytes,
|
/* Private options flags start at the most significant end of the four bytes,
|
||||||
but skip the top bit so we can use ints for convenience without getting tangled
|
but skip the top bit so we can use ints for convenience without getting tangled
|
||||||
with negative values. The public options defined in pcre.h start at the least
|
with negative values. The public options defined in pcre.h start at the least
|
||||||
significant end. Make sure they don't overlap, though now that we have expanded
|
significant end. Make sure they don't overlap! */
|
||||||
to four bytes there is plenty of space. */
|
|
||||||
|
|
||||||
#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */
|
#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */
|
||||||
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
|
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
|
||||||
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
||||||
#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */
|
#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */
|
||||||
|
#define PCRE_NOPARTIAL 0x04000000 /* can't use partial with this regex */
|
||||||
|
|
||||||
/* Options for the "extra" block produced by pcre_study(). */
|
/* Options for the "extra" block produced by pcre_study(). */
|
||||||
|
|
||||||
#define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */
|
#define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */
|
||||||
|
|
||||||
/* Masks for identifying the public options which are permitted at compile
|
/* Masks for identifying the public options that are permitted at compile
|
||||||
time, run time or study time, respectively. */
|
time, run time, or study time, respectively. */
|
||||||
|
|
||||||
#define PUBLIC_OPTIONS \
|
#define PUBLIC_OPTIONS \
|
||||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK)
|
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE)
|
||||||
|
|
||||||
#define PUBLIC_EXEC_OPTIONS \
|
#define PUBLIC_EXEC_OPTIONS \
|
||||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK)
|
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||||
|
PCRE_PARTIAL)
|
||||||
|
|
||||||
|
#define PUBLIC_DFA_EXEC_OPTIONS \
|
||||||
|
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||||
|
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART)
|
||||||
|
|
||||||
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
|
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
|
||||||
|
|
||||||
/* Magic number to provide a small check against being handed junk. */
|
/* Magic number to provide a small check against being handed junk. Also used
|
||||||
|
to detect whether a pattern was compiled on a host of different endianness. */
|
||||||
|
|
||||||
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
||||||
|
|
||||||
|
@ -221,6 +404,11 @@ time, run time or study time, respectively. */
|
||||||
#define REQ_UNSET (-2)
|
#define REQ_UNSET (-2)
|
||||||
#define REQ_NONE (-1)
|
#define REQ_NONE (-1)
|
||||||
|
|
||||||
|
/* The maximum remaining length of subject we are prepared to search for a
|
||||||
|
req_byte match. */
|
||||||
|
|
||||||
|
#define REQ_BYTE_MAX 1000
|
||||||
|
|
||||||
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
|
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
|
||||||
variable-length repeat, or anything other than literal characters. */
|
variable-length repeat, or anything other than literal characters. */
|
||||||
|
|
||||||
|
@ -231,6 +419,9 @@ variable-length repeat, or a anything other than literal characters. */
|
||||||
|
|
||||||
typedef int BOOL;
|
typedef int BOOL;
|
||||||
|
|
||||||
|
// #define FALSE 0
|
||||||
|
// #define TRUE 1
|
||||||
|
|
||||||
/* Escape items that are just an encoding of a particular data value. Note that
|
/* Escape items that are just an encoding of a particular data value. Note that
|
||||||
ESC_n is defined as yet another macro, which is set in config.h to either \n
|
ESC_n is defined as yet another macro, which is set in config.h to either \n
|
||||||
(the default) or \r (which some people want). */
|
(the default) or \r (which some people want). */
|
||||||
|
@ -265,12 +456,13 @@ definitions below, up to ESC_z. There's a dummy for OP_ANY because it
|
||||||
corresponds to "." rather than an escape sequence. The final one must be
|
corresponds to "." rather than an escape sequence. The final one must be
|
||||||
ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
|
ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
|
||||||
tests in the code for an escape greater than ESC_b and less than ESC_Z to
|
tests in the code for an escape greater than ESC_b and less than ESC_Z to
|
||||||
detect the types that may be repeated. These are the types that consume a
|
detect the types that may be repeated. These are the types that consume
|
||||||
character. If any new escapes are put in between that don't consume a
|
characters. If any new escapes are put in between that don't consume a
|
||||||
character, that code will have to change. */
|
character, that code will have to change. */
|
||||||
|
|
||||||
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
||||||
ESC_w, ESC_dum1, ESC_C, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_REF };
|
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E,
|
||||||
|
ESC_Q, ESC_REF };
|
||||||
|
|
||||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||||
contain UTF-8 characters with values greater than 255. */
|
contain UTF-8 characters with values greater than 255. */
|
||||||
|
@ -281,6 +473,8 @@ contain UTF-8 characters with values greater than 255. */
|
||||||
#define XCL_END 0 /* Marks end of individual items */
|
#define XCL_END 0 /* Marks end of individual items */
|
||||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||||
|
#define XCL_PROP 3 /* Unicode property (one property code) follows */
|
||||||
|
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||||
|
|
||||||
|
|
||||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||||
|
@ -306,110 +500,123 @@ enum {
|
||||||
OP_WORDCHAR, /* 10 \w */
|
OP_WORDCHAR, /* 10 \w */
|
||||||
OP_ANY, /* 11 Match any character */
|
OP_ANY, /* 11 Match any character */
|
||||||
OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
|
OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||||
OP_EODN, /* 13 End of data or \n at end of data: \Z. */
|
OP_NOTPROP, /* 13 \P (not Unicode property) */
|
||||||
OP_EOD, /* 14 End of data: \z */
|
OP_PROP, /* 14 \p (Unicode property) */
|
||||||
|
OP_EXTUNI, /* 15 \X (extended Unicode sequence) */
|
||||||
|
OP_EODN, /* 16 End of data or \n at end of data: \Z. */
|
||||||
|
OP_EOD, /* 17 End of data: \z */
|
||||||
|
|
||||||
OP_OPT, /* 15 Set runtime options */
|
OP_OPT, /* 18 Set runtime options */
|
||||||
OP_CIRC, /* 16 Start of line - varies with multiline switch */
|
OP_CIRC, /* 19 Start of line - varies with multiline switch */
|
||||||
OP_DOLL, /* 17 End of line - varies with multiline switch */
|
OP_DOLL, /* 20 End of line - varies with multiline switch */
|
||||||
OP_CHARS, /* 18 Match string of characters */
|
OP_CHAR, /* 21 Match one character, casefully */
|
||||||
OP_NOT, /* 19 Match anything but the following char */
|
OP_CHARNC, /* 22 Match one character, caselessly */
|
||||||
|
OP_NOT, /* 23 Match anything but the following char */
|
||||||
|
|
||||||
OP_STAR, /* 20 The maximizing and minimizing versions of */
|
OP_STAR, /* 24 The maximizing and minimizing versions of */
|
||||||
OP_MINSTAR, /* 21 all these opcodes must come in pairs, with */
|
OP_MINSTAR, /* 25 all these opcodes must come in pairs, with */
|
||||||
OP_PLUS, /* 22 the minimizing one second. */
|
OP_PLUS, /* 26 the minimizing one second. */
|
||||||
OP_MINPLUS, /* 23 This first set applies to single characters */
|
OP_MINPLUS, /* 27 This first set applies to single characters */
|
||||||
OP_QUERY, /* 24 */
|
OP_QUERY, /* 28 */
|
||||||
OP_MINQUERY, /* 25 */
|
OP_MINQUERY, /* 29 */
|
||||||
OP_UPTO, /* 26 From 0 to n matches */
|
OP_UPTO, /* 30 From 0 to n matches */
|
||||||
OP_MINUPTO, /* 27 */
|
OP_MINUPTO, /* 31 */
|
||||||
OP_EXACT, /* 28 Exactly n matches */
|
OP_EXACT, /* 32 Exactly n matches */
|
||||||
|
|
||||||
OP_NOTSTAR, /* 29 The maximizing and minimizing versions of */
|
OP_NOTSTAR, /* 33 The maximizing and minimizing versions of */
|
||||||
OP_NOTMINSTAR, /* 30 all these opcodes must come in pairs, with */
|
OP_NOTMINSTAR, /* 34 all these opcodes must come in pairs, with */
|
||||||
OP_NOTPLUS, /* 31 the minimizing one second. */
|
OP_NOTPLUS, /* 35 the minimizing one second. */
|
||||||
OP_NOTMINPLUS, /* 32 This set applies to "not" single characters */
|
OP_NOTMINPLUS, /* 36 This set applies to "not" single characters */
|
||||||
OP_NOTQUERY, /* 33 */
|
OP_NOTQUERY, /* 37 */
|
||||||
OP_NOTMINQUERY, /* 34 */
|
OP_NOTMINQUERY, /* 38 */
|
||||||
OP_NOTUPTO, /* 35 From 0 to n matches */
|
OP_NOTUPTO, /* 39 From 0 to n matches */
|
||||||
OP_NOTMINUPTO, /* 36 */
|
OP_NOTMINUPTO, /* 40 */
|
||||||
OP_NOTEXACT, /* 37 Exactly n matches */
|
OP_NOTEXACT, /* 41 Exactly n matches */
|
||||||
|
|
||||||
OP_TYPESTAR, /* 38 The maximizing and minimizing versions of */
|
OP_TYPESTAR, /* 42 The maximizing and minimizing versions of */
|
||||||
OP_TYPEMINSTAR, /* 39 all these opcodes must come in pairs, with */
|
OP_TYPEMINSTAR, /* 43 all these opcodes must come in pairs, with */
|
||||||
OP_TYPEPLUS, /* 40 the minimizing one second. These codes must */
|
OP_TYPEPLUS, /* 44 the minimizing one second. These codes must */
|
||||||
OP_TYPEMINPLUS, /* 41 be in exactly the same order as those above. */
|
OP_TYPEMINPLUS, /* 45 be in exactly the same order as those above. */
|
||||||
OP_TYPEQUERY, /* 42 This set applies to character types such as \d */
|
OP_TYPEQUERY, /* 46 This set applies to character types such as \d */
|
||||||
OP_TYPEMINQUERY, /* 43 */
|
OP_TYPEMINQUERY, /* 47 */
|
||||||
OP_TYPEUPTO, /* 44 From 0 to n matches */
|
OP_TYPEUPTO, /* 48 From 0 to n matches */
|
||||||
OP_TYPEMINUPTO, /* 45 */
|
OP_TYPEMINUPTO, /* 49 */
|
||||||
OP_TYPEEXACT, /* 46 Exactly n matches */
|
OP_TYPEEXACT, /* 50 Exactly n matches */
|
||||||
|
|
||||||
OP_CRSTAR, /* 47 The maximizing and minimizing versions of */
|
OP_CRSTAR, /* 51 The maximizing and minimizing versions of */
|
||||||
OP_CRMINSTAR, /* 48 all these opcodes must come in pairs, with */
|
OP_CRMINSTAR, /* 52 all these opcodes must come in pairs, with */
|
||||||
OP_CRPLUS, /* 49 the minimizing one second. These codes must */
|
OP_CRPLUS, /* 53 the minimizing one second. These codes must */
|
||||||
OP_CRMINPLUS, /* 50 be in exactly the same order as those above. */
|
OP_CRMINPLUS, /* 54 be in exactly the same order as those above. */
|
||||||
OP_CRQUERY, /* 51 These are for character classes and back refs */
|
OP_CRQUERY, /* 55 These are for character classes and back refs */
|
||||||
OP_CRMINQUERY, /* 52 */
|
OP_CRMINQUERY, /* 56 */
|
||||||
OP_CRRANGE, /* 53 These are different to the three seta above. */
|
OP_CRRANGE, /* 57 These are different to the three sets above. */
|
||||||
OP_CRMINRANGE, /* 54 */
|
OP_CRMINRANGE, /* 58 */
|
||||||
|
|
||||||
OP_CLASS, /* 55 Match a character class, chars < 256 only */
|
OP_CLASS, /* 59 Match a character class, chars < 256 only */
|
||||||
OP_NCLASS, /* 56 Same, but the bitmap was created from a negative
|
OP_NCLASS, /* 60 Same, but the bitmap was created from a negative
|
||||||
class - the difference is relevant only when a UTF-8
|
class - the difference is relevant only when a UTF-8
|
||||||
character > 255 is encountered. */
|
character > 255 is encountered. */
|
||||||
|
|
||||||
OP_XCLASS, /* 57 Extended class for handling UTF-8 chars within the
|
OP_XCLASS, /* 61 Extended class for handling UTF-8 chars within the
|
||||||
class. This does both positive and negative. */
|
class. This does both positive and negative. */
|
||||||
|
|
||||||
OP_REF, /* 58 Match a back reference */
|
OP_REF, /* 62 Match a back reference */
|
||||||
OP_RECURSE, /* 59 Match a numbered subpattern (possibly recursive) */
|
OP_RECURSE, /* 63 Match a numbered subpattern (possibly recursive) */
|
||||||
OP_CALLOUT, /* 60 Call out to external function if provided */
|
OP_CALLOUT, /* 64 Call out to external function if provided */
|
||||||
|
|
||||||
OP_ALT, /* 61 Start of alternation */
|
OP_ALT, /* 65 Start of alternation */
|
||||||
OP_KET, /* 62 End of group that doesn't have an unbounded repeat */
|
OP_KET, /* 66 End of group that doesn't have an unbounded repeat */
|
||||||
OP_KETRMAX, /* 63 These two must remain together and in this */
|
OP_KETRMAX, /* 67 These two must remain together and in this */
|
||||||
OP_KETRMIN, /* 64 order. They are for groups that repeat for ever. */
|
OP_KETRMIN, /* 68 order. They are for groups that repeat for ever. */
|
||||||
|
|
||||||
/* The assertions must come before ONCE and COND */
|
/* The assertions must come before ONCE and COND */
|
||||||
|
|
||||||
OP_ASSERT, /* 65 Positive lookahead */
|
OP_ASSERT, /* 69 Positive lookahead */
|
||||||
OP_ASSERT_NOT, /* 66 Negative lookahead */
|
OP_ASSERT_NOT, /* 70 Negative lookahead */
|
||||||
OP_ASSERTBACK, /* 67 Positive lookbehind */
|
OP_ASSERTBACK, /* 71 Positive lookbehind */
|
||||||
OP_ASSERTBACK_NOT, /* 68 Negative lookbehind */
|
OP_ASSERTBACK_NOT, /* 72 Negative lookbehind */
|
||||||
OP_REVERSE, /* 69 Move pointer back - used in lookbehind assertions */
|
OP_REVERSE, /* 73 Move pointer back - used in lookbehind assertions */
|
||||||
|
|
||||||
/* ONCE and COND must come after the assertions, with ONCE first, as there's
|
/* ONCE and COND must come after the assertions, with ONCE first, as there's
|
||||||
a test for >= ONCE for a subpattern that isn't an assertion. */
|
a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||||
|
|
||||||
OP_ONCE, /* 70 Once matched, don't back up into the subpattern */
|
OP_ONCE, /* 74 Once matched, don't back up into the subpattern */
|
||||||
OP_COND, /* 71 Conditional group */
|
OP_COND, /* 75 Conditional group */
|
||||||
OP_CREF, /* 72 Used to hold an extraction string number (cond ref) */
|
OP_CREF, /* 76 Used to hold an extraction string number (cond ref) */
|
||||||
|
|
||||||
OP_BRAZERO, /* 73 These two must remain together and in this */
|
OP_BRAZERO, /* 77 These two must remain together and in this */
|
||||||
OP_BRAMINZERO, /* 74 order. */
|
OP_BRAMINZERO, /* 78 order. */
|
||||||
|
|
||||||
OP_BRANUMBER, /* 75 Used for extracting brackets whose number is greater
|
OP_BRANUMBER, /* 79 Used for extracting brackets whose number is greater
|
||||||
than can fit into an opcode. */
|
than can fit into an opcode. */
|
||||||
|
|
||||||
OP_BRA /* 76 This and greater values are used for brackets that
|
OP_BRA /* 80 This and greater values are used for brackets that
|
||||||
extract substrings up to a basic limit. After that,
|
extract substrings up to EXTRACT_BASIC_MAX. After
|
||||||
use is made of OP_BRANUMBER. */
|
that, use is made of OP_BRANUMBER. */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* WARNING: There is an implicit assumption in study.c that all opcodes are
|
/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and
|
||||||
less than 128 in value. This makes handling UTF-8 character sequences easier.
|
study.c that all opcodes are less than 128 in value. This makes handling UTF-8
|
||||||
*/
|
character sequences easier. */
|
||||||
|
|
||||||
|
/* The highest extraction number before we have to start using additional
|
||||||
|
bytes. (Originally PCRE didn't have support for extraction counts higher than
|
||||||
|
this number.) The value is limited by the number of opcodes left after OP_BRA,
|
||||||
|
i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
|
||||||
|
opcodes. */
|
||||||
|
|
||||||
|
#define EXTRACT_BASIC_MAX 100
|
||||||
|
|
||||||
|
|
||||||
/* This macro defines textual names for all the opcodes. There are used only
|
/* This macro defines textual names for all the opcodes. These are used only
|
||||||
for debugging, in pcre.c when DEBUG is defined, and also in pcretest.c. The
|
for debugging. The macro is referenced only in pcre_printint.c. */
|
||||||
macro is referenced only in printint.c. */
|
|
||||||
|
|
||||||
#define OP_NAME_LIST \
|
#define OP_NAME_LIST \
|
||||||
"End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
|
"End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
|
||||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", "\\Z", "\\z", \
|
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \
|
||||||
"Opt", "^", "$", "chars", "not", \
|
"notprop", "prop", "extuni", \
|
||||||
|
"\\Z", "\\z", \
|
||||||
|
"Opt", "^", "$", "char", "charnc", "not", \
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||||
|
@ -423,7 +630,7 @@ macro is referenced only in printint.c. */
|
||||||
/* This macro defines the length of fixed length operations in the compiled
|
/* This macro defines the length of fixed length operations in the compiled
|
||||||
regex. The lengths are used when searching for specific things, and also in the
|
regex. The lengths are used when searching for specific things, and also in the
|
||||||
debugging printing of a compiled regex. We use a macro so that it can be
|
debugging printing of a compiled regex. We use a macro so that it can be
|
||||||
incorporated both into pcre.c and pcretest.c without being publicly exposed.
|
defined close to the definitions of the opcodes themselves.
|
||||||
|
|
||||||
As things have been extended, some of these are no longer fixed lengths, but are
|
As things have been extended, some of these are no longer fixed lengths, but are
|
||||||
minima instead. For example, the length of a single-character repeat may vary
|
minima instead. For example, the length of a single-character repeat may vary
|
||||||
|
@ -432,8 +639,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||||
#define OP_LENGTHS \
|
#define OP_LENGTHS \
|
||||||
1, /* End */ \
|
1, /* End */ \
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ \
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ \
|
||||||
1, 1, 1, 1, 2, 1, 1, /* Any, Anybyte, \Z, \z, Opt, ^, $ */ \
|
1, 1, /* Any, Anybyte */ \
|
||||||
2, /* Chars - the minimum length */ \
|
2, 2, 1, /* NOTPROP, PROP, EXTUNI */ \
|
||||||
|
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
||||||
|
2, /* Char - the minimum length */ \
|
||||||
|
2, /* Charnc - the minimum length */ \
|
||||||
2, /* not */ \
|
2, /* not */ \
|
||||||
/* Positive single-char repeats ** These are */ \
|
/* Positive single-char repeats ** These are */ \
|
||||||
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
|
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
|
||||||
|
@ -452,7 +662,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||||
0, /* XCLASS - variable length */ \
|
0, /* XCLASS - variable length */ \
|
||||||
3, /* REF */ \
|
3, /* REF */ \
|
||||||
1+LINK_SIZE, /* RECURSE */ \
|
1+LINK_SIZE, /* RECURSE */ \
|
||||||
2, /* CALLOUT */ \
|
2+2*LINK_SIZE, /* CALLOUT */ \
|
||||||
1+LINK_SIZE, /* Alt */ \
|
1+LINK_SIZE, /* Alt */ \
|
||||||
1+LINK_SIZE, /* Ket */ \
|
1+LINK_SIZE, /* Ket */ \
|
||||||
1+LINK_SIZE, /* KetRmax */ \
|
1+LINK_SIZE, /* KetRmax */ \
|
||||||
|
@ -470,98 +680,61 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||||
1+LINK_SIZE /* BRA */ \
|
1+LINK_SIZE /* BRA */ \
|
||||||
|
|
||||||
|
|
||||||
/* The highest extraction number before we have to start using additional
|
|
||||||
bytes. (Originally PCRE didn't have support for extraction counts highter than
|
|
||||||
this number.) The value is limited by the number of opcodes left after OP_BRA,
|
|
||||||
i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
|
|
||||||
opcodes. */
|
|
||||||
|
|
||||||
#define EXTRACT_BASIC_MAX 150
|
|
||||||
|
|
||||||
/* A magic value for OP_CREF to indicate the "in recursion" condition. */
|
/* A magic value for OP_CREF to indicate the "in recursion" condition. */
|
||||||
|
|
||||||
#define CREF_RECURSE 0xffff
|
#define CREF_RECURSE 0xffff
|
||||||
|
|
||||||
/* The texts of compile-time error messages are defined as macros here so that
|
/* Error code numbers. They are given names so that they can more easily be
|
||||||
they can be accessed by the POSIX wrapper and converted into error codes. Yes,
|
tracked. */
|
||||||
I could have used error codes in the first place, but didn't feel like changing
|
|
||||||
just to accommodate the POSIX wrapper. */
|
|
||||||
|
|
||||||
#define ERR1 "\\ at end of pattern"
|
enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||||
#define ERR2 "\\c at end of pattern"
|
ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
|
||||||
#define ERR3 "unrecognized character follows \\"
|
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
||||||
#define ERR4 "numbers out of order in {} quantifier"
|
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||||
#define ERR5 "number too big in {} quantifier"
|
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 };
|
||||||
#define ERR6 "missing terminating ] for character class"
|
|
||||||
#define ERR7 "invalid escape sequence in character class"
|
|
||||||
#define ERR8 "range out of order in character class"
|
|
||||||
#define ERR9 "nothing to repeat"
|
|
||||||
#define ERR10 "operand of unlimited repeat could match the empty string"
|
|
||||||
#define ERR11 "internal error: unexpected repeat"
|
|
||||||
#define ERR12 "unrecognized character after (?"
|
|
||||||
#define ERR13 "POSIX named classes are supported only within a class"
|
|
||||||
#define ERR14 "missing )"
|
|
||||||
#define ERR15 "reference to non-existent subpattern"
|
|
||||||
#define ERR16 "erroffset passed as NULL"
|
|
||||||
#define ERR17 "unknown option bit(s) set"
|
|
||||||
#define ERR18 "missing ) after comment"
|
|
||||||
#define ERR19 "parentheses nested too deeply"
|
|
||||||
#define ERR20 "regular expression too large"
|
|
||||||
#define ERR21 "failed to get memory"
|
|
||||||
#define ERR22 "unmatched parentheses"
|
|
||||||
#define ERR23 "internal error: code overflow"
|
|
||||||
#define ERR24 "unrecognized character after (?<"
|
|
||||||
#define ERR25 "lookbehind assertion is not fixed length"
|
|
||||||
#define ERR26 "malformed number after (?("
|
|
||||||
#define ERR27 "conditional group contains more than two branches"
|
|
||||||
#define ERR28 "assertion expected after (?("
|
|
||||||
#define ERR29 "(?R or (?digits must be followed by )"
|
|
||||||
#define ERR30 "unknown POSIX class name"
|
|
||||||
#define ERR31 "POSIX collating elements are not supported"
|
|
||||||
#define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support"
|
|
||||||
#define ERR33 "spare error"
|
|
||||||
#define ERR34 "character value in \\x{...} sequence is too large"
|
|
||||||
#define ERR35 "invalid condition (?(0)"
|
|
||||||
#define ERR36 "\\C not allowed in lookbehind assertion"
|
|
||||||
#define ERR37 "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X"
|
|
||||||
#define ERR38 "number after (?C is > 255"
|
|
||||||
#define ERR39 "closing ) for (?C expected"
|
|
||||||
#define ERR40 "recursive call could loop indefinitely"
|
|
||||||
#define ERR41 "unrecognized character after (?P"
|
|
||||||
#define ERR42 "syntax error after (?P"
|
|
||||||
#define ERR43 "two named groups have the same name"
|
|
||||||
#define ERR44 "invalid UTF-8 string"
|
|
||||||
|
|
||||||
/* All character handling must be done as unsigned characters. Otherwise there
|
|
||||||
are problems with top-bit-set characters and functions such as isspace().
|
|
||||||
However, we leave the interface to the outside world as char *, because that
|
|
||||||
should make things easier for callers. We define a short type for unsigned char
|
|
||||||
to save lots of typing. I tried "uchar", but it causes problems on Digital
|
|
||||||
Unix, where it is defined in sys/types, so use "uschar" instead. */
|
|
||||||
|
|
||||||
typedef unsigned char uschar;
|
|
||||||
|
|
||||||
/* The real format of the start of the pcre block; the index of names and the
|
/* The real format of the start of the pcre block; the index of names and the
|
||||||
code vector run on as long as necessary after the end. */
|
code vector run on as long as necessary after the end. We store an explicit
|
||||||
|
offset to the name table so that if a regex is compiled on one host, saved, and
|
||||||
|
then run on another where the size of pointers is different, all might still
|
||||||
|
be well. For the case of compiled-on-4 and run-on-8, we include an extra
|
||||||
|
pointer that is always NULL. For future-proofing, a few dummy fields were
|
||||||
|
originally included - even though you can never get this planning right - but
|
||||||
|
there is only one left now.
|
||||||
|
|
||||||
|
NOTE NOTE NOTE:
|
||||||
|
Because people can now save and re-use compiled patterns, any additions to this
|
||||||
|
structure should be made at the end, and something earlier (e.g. a new
|
||||||
|
flag in the options or one of the dummy fields) should indicate that the new
|
||||||
|
fields are present. Currently PCRE always sets the dummy fields to zero.
|
||||||
|
NOTE NOTE NOTE:
|
||||||
|
*/
|
||||||
|
|
||||||
typedef struct real_pcre {
|
typedef struct real_pcre {
|
||||||
unsigned long int magic_number;
|
pcre_uint32 magic_number;
|
||||||
size_t size; /* Total that was malloced */
|
pcre_uint32 size; /* Total that was malloced */
|
||||||
const unsigned char *tables; /* Pointer to tables */
|
pcre_uint32 options;
|
||||||
unsigned long int options;
|
pcre_uint32 dummy1; /* For future use, maybe */
|
||||||
unsigned short int top_bracket;
|
|
||||||
unsigned short int top_backref;
|
pcre_uint16 top_bracket;
|
||||||
unsigned short int first_byte;
|
pcre_uint16 top_backref;
|
||||||
unsigned short int req_byte;
|
pcre_uint16 first_byte;
|
||||||
unsigned short int name_entry_size; /* Size of any name items; 0 => none */
|
pcre_uint16 req_byte;
|
||||||
unsigned short int name_count; /* Number of name items */
|
pcre_uint16 name_table_offset; /* Offset to name table that follows */
|
||||||
|
pcre_uint16 name_entry_size; /* Size of any name items */
|
||||||
|
pcre_uint16 name_count; /* Number of name items */
|
||||||
|
pcre_uint16 ref_count; /* Reference count */
|
||||||
|
|
||||||
|
const unsigned char *tables; /* Pointer to tables or NULL for std */
|
||||||
|
const unsigned char *nullpad; /* NULL padding */
|
||||||
} real_pcre;
|
} real_pcre;
|
||||||
|
|
||||||
/* The format of the block used to store data from pcre_study(). */
|
/* The format of the block used to store data from pcre_study(). The same
|
||||||
|
remark (see NOTE above) about extending this structure applies. */
|
||||||
|
|
||||||
typedef struct pcre_study_data {
|
typedef struct pcre_study_data {
|
||||||
size_t size; /* Total that was malloced */
|
pcre_uint32 size; /* Total that was malloced */
|
||||||
uschar options;
|
pcre_uint32 options;
|
||||||
uschar start_bits[32];
|
uschar start_bits[32];
|
||||||
} pcre_study_data;
|
} pcre_study_data;
|
||||||
|
|
||||||
|
@ -574,12 +747,14 @@ typedef struct compile_data {
|
||||||
const uschar *cbits; /* Points to character type table */
|
const uschar *cbits; /* Points to character type table */
|
||||||
const uschar *ctypes; /* Points to table of type maps */
|
const uschar *ctypes; /* Points to table of type maps */
|
||||||
const uschar *start_code; /* The start of the compiled code */
|
const uschar *start_code; /* The start of the compiled code */
|
||||||
|
const uschar *start_pattern; /* The start of the pattern */
|
||||||
uschar *name_table; /* The name/number table */
|
uschar *name_table; /* The name/number table */
|
||||||
int names_found; /* Number of entries so far */
|
int names_found; /* Number of entries so far */
|
||||||
int name_entry_size; /* Size of each entry */
|
int name_entry_size; /* Size of each entry */
|
||||||
int top_backref; /* Maximum back reference */
|
int top_backref; /* Maximum back reference */
|
||||||
unsigned int backref_map; /* Bitmap of low back refs */
|
unsigned int backref_map; /* Bitmap of low back refs */
|
||||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||||
|
BOOL nopartial; /* Set TRUE if partial won't work */
|
||||||
} compile_data;
|
} compile_data;
|
||||||
|
|
||||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||||
|
@ -613,7 +788,7 @@ NOTE: This isn't used for a "normal" compilation of pcre. */
|
||||||
struct heapframe;
|
struct heapframe;
|
||||||
|
|
||||||
/* Structure for passing "static" information around between the functions
|
/* Structure for passing "static" information around between the functions
|
||||||
doing the matching, so that they are thread-safe. */
|
doing traditional NFA matching, so that they are thread-safe. */
|
||||||
|
|
||||||
typedef struct match_data {
|
typedef struct match_data {
|
||||||
unsigned long int match_call_count; /* As it says */
|
unsigned long int match_call_count; /* As it says */
|
||||||
|
@ -629,6 +804,8 @@ typedef struct match_data {
|
||||||
BOOL utf8; /* UTF8 flag */
|
BOOL utf8; /* UTF8 flag */
|
||||||
BOOL endonly; /* Dollar not before final \n */
|
BOOL endonly; /* Dollar not before final \n */
|
||||||
BOOL notempty; /* Empty string match not wanted */
|
BOOL notempty; /* Empty string match not wanted */
|
||||||
|
BOOL partial; /* PARTIAL flag */
|
||||||
|
BOOL hitend; /* Hit the end of the subject at some point */
|
||||||
const uschar *start_code; /* For use when recursing */
|
const uschar *start_code; /* For use when recursing */
|
||||||
const uschar *start_subject; /* Start of the subject string */
|
const uschar *start_subject; /* Start of the subject string */
|
||||||
const uschar *end_subject; /* End of the subject string */
|
const uschar *end_subject; /* End of the subject string */
|
||||||
|
@ -642,6 +819,19 @@ typedef struct match_data {
|
||||||
struct heapframe *thisframe; /* Used only when compiling for no recursion */
|
struct heapframe *thisframe; /* Used only when compiling for no recursion */
|
||||||
} match_data;
|
} match_data;
|
||||||
|
|
||||||
|
/* A similar structure is used for the same purpose by the DFA matching
|
||||||
|
functions. */
|
||||||
|
|
||||||
|
typedef struct dfa_match_data {
|
||||||
|
const uschar *start_code; /* Start of the compiled pattern */
|
||||||
|
const uschar *start_subject; /* Start of the subject string */
|
||||||
|
const uschar *end_subject; /* End of subject string */
|
||||||
|
const uschar *tables; /* Character tables */
|
||||||
|
int moptions; /* Match options */
|
||||||
|
int poptions; /* Pattern options */
|
||||||
|
void *callout_data; /* To pass back to callouts */
|
||||||
|
} dfa_match_data;
|
||||||
|
|
||||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
/* Bit definitions for entries in the pcre_ctypes table. */
|
||||||
|
|
||||||
#define ctype_space 0x01
|
#define ctype_space 0x01
|
||||||
|
@ -675,4 +865,46 @@ total length. */
|
||||||
#define ctypes_offset (cbits_offset + cbit_length)
|
#define ctypes_offset (cbits_offset + cbit_length)
|
||||||
#define tables_length (ctypes_offset + 256)
|
#define tables_length (ctypes_offset + 256)
|
||||||
|
|
||||||
/* End of internal.h */
|
/* Layout of the UCP type table that translates property names into codes for
|
||||||
|
pcre_ucp_findchar(). */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const char *name; /* Property name text (what get_ucpname() hands back) */
|
||||||
|
int value; /* Property code that the name stands for */
|
||||||
|
} ucp_type_table;
|
||||||
|
|
||||||
|
|
||||||
|
/* Internal shared data tables. These are tables that are used by more than one
|
||||||
|
of the exported public functions. They have to be "external" in the C sense,
|
||||||
|
but are not part of the PCRE public API. The data for these tables is in the
|
||||||
|
pcre_tables.c module. */
|
||||||
|
|
||||||
|
extern const int _pcre_utf8_table1[];
|
||||||
|
extern const int _pcre_utf8_table2[];
|
||||||
|
extern const int _pcre_utf8_table3[];
|
||||||
|
extern const uschar _pcre_utf8_table4[];
|
||||||
|
|
||||||
|
extern const int _pcre_utf8_table1_size;
|
||||||
|
|
||||||
|
extern const ucp_type_table _pcre_utt[];
|
||||||
|
extern const int _pcre_utt_size;
|
||||||
|
|
||||||
|
extern const uschar _pcre_default_tables[];
|
||||||
|
|
||||||
|
extern const uschar _pcre_OP_lengths[];
|
||||||
|
|
||||||
|
|
||||||
|
/* Internal shared functions. These are functions that are used by more than
|
||||||
|
one of the exported public functions. They have to be "external" in the C
|
||||||
|
sense, but are not part of the PCRE public API. */
|
||||||
|
|
||||||
|
extern int _pcre_ord2utf8(int, uschar *);
|
||||||
|
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||||
|
const pcre_study_data *, pcre_study_data *);
|
||||||
|
extern int _pcre_ucp_findchar(const int, int *, int *);
|
||||||
|
extern int _pcre_valid_utf8(const uschar *, int);
|
||||||
|
extern BOOL _pcre_xclass(int, const uschar *);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* End of pcre_internal.h */
|
|
@ -2,48 +2,53 @@
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/*
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
Redistribution and use in source and binary forms, with or without
|
||||||
computer system, and to redistribute it freely, subject to the following
|
modification, are permitted provided that the following conditions are met:
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
this list of conditions and the following disclaimer.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
explicit claim or by omission.
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
misrepresented as being the original software.
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
supersede any condition above with which it is incompatible.
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
See the file Tech.Notes for some information on the internals.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/* This file is compiled on its own as part of the PCRE library. However,
|
/* This module contains the external function pcre_maketables(), which builds
|
||||||
it is also included in the compilation of dftables.c, in which case the macro
|
character tables for PCRE in the current locale. The file is compiled on its
|
||||||
DFTABLES is defined. */
|
own as part of the PCRE library. However, it is also included in the
|
||||||
|
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||||
|
|
||||||
|
|
||||||
#ifndef DFTABLES
|
#ifndef DFTABLES
|
||||||
#include "internal.h"
|
#include "pcre_internal.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Create PCRE character tables *
|
* Create PCRE character tables *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -59,13 +64,13 @@ Returns: pointer to the contiguous block of data
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const unsigned char *
|
const unsigned char *
|
||||||
_pcre_maketables(void)
|
pcre_maketables(void)
|
||||||
{
|
{
|
||||||
unsigned char *yield, *p;
|
unsigned char *yield, *p;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
#ifndef DFTABLES
|
#ifndef DFTABLES
|
||||||
yield = (unsigned char*)(g_malloc)(tables_length);
|
yield = (unsigned char*)(pcre_malloc)(tables_length);
|
||||||
#else
|
#else
|
||||||
yield = (unsigned char*)malloc(tables_length);
|
yield = (unsigned char*)malloc(tables_length);
|
||||||
#endif
|
#endif
|
||||||
|
@ -137,4 +142,4 @@ for (i = 0; i < 256; i++)
|
||||||
return yield;
|
return yield;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of maketables.c */
|
/* End of pcre_maketables.c */
|
|
@ -0,0 +1,78 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This file contains a private PCRE function that converts an ordinal
|
||||||
|
character value into a UTF8 string. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Convert character value to UTF-8 *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function takes an integer value in the range 0 - 0x7fffffff
|
||||||
|
and encodes it as a UTF-8 character in 1 to 6 bytes.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
cvalue the character value
|
||||||
|
buffer pointer to buffer for result - at least 6 bytes long
|
||||||
|
|
||||||
|
Returns: number of bytes placed in the buffer
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_EXPORT int
|
||||||
|
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
||||||
|
{
|
||||||
|
register int i, j;
|
||||||
|
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
||||||
|
if (cvalue <= _pcre_utf8_table1[i]) break;
|
||||||
|
buffer += i;
|
||||||
|
for (j = i; j > 0; j--)
|
||||||
|
{
|
||||||
|
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||||
|
cvalue >>= 6;
|
||||||
|
}
|
||||||
|
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||||
|
return i + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_ord2utf8.c */
|
|
@ -2,41 +2,51 @@
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/*
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
This is a library of functions to support regular expressions whose syntax
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
and semantics are as close as possible to those of the Perl 5 language. See
|
|
||||||
the file Tech.Notes for some information on the internals.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
Redistribution and use in source and binary forms, with or without
|
||||||
computer system, and to redistribute it freely, subject to the following
|
modification, are permitted provided that the following conditions are met:
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
this list of conditions and the following disclaimer.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
explicit claim or by omission.
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
misrepresented as being the original software.
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
supersede any condition above with which it is incompatible.
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/* This module contains a debugging function for printing out the internal form
|
/* This module contains a PCRE private debugging function for printing out the
|
||||||
of a compiled regular expression. It is kept in a separate file so that it can
|
internal form of a compiled regular expression, along with some supporting
|
||||||
be #included both in the pcretest program, and in the library itself when
|
local functions. This source file is used in two places:
|
||||||
compiled with the debugging switch. */
|
|
||||||
|
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
|
||||||
|
(DEBUG defined in pcre_internal.h). It is not included in production compiles.
|
||||||
|
|
||||||
|
(2) It is always #included by pcretest.c, which can be asked to print out a
|
||||||
|
compiled regex for debugging purposes. */
|
||||||
|
|
||||||
|
|
||||||
static const char *OP_names[] = { OP_NAME_LIST };
|
static const char *OP_names[] = { OP_NAME_LIST };
|
||||||
|
@ -46,18 +56,6 @@ static const char *OP_names[] = { OP_NAME_LIST };
|
||||||
* Print single- or multi-byte character *
|
* Print single- or multi-byte character *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* These tables are actually copies of ones in pcre.c. If we compile the
|
|
||||||
library with debugging, they are included twice, but that isn't really a
|
|
||||||
problem - compiling with debugging is pretty rare and these are very small. */
|
|
||||||
|
|
||||||
static const int utf8_t3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
|
||||||
|
|
||||||
static const uschar utf8_t4[] = {
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
||||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
print_char(FILE *f, uschar *ptr, BOOL utf8)
|
print_char(FILE *f, uschar *ptr, BOOL utf8)
|
||||||
{
|
{
|
||||||
|
@ -71,11 +69,23 @@ if (!utf8 || (c & 0xc0) != 0xc0)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
|
int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||||
int s = 6*a;
|
int s = 6*a;
|
||||||
c = (c & utf8_t3[a]) << s;
|
c = (c & _pcre_utf8_table3[a]) << s;
|
||||||
for (i = 1; i <= a; i++)
|
for (i = 1; i <= a; i++)
|
||||||
{
|
{
|
||||||
|
/* This is a check for malformed UTF-8; it should only occur if the sanity
|
||||||
|
check has been turned off. Rather than swallow random bytes, just stop if
|
||||||
|
we hit a bad one. Print it with \X instead of \x as an indication. */
|
||||||
|
|
||||||
|
if ((ptr[i] & 0xc0) != 0x80)
|
||||||
|
{
|
||||||
|
fprintf(f, "\\X{%x}", c);
|
||||||
|
return i - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The byte is OK */
|
||||||
|
|
||||||
s -= 6;
|
s -= 6;
|
||||||
c |= (ptr[i] & 0x3f) << s;
|
c |= (ptr[i] & 0x3f) << s;
|
||||||
}
|
}
|
||||||
|
@ -86,19 +96,59 @@ else
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Find Unicode property name *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Look up the printable name of a Unicode property code for the debugging
dump.

Argument:
  property    the property code to look for in _pcre_utt[]

Returns:      the matching entry's name, or "??" when no entry matches or
              when the code is built without SUPPORT_UCP
*/

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
int i;

/* Start at the last valid entry. _pcre_utt_size is taken to be the number of
entries in _pcre_utt[], so index _pcre_utt_size itself is one past the end;
starting there would compare against (and possibly return the name of) memory
outside the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (property == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
(void)property;   /* unused without Unicode property support */
return "??";
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Print compiled regex *
|
* Print compiled regex *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
|
/* Make this function work for a regex with integers either byte order.
|
||||||
|
However, we assume that what we are passed is a compiled regex. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
print_internals(pcre *external_re, FILE *f)
|
pcre_printint(pcre *external_re, FILE *f)
|
||||||
{
|
{
|
||||||
real_pcre *re = (real_pcre *)external_re;
|
real_pcre *re = (real_pcre *)external_re;
|
||||||
uschar *codestart =
|
uschar *codestart, *code;
|
||||||
(uschar *)re + sizeof(real_pcre) + re->name_count * re->name_entry_size;
|
BOOL utf8;
|
||||||
uschar *code = codestart;
|
|
||||||
BOOL utf8 = (re->options & PCRE_UTF8) != 0;
|
unsigned int options = re->options;
|
||||||
|
int offset = re->name_table_offset;
|
||||||
|
int count = re->name_count;
|
||||||
|
int size = re->name_entry_size;
|
||||||
|
|
||||||
|
if (re->magic_number != MAGIC_NUMBER)
|
||||||
|
{
|
||||||
|
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
|
||||||
|
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
|
||||||
|
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
|
||||||
|
options = ((options << 24) & 0xff000000) |
|
||||||
|
((options << 8) & 0x00ff0000) |
|
||||||
|
((options >> 8) & 0x0000ff00) |
|
||||||
|
((options >> 24) & 0x000000ff);
|
||||||
|
}
|
||||||
|
|
||||||
|
code = codestart = (uschar *)re + offset + count * size;
|
||||||
|
utf8 = (options & PCRE_UTF8) != 0;
|
||||||
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
|
@ -106,7 +156,7 @@ for(;;)
|
||||||
int c;
|
int c;
|
||||||
int extra = 0;
|
int extra = 0;
|
||||||
|
|
||||||
fprintf(f, "%3d ", code - codestart);
|
fprintf(f, "%3d ", (int)(code - codestart));
|
||||||
|
|
||||||
if (*code >= OP_BRA)
|
if (*code >= OP_BRA)
|
||||||
{
|
{
|
||||||
|
@ -114,7 +164,7 @@ for(;;)
|
||||||
fprintf(f, "%3d Bra extra\n", GET(code, 1));
|
fprintf(f, "%3d Bra extra\n", GET(code, 1));
|
||||||
else
|
else
|
||||||
fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
|
fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
|
||||||
code += OP_lengths[OP_BRA];
|
code += _pcre_OP_lengths[OP_BRA];
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,18 +179,31 @@ for(;;)
|
||||||
fprintf(f, " %.2x %s", code[1], OP_names[*code]);
|
fprintf(f, " %.2x %s", code[1], OP_names[*code]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_CHARS:
|
case OP_CHAR:
|
||||||
{
|
{
|
||||||
int charlength = code[1];
|
fprintf(f, " ");
|
||||||
ccode = code + 2;
|
do
|
||||||
extra = charlength;
|
|
||||||
fprintf(f, "%3d ", charlength);
|
|
||||||
while (charlength > 0)
|
|
||||||
{
|
{
|
||||||
int extrabytes = print_char(f, ccode, utf8);
|
code++;
|
||||||
ccode += 1 + extrabytes;
|
code += 1 + print_char(f, code, utf8);
|
||||||
charlength -= 1 + extrabytes;
|
|
||||||
}
|
}
|
||||||
|
while (*code == OP_CHAR);
|
||||||
|
fprintf(f, "\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CHARNC:
|
||||||
|
{
|
||||||
|
fprintf(f, " NC ");
|
||||||
|
do
|
||||||
|
{
|
||||||
|
code++;
|
||||||
|
code += 1 + print_char(f, code, utf8);
|
||||||
|
}
|
||||||
|
while (*code == OP_CHARNC);
|
||||||
|
fprintf(f, "\n");
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -182,8 +245,16 @@ for(;;)
|
||||||
case OP_TYPEQUERY:
|
case OP_TYPEQUERY:
|
||||||
case OP_TYPEMINQUERY:
|
case OP_TYPEMINQUERY:
|
||||||
fprintf(f, " ");
|
fprintf(f, " ");
|
||||||
if (*code >= OP_TYPESTAR) fprintf(f, "%s", OP_names[code[1]]);
|
if (*code >= OP_TYPESTAR)
|
||||||
else extra = print_char(f, code+1, utf8);
|
{
|
||||||
|
fprintf(f, "%s", OP_names[code[1]]);
|
||||||
|
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||||
|
{
|
||||||
|
fprintf(f, " %s ", get_ucpname(code[2]));
|
||||||
|
extra = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else extra = print_char(f, code+1, utf8);
|
||||||
fprintf(f, "%s", OP_names[*code]);
|
fprintf(f, "%s", OP_names[*code]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -201,7 +272,13 @@ for(;;)
|
||||||
case OP_TYPEEXACT:
|
case OP_TYPEEXACT:
|
||||||
case OP_TYPEUPTO:
|
case OP_TYPEUPTO:
|
||||||
case OP_TYPEMINUPTO:
|
case OP_TYPEMINUPTO:
|
||||||
fprintf(f, " %s{", OP_names[code[3]]);
|
fprintf(f, " %s", OP_names[code[3]]);
|
||||||
|
if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
|
||||||
|
{
|
||||||
|
fprintf(f, " %s ", get_ucpname(code[4]));
|
||||||
|
extra = 1;
|
||||||
|
}
|
||||||
|
fprintf(f, "{");
|
||||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||||
fprintf(f, "%d}", GET2(code,1));
|
fprintf(f, "%d}", GET2(code,1));
|
||||||
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
||||||
|
@ -228,7 +305,7 @@ for(;;)
|
||||||
case OP_NOTMINUPTO:
|
case OP_NOTMINUPTO:
|
||||||
if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
|
if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
|
||||||
else fprintf(f, " [^\\x%02x]{", c);
|
else fprintf(f, " [^\\x%02x]{", c);
|
||||||
if (*code != OP_NOTEXACT) fprintf(f, ",");
|
if (*code != OP_NOTEXACT) fprintf(f, "0,");
|
||||||
fprintf(f, "%d}", GET2(code,1));
|
fprintf(f, "%d}", GET2(code,1));
|
||||||
if (*code == OP_NOTMINUPTO) fprintf(f, "?");
|
if (*code == OP_NOTMINUPTO) fprintf(f, "?");
|
||||||
break;
|
break;
|
||||||
|
@ -239,11 +316,17 @@ for(;;)
|
||||||
|
|
||||||
case OP_REF:
|
case OP_REF:
|
||||||
fprintf(f, " \\%d", GET2(code,1));
|
fprintf(f, " \\%d", GET2(code,1));
|
||||||
ccode = code + OP_lengths[*code];
|
ccode = code + _pcre_OP_lengths[*code];
|
||||||
goto CLASS_REF_REPEAT;
|
goto CLASS_REF_REPEAT;
|
||||||
|
|
||||||
case OP_CALLOUT:
|
case OP_CALLOUT:
|
||||||
fprintf(f, " %s %d", OP_names[*code], code[1]);
|
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
|
||||||
|
GET(code, 2 + LINK_SIZE));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_PROP:
|
||||||
|
case OP_NOTPROP:
|
||||||
|
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
|
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
|
||||||
|
@ -287,7 +370,7 @@ for(;;)
|
||||||
if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
|
if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
|
||||||
if (--j > i)
|
if (--j > i)
|
||||||
{
|
{
|
||||||
fprintf(f, "-");
|
if (j != i + 1) fprintf(f, "-");
|
||||||
if (j == '-' || j == ']') fprintf(f, "\\");
|
if (j == '-' || j == ']') fprintf(f, "\\");
|
||||||
if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
|
if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
|
||||||
}
|
}
|
||||||
|
@ -304,11 +387,22 @@ for(;;)
|
||||||
int ch;
|
int ch;
|
||||||
while ((ch = *ccode++) != XCL_END)
|
while ((ch = *ccode++) != XCL_END)
|
||||||
{
|
{
|
||||||
ccode += 1 + print_char(f, ccode, TRUE);
|
if (ch == XCL_PROP)
|
||||||
if (ch == XCL_RANGE)
|
{
|
||||||
|
fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
|
||||||
|
}
|
||||||
|
else if (ch == XCL_NOTPROP)
|
||||||
|
{
|
||||||
|
fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
fprintf(f, "-");
|
|
||||||
ccode += 1 + print_char(f, ccode, TRUE);
|
ccode += 1 + print_char(f, ccode, TRUE);
|
||||||
|
if (ch == XCL_RANGE)
|
||||||
|
{
|
||||||
|
fprintf(f, "-");
|
||||||
|
ccode += 1 + print_char(f, ccode, TRUE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -329,7 +423,7 @@ for(;;)
|
||||||
case OP_CRQUERY:
|
case OP_CRQUERY:
|
||||||
case OP_CRMINQUERY:
|
case OP_CRMINQUERY:
|
||||||
fprintf(f, "%s", OP_names[*ccode]);
|
fprintf(f, "%s", OP_names[*ccode]);
|
||||||
extra = OP_lengths[*ccode];
|
extra += _pcre_OP_lengths[*ccode];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_CRRANGE:
|
case OP_CRRANGE:
|
||||||
|
@ -339,7 +433,7 @@ for(;;)
|
||||||
if (max == 0) fprintf(f, "{%d,}", min);
|
if (max == 0) fprintf(f, "{%d,}", min);
|
||||||
else fprintf(f, "{%d,%d}", min, max);
|
else fprintf(f, "{%d,%d}", min, max);
|
||||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||||
extra = OP_lengths[*ccode];
|
extra += _pcre_OP_lengths[*ccode];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -352,9 +446,9 @@ for(;;)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
code += OP_lengths[*code] + extra;
|
code += _pcre_OP_lengths[*code] + extra;
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of printint.c */
|
/* End of pcre_printint.src */
|
|
@ -0,0 +1,77 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_refcount(), which is an
|
||||||
|
auxiliary function that can be used to maintain a reference count in a compiled
|
||||||
|
pattern data block. This might be helpful in applications where the block is
|
||||||
|
shared by different users. */
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Maintain reference count *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* The reference count is a 16-bit field, initialized to zero. It is not
|
||||||
|
possible to transfer a non-zero count from one host to a different host that
|
||||||
|
has a different byte order - though I can't see why anyone in their right mind
|
||||||
|
would ever want to do that!
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
argument_re points to compiled code
|
||||||
|
adjust value to add to the count
|
||||||
|
|
||||||
|
Returns: the (possibly updated) count value (a non-negative number), or
|
||||||
|
a negative error number
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_EXPORT int
|
||||||
|
pcre_refcount(pcre *argument_re, int adjust)
|
||||||
|
{
|
||||||
|
real_pcre *re = (real_pcre *)argument_re;
|
||||||
|
if (re == NULL) return PCRE_ERROR_NULL;
|
||||||
|
re->ref_count = (-adjust > re->ref_count)? 0 :
|
||||||
|
(adjust + re->ref_count > 65535)? 65535 :
|
||||||
|
re->ref_count + adjust;
|
||||||
|
return re->ref_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_refcount.c */
|
|
@ -2,42 +2,47 @@
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/*
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
This is a library of functions to support regular expressions whose syntax
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
and semantics are as close as possible to those of the Perl 5 language. See
|
|
||||||
the file Tech.Notes for some information on the internals.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
Redistribution and use in source and binary forms, with or without
|
||||||
computer system, and to redistribute it freely, subject to the following
|
modification, are permitted provided that the following conditions are met:
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
this list of conditions and the following disclaimer.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
explicit claim or by omission.
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
misrepresented as being the original software.
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
supersede any condition above with which it is incompatible.
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/* Include the internals header, which itself includes Standard C headers plus
|
/* This module contains the external function pcre_study(), along with local
|
||||||
the external pcre header. */
|
supporting functions. */
|
||||||
|
|
||||||
#include "internal.h"
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
|
@ -57,7 +62,7 @@ Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
|
set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
|
||||||
{
|
{
|
||||||
start_bits[c/8] |= (1 << (c&7));
|
start_bits[c/8] |= (1 << (c&7));
|
||||||
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
||||||
|
@ -123,7 +128,7 @@ do
|
||||||
/* Skip over callout */
|
/* Skip over callout */
|
||||||
|
|
||||||
case OP_CALLOUT:
|
case OP_CALLOUT:
|
||||||
tcode += 2;
|
tcode += 2 + 2*LINK_SIZE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Skip over extended extraction bracket number */
|
/* Skip over extended extraction bracket number */
|
||||||
|
@ -186,11 +191,10 @@ do
|
||||||
/* At least one single char sets the bit and stops */
|
/* At least one single char sets the bit and stops */
|
||||||
|
|
||||||
case OP_EXACT: /* Fall through */
|
case OP_EXACT: /* Fall through */
|
||||||
tcode++;
|
tcode += 2;
|
||||||
|
|
||||||
case OP_CHARS: /* Fall through */
|
|
||||||
tcode++;
|
|
||||||
|
|
||||||
|
case OP_CHAR:
|
||||||
|
case OP_CHARNC:
|
||||||
case OP_PLUS:
|
case OP_PLUS:
|
||||||
case OP_MINPLUS:
|
case OP_MINPLUS:
|
||||||
set_bit(start_bits, tcode[1], caseless, cd);
|
set_bit(start_bits, tcode[1], caseless, cd);
|
||||||
|
@ -397,14 +401,15 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||||
NULL on error or if no optimization possible
|
NULL on error or if no optimization possible
|
||||||
*/
|
*/
|
||||||
|
|
||||||
EXPORT pcre_extra *
|
PCRE_EXPORT pcre_extra *
|
||||||
_pcre_study(const pcre *external_re, int options, const char **errorptr)
|
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||||
{
|
{
|
||||||
uschar start_bits[32];
|
uschar start_bits[32];
|
||||||
pcre_extra *extra;
|
pcre_extra *extra;
|
||||||
pcre_study_data *study;
|
pcre_study_data *study;
|
||||||
|
const uschar *tables;
|
||||||
const real_pcre *re = (const real_pcre *)external_re;
|
const real_pcre *re = (const real_pcre *)external_re;
|
||||||
uschar *code = (uschar *)re + sizeof(real_pcre) +
|
uschar *code = (uschar *)re + re->name_table_offset +
|
||||||
(re->name_count * re->name_entry_size);
|
(re->name_count * re->name_entry_size);
|
||||||
compile_data compile_block;
|
compile_data compile_block;
|
||||||
|
|
||||||
|
@ -429,12 +434,17 @@ at present. */
|
||||||
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
/* Set the character tables in the block which is passed around */
|
/* Set the character tables in the block that is passed around */
|
||||||
|
|
||||||
compile_block.lcc = re->tables + lcc_offset;
|
tables = re->tables;
|
||||||
compile_block.fcc = re->tables + fcc_offset;
|
if (tables == NULL)
|
||||||
compile_block.cbits = re->tables + cbits_offset;
|
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
|
||||||
compile_block.ctypes = re->tables + ctypes_offset;
|
(void *)(&tables));
|
||||||
|
|
||||||
|
compile_block.lcc = tables + lcc_offset;
|
||||||
|
compile_block.fcc = tables + fcc_offset;
|
||||||
|
compile_block.cbits = tables + cbits_offset;
|
||||||
|
compile_block.ctypes = tables + ctypes_offset;
|
||||||
|
|
||||||
/* See if we can find a fixed set of initial characters for the pattern. */
|
/* See if we can find a fixed set of initial characters for the pattern. */
|
||||||
|
|
||||||
|
@ -449,7 +459,7 @@ pcre_study_data is fixed. We nevertheless save it in a field for returning via
|
||||||
the pcre_fullinfo() function so that if it becomes variable in the future, we
|
the pcre_fullinfo() function so that if it becomes variable in the future, we
|
||||||
don't have to change that code. */
|
don't have to change that code. */
|
||||||
|
|
||||||
extra = (pcre_extra *)(g_malloc)
|
extra = (pcre_extra *)(pcre_malloc)
|
||||||
(sizeof(pcre_extra) + sizeof(pcre_study_data));
|
(sizeof(pcre_extra) + sizeof(pcre_study_data));
|
||||||
|
|
||||||
if (extra == NULL)
|
if (extra == NULL)
|
||||||
|
@ -469,4 +479,4 @@ memcpy(study->start_bits, start_bits, sizeof(start_bits));
|
||||||
return extra;
|
return extra;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of study.c */
|
/* End of pcre_study.c */
|
|
@ -0,0 +1,130 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains some fixed tables that are used by more than one of the
|
||||||
|
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||||
|
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||||
|
clashes with the library. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||||
|
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||||
|
|
||||||
|
/* Indexed by opcode value, e.g. _pcre_OP_lengths[OP_BRA]; the initializer list
comes entirely from the OP_LENGTHS macro. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Tables for UTF-8 support *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||||
|
character. */
|
||||||
|
|
||||||
|
/* Entry i is the breakpoint for a UTF-8 character of i+1 bytes. */

const int _pcre_utf8_table1[] = {
  0x7f,          /* 1 byte  */
  0x7ff,         /* 2 bytes */
  0xffff,        /* 3 bytes */
  0x1fffff,      /* 4 bytes */
  0x3ffffff,     /* 5 bytes */
  0x7fffffff     /* 6 bytes */
};

/* Number of entries in the table above, derived from the table itself so that
the two cannot drift apart. */

const int _pcre_utf8_table1_size =
  sizeof(_pcre_utf8_table1)/sizeof(_pcre_utf8_table1[0]);
|
||||||
|
|
||||||
|
/* These are the indicator bits and the mask for the data bits to set in the
|
||||||
|
first byte of a character, indexed by the number of additional bytes. */
|
||||||
|
|
||||||
|
/* Both tables are indexed by the number of additional bytes (0 to 5). */

/* Indicator bits to set in the first byte. */
const int _pcre_utf8_table2[] = {
  0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};

/* Mask selecting the data bits of the first byte. */
const int _pcre_utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};
|
||||||
|
|
||||||
|
/* Table of the number of extra bytes, indexed by the first byte
|
||||||
|
masked with 0x3f. The highest number for a valid UTF-8 character is in fact
|
||||||
|
0x3d. */
|
||||||
|
|
||||||
|
/* 64 entries, one per value of (first byte & 0x3f). */

const uschar _pcre_utf8_table4[] = {
  /* 0x00 - 0x0f */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 - 0x1f */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 - 0x2f */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x30 - 0x3f */
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
|
||||||
|
|
||||||
|
/* This table translates Unicode property names into code values for the
|
||||||
|
ucp_findchar() function. */
|
||||||
|
|
||||||
|
const ucp_type_table _pcre_utt[] = {
|
||||||
|
{ "C", 128 + ucp_C },
|
||||||
|
{ "Cc", ucp_Cc },
|
||||||
|
{ "Cf", ucp_Cf },
|
||||||
|
{ "Cn", ucp_Cn },
|
||||||
|
{ "Co", ucp_Co },
|
||||||
|
{ "Cs", ucp_Cs },
|
||||||
|
{ "L", 128 + ucp_L },
|
||||||
|
{ "Ll", ucp_Ll },
|
||||||
|
{ "Lm", ucp_Lm },
|
||||||
|
{ "Lo", ucp_Lo },
|
||||||
|
{ "Lt", ucp_Lt },
|
||||||
|
{ "Lu", ucp_Lu },
|
||||||
|
{ "M", 128 + ucp_M },
|
||||||
|
{ "Mc", ucp_Mc },
|
||||||
|
{ "Me", ucp_Me },
|
||||||
|
{ "Mn", ucp_Mn },
|
||||||
|
{ "N", 128 + ucp_N },
|
||||||
|
{ "Nd", ucp_Nd },
|
||||||
|
{ "Nl", ucp_Nl },
|
||||||
|
{ "No", ucp_No },
|
||||||
|
{ "P", 128 + ucp_P },
|
||||||
|
{ "Pc", ucp_Pc },
|
||||||
|
{ "Pd", ucp_Pd },
|
||||||
|
{ "Pe", ucp_Pe },
|
||||||
|
{ "Pf", ucp_Pf },
|
||||||
|
{ "Pi", ucp_Pi },
|
||||||
|
{ "Po", ucp_Po },
|
||||||
|
{ "Ps", ucp_Ps },
|
||||||
|
{ "S", 128 + ucp_S },
|
||||||
|
{ "Sc", ucp_Sc },
|
||||||
|
{ "Sk", ucp_Sk },
|
||||||
|
{ "Sm", ucp_Sm },
|
||||||
|
{ "So", ucp_So },
|
||||||
|
{ "Z", 128 + ucp_Z },
|
||||||
|
{ "Zl", ucp_Zl },
|
||||||
|
{ "Zp", ucp_Zp },
|
||||||
|
{ "Zs", ucp_Zs }
|
||||||
|
};
|
||||||
|
|
||||||
|
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||||
|
|
||||||
|
/* End of pcre_tables.c */
|
|
@ -0,0 +1,132 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains an internal function that tests a compiled pattern to
|
||||||
|
see if it was compiled with the opposite endianness. If so, it uses an
|
||||||
|
auxiliary local function to flip the appropriate bytes. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Flip bytes in an integer *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called when the magic number in a regex doesn't match, in
order to flip its bytes to see if we are dealing with a pattern that was
compiled on a host of different endianness. If so, this function is used to
flip other byte values.

The low-order n bytes of the value are reversed; all higher-order bytes of the
result are zero. Callers pass sizeof(field) for n, so n is not restricted to 2
or 4: a field that is 8 bytes wide on the current host is reversed over all 8
bytes. (A flip limited to 4 bytes can never recover the magic number from such
a field, so a genuinely byte-flipped pattern would be rejected.) For n equal to
2 or 4 the result is the same as a plain 16-bit or 32-bit byte swap.

Arguments:
  value        the number to flip
  n            the number of bytes to flip; values larger than sizeof(long)
                 are not meaningful because the value is passed as a long

Returns:       the flipped value
*/

static long int
byteflip(long int value, int n)
{
/* Work in unsigned arithmetic: shifting a signed value into or out of the sign
bit is not portable. */

unsigned long int in = (unsigned long int)value;
unsigned long int out = 0;
int i;

/* Peel bytes off the bottom of the input and push them onto the bottom of the
output, so the first byte removed ends up as the most significant of the n. */

for (i = 0; i < n; i++)
  {
  out = (out << 8) | (in & 0xff);
  in >>= 8;
  }

return (long int)out;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Test for a byte-flipped compiled regex *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
|
||||||
|
pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
|
||||||
|
is, it was compiled on a system of opposite endianness. The function is called
|
||||||
|
only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
|
||||||
|
we flip all the relevant values into a different data block, and return it.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
re points to the regex
|
||||||
|
study points to study data, or NULL
|
||||||
|
internal_re points to a new regex block
|
||||||
|
internal_study points to a new study block
|
||||||
|
|
||||||
|
Returns: the new block if it is indeed a byte-flipped regex
|
||||||
|
NULL if it is not
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_EXPORT real_pcre *
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
  const pcre_study_data *study, pcre_study_data *internal_study)
{
/* The fields stored through a pcre_uint16 cast are all converted the same way,
so the pattern is written once here and removed again after its last use. */

#define FLIP16(field) \
  internal_re->field = (pcre_uint16)byteflip(re->field, sizeof(re->field))

/* If reversing the bytes of the magic number does not give the native value,
this is not a byte-flipped pattern and there is nothing we can do with it. */

if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
  return NULL;

/* Start from a straight copy so that every field needing no conversion is
carried over, then overwrite the integer fields with their flipped values. */

*internal_re = *re;
internal_re->size = byteflip(re->size, sizeof(re->size));
internal_re->options = byteflip(re->options, sizeof(re->options));

FLIP16(top_bracket);
FLIP16(top_backref);
FLIP16(first_byte);
FLIP16(req_byte);
FLIP16(name_table_offset);
FLIP16(name_entry_size);
FLIP16(name_count);

#undef FLIP16

/* Study data is optional; when it is absent the caller's study block is left
untouched. */

if (study == NULL) return internal_re;

*internal_study = *study;
internal_study->size = byteflip(study->size, sizeof(study->size));
internal_study->options = byteflip(study->options, sizeof(study->options));

return internal_re;
}
|
||||||
|
|
||||||
|
/* End of pcre_tryflipped.c */
|
|
@ -0,0 +1,161 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module compiles code for supporting the use of Unicode character
|
||||||
|
properties. We use the (embryonic at the time of writing) UCP library, by
|
||||||
|
including some of its files, copies of which have been put in the PCRE
|
||||||
|
distribution. The actual search function is reproduced here, with its name
|
||||||
|
changed. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
#include "ucp.h" /* Category definitions */
|
||||||
|
#include "ucpinternal.h" /* Internal table details */
|
||||||
|
#include "ucptable.c" /* The table itself */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Search table and return data *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Two values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||||
|
character type is ucp_Lu, ucp_Nd, etc.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
c the character value
|
||||||
|
type_ptr the detailed character type is returned here
|
||||||
|
case_ptr for letters, the opposite case is returned here, if there
|
||||||
|
is one, else zero
|
||||||
|
|
||||||
|
Returns: the character type category or -1 if not found
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_EXPORT int
|
||||||
|
_pcre_ucp_findchar(const int c, int *type_ptr, int *case_ptr)
|
||||||
|
{
|
||||||
|
cnode *node = ucp_table;
|
||||||
|
register int cc = c;
|
||||||
|
int case_offset;
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
register int d = node->f1 | ((node->f0 & f0_chhmask) << 16);
|
||||||
|
if (cc == d) break;
|
||||||
|
if (cc < d)
|
||||||
|
{
|
||||||
|
if ((node->f0 & f0_leftexists) == 0) return -1;
|
||||||
|
node ++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
register int roffset = (node->f2 & f2_rightmask) >> f2_rightshift;
|
||||||
|
if (roffset == 0) return -1;
|
||||||
|
node += 1 << (roffset - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch ((*type_ptr = ((node->f0 & f0_typemask) >> f0_typeshift)))
|
||||||
|
{
|
||||||
|
case ucp_Cc:
|
||||||
|
case ucp_Cf:
|
||||||
|
case ucp_Cn:
|
||||||
|
case ucp_Co:
|
||||||
|
case ucp_Cs:
|
||||||
|
return ucp_C;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ucp_Ll:
|
||||||
|
case ucp_Lu:
|
||||||
|
case_offset = node->f2 & f2_casemask;
|
||||||
|
if ((case_offset & 0x0100) != 0) case_offset |= 0xfffff000;
|
||||||
|
*case_ptr = (case_offset == 0)? 0 : cc + case_offset;
|
||||||
|
return ucp_L;
|
||||||
|
|
||||||
|
case ucp_Lm:
|
||||||
|
case ucp_Lo:
|
||||||
|
case ucp_Lt:
|
||||||
|
*case_ptr = 0;
|
||||||
|
return ucp_L;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ucp_Mc:
|
||||||
|
case ucp_Me:
|
||||||
|
case ucp_Mn:
|
||||||
|
return ucp_M;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ucp_Nd:
|
||||||
|
case ucp_Nl:
|
||||||
|
case ucp_No:
|
||||||
|
return ucp_N;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ucp_Pc:
|
||||||
|
case ucp_Pd:
|
||||||
|
case ucp_Pe:
|
||||||
|
case ucp_Pf:
|
||||||
|
case ucp_Pi:
|
||||||
|
case ucp_Ps:
|
||||||
|
case ucp_Po:
|
||||||
|
return ucp_P;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ucp_Sc:
|
||||||
|
case ucp_Sk:
|
||||||
|
case ucp_Sm:
|
||||||
|
case ucp_So:
|
||||||
|
return ucp_S;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ucp_Zl:
|
||||||
|
case ucp_Zp:
|
||||||
|
case ucp_Zs:
|
||||||
|
return ucp_Z;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: /* "Should never happen" */
|
||||||
|
return -1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_ucp_findchar.c */
|
|
@ -0,0 +1,130 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains an internal function for validating UTF-8 character
|
||||||
|
strings. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Validate a UTF-8 string *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called (optionally) at the start of compile or match, to
|
||||||
|
validate that a supposed UTF-8 string is actually valid. The early check means
|
||||||
|
that subsequent code can assume it is dealing with a valid string. The check
|
||||||
|
can be turned off for maximum performance, but the consequences of supplying
|
||||||
|
an invalid string are then undefined.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
string points to the string
|
||||||
|
length length of string, or -1 if the string is zero-terminated
|
||||||
|
|
||||||
|
Returns: < 0 if the string is a valid UTF-8 string
|
||||||
|
>= 0 otherwise; the value is the offset of the bad byte
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_EXPORT int
|
||||||
|
_pcre_valid_utf8(const uschar *string, int length)
|
||||||
|
{
|
||||||
|
register const uschar *p;
|
||||||
|
|
||||||
|
if (length < 0)
|
||||||
|
{
|
||||||
|
for (p = string; *p != 0; p++);
|
||||||
|
length = p - string;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (p = string; length-- > 0; p++)
|
||||||
|
{
|
||||||
|
register int ab;
|
||||||
|
register int c = *p;
|
||||||
|
if (c < 128) continue;
|
||||||
|
if ((c & 0xc0) != 0xc0) return p - string;
|
||||||
|
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||||
|
if (length < ab) return p - string;
|
||||||
|
length -= ab;
|
||||||
|
|
||||||
|
/* Check top bits in the second byte */
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||||
|
|
||||||
|
/* Check for overlong sequences for each different length */
|
||||||
|
switch (ab)
|
||||||
|
{
|
||||||
|
/* Check for xx00 000x */
|
||||||
|
case 1:
|
||||||
|
if ((c & 0x3e) == 0) return p - string;
|
||||||
|
continue; /* We know there aren't any more bytes to check */
|
||||||
|
|
||||||
|
/* Check for 1110 0000, xx0x xxxx */
|
||||||
|
case 2:
|
||||||
|
if (c == 0xe0 && (*p & 0x20) == 0) return p - string;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Check for 1111 0000, xx00 xxxx */
|
||||||
|
case 3:
|
||||||
|
if (c == 0xf0 && (*p & 0x30) == 0) return p - string;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Check for 1111 1000, xx00 0xxx */
|
||||||
|
case 4:
|
||||||
|
if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */
|
||||||
|
case 5:
|
||||||
|
if (c == 0xfe || c == 0xff ||
|
||||||
|
(c == 0xfc && (*p & 0x3c) == 0)) return p - string;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
||||||
|
while (--ab > 0)
|
||||||
|
{
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_valid_utf8.c */
|
|
@ -0,0 +1,61 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre_version(), which returns a
|
||||||
|
string that identifies the PCRE version that is in use. */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Return version string *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
#define STRING(a) # a
|
||||||
|
#define XSTRING(s) STRING(s)
|
||||||
|
|
||||||
|
PCRE_EXPORT const char *
|
||||||
|
pcre_version(void)
|
||||||
|
{
|
||||||
|
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_version.c */
|
|
@ -0,0 +1,121 @@
|
||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 1997-2005 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains an internal function that is used to match an extended
|
||||||
|
class (one that contains characters whose values are > 255). It is used by both
|
||||||
|
pcre_exec() and pcre_dfa_exec(). */
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Match character against an XCLASS *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called to match a character against an extended class that
|
||||||
|
might contain values > 255.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
c the character
|
||||||
|
data points to the flag byte of the XCLASS data
|
||||||
|
|
||||||
|
Returns: TRUE if character matches, else FALSE
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE_EXPORT BOOL
|
||||||
|
_pcre_xclass(int c, const uschar *data)
|
||||||
|
{
|
||||||
|
int t;
|
||||||
|
BOOL negated = (*data & XCL_NOT) != 0;
|
||||||
|
|
||||||
|
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||||
|
not, we still carry on, because there may be ranges that start below 256 in the
|
||||||
|
additional data. */
|
||||||
|
|
||||||
|
if (c < 256)
|
||||||
|
{
|
||||||
|
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
|
||||||
|
return !negated; /* char found */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* First skip the bit map if present. Then match against the list of Unicode
|
||||||
|
properties or large chars or ranges that end with a large char. We won't ever
|
||||||
|
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||||
|
|
||||||
|
if ((*data++ & XCL_MAP) != 0) data += 32;
|
||||||
|
|
||||||
|
while ((t = *data++) != XCL_END)
|
||||||
|
{
|
||||||
|
int x, y;
|
||||||
|
if (t == XCL_SINGLE)
|
||||||
|
{
|
||||||
|
GETCHARINC(x, data);
|
||||||
|
if (c == x) return !negated;
|
||||||
|
}
|
||||||
|
else if (t == XCL_RANGE)
|
||||||
|
{
|
||||||
|
GETCHARINC(x, data);
|
||||||
|
GETCHARINC(y, data);
|
||||||
|
if (c >= x && c <= y) return !negated;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UCP
|
||||||
|
else /* XCL_PROP & XCL_NOTPROP */
|
||||||
|
{
|
||||||
|
int chartype, othercase;
|
||||||
|
int rqdtype = *data++;
|
||||||
|
int category = _pcre_ucp_findchar(c, &chartype, &othercase);
|
||||||
|
if (rqdtype >= 128)
|
||||||
|
{
|
||||||
|
if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((rqdtype == chartype) == (t == XCL_PROP)) return !negated;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UCP */
|
||||||
|
}
|
||||||
|
|
||||||
|
return negated; /* char did not match */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre_xclass.c */
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*************************************************
|
||||||
|
* libucp - Unicode Property Table handler *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _UCP_H
#define _UCP_H

/* General character categories. This is the value returned by ucp_findchar
(reproduced inside PCRE as _pcre_ucp_findchar). The numbering is spelled out
explicitly; it is the same as the implicit numbering from zero. */

enum {
  ucp_C = 0,     /* Other */
  ucp_L = 1,     /* Letter */
  ucp_M = 2,     /* Mark */
  ucp_N = 3,     /* Number */
  ucp_P = 4,     /* Punctuation */
  ucp_S = 5,     /* Symbol */
  ucp_Z = 6      /* Separator */
};

/* Detailed character types. This is the value that ucp_findchar passes back
through its type pointer. As above, the explicit values match the implicit
numbering from zero. */

enum {
  ucp_Cc = 0,    /* Control */
  ucp_Cf = 1,    /* Format */
  ucp_Cn = 2,    /* Unassigned */
  ucp_Co = 3,    /* Private use */
  ucp_Cs = 4,    /* Surrogate */

  ucp_Ll = 5,    /* Lower case letter */
  ucp_Lm = 6,    /* Modifier letter */
  ucp_Lo = 7,    /* Other letter */
  ucp_Lt = 8,    /* Title case letter */
  ucp_Lu = 9,    /* Upper case letter */

  ucp_Mc = 10,   /* Spacing mark */
  ucp_Me = 11,   /* Enclosing mark */
  ucp_Mn = 12,   /* Non-spacing mark */

  ucp_Nd = 13,   /* Decimal number */
  ucp_Nl = 14,   /* Letter number */
  ucp_No = 15,   /* Other number */

  ucp_Pc = 16,   /* Connector punctuation */
  ucp_Pd = 17,   /* Dash punctuation */
  ucp_Pe = 18,   /* Close punctuation */
  ucp_Pf = 19,   /* Final punctuation */
  ucp_Pi = 20,   /* Initial punctuation */
  ucp_Po = 21,   /* Other punctuation */
  ucp_Ps = 22,   /* Open punctuation */

  ucp_Sc = 23,   /* Currency symbol */
  ucp_Sk = 24,   /* Modifier symbol */
  ucp_Sm = 25,   /* Mathematical symbol */
  ucp_So = 26,   /* Other symbol */

  ucp_Zl = 27,   /* Line separator */
  ucp_Zp = 28,   /* Paragraph separator */
  ucp_Zs = 29    /* Space separator */
};

#endif
|
||||||
|
|
||||||
|
/* End of ucp.h */
|
|
@ -0,0 +1,91 @@
|
||||||
|
/*************************************************
|
||||||
|
* libucp - Unicode Property Table handler *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Internal header file defining the layout of compact nodes in the tree. */
|
||||||
|
|
||||||
|
/* One node of the compact search tree: three 16-bit fields. */

typedef struct cnode {
  unsigned short f0;   /* left-child flag, character type, high code point byte */
  unsigned short f1;   /* two least significant bytes of the code point */
  unsigned short f2;   /* right-child offset exponent and case offset */
} cnode;

/* Fields packed into f0 */

#define f0_leftexists   0x8000    /* Set when a left child exists */
#define f0_typemask     0x3f00    /* Bits holding the character type */
#define f0_typeshift         8    /* Shift that brings the type down to bit 0 */
#define f0_chhmask      0x00ff    /* High byte of the character's code point */

/* Fields packed into f2 */

#define f2_rightmask    0xf000    /* Bits holding the right offset exponent */
#define f2_rightshift       12    /* Shift that brings the exponent to bit 0 */
#define f2_casemask     0x0fff    /* Bits holding the case offset */
|
||||||
|
|
||||||
|
/* The tree consists of a vector of structures of type cnode, with the root
|
||||||
|
node as the first element. The three short ints (16-bits) are used as follows:
|
||||||
|
|
||||||
|
(f0) (1) The 0x8000 bit of f0 is set if a left child exists. The child's node
|
||||||
|
is the next node in the vector.
|
||||||
|
     (2) The 0x4000 bit of f0 is spare.
|
||||||
|
(3) The 0x3f00 bits of f0 contain the character type; this is a number
|
||||||
|
defined by the enumeration in ucp.h (e.g. ucp_Lu).
|
||||||
|
(4) The bottom 8 bits of f0 contain the most significant byte of the
|
||||||
|
character's 24-bit codepoint.
|
||||||
|
|
||||||
|
(f1) (1) The f1 field contains the two least significant bytes of the
|
||||||
|
codepoint.
|
||||||
|
|
||||||
|
(f2) (1) The 0xf000 bits of f2 contain zero if there is no right child of this
|
||||||
|
node. Otherwise, they contain one plus the exponent of the power of
|
||||||
|
two of the offset to the right node (e.g. a value of 3 means 8). The
|
||||||
|
units of the offset are node items.
|
||||||
|
|
||||||
|
(2) The 0x0fff bits of f2 contain the signed offset from this character to
|
||||||
|
its alternate cased value. They are zero if there is no such
|
||||||
|
character.
|
||||||
|
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
||.|.| type (6) | ms char (8) || ls char (16) ||....| case offset (12) ||
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
| | |
|
||||||
|
| |-> spare |
|
||||||
|
| exponent of right
|
||||||
|
|-> left child exists child offset
|
||||||
|
|
||||||
|
|
||||||
|
The upper/lower casing information is set only for characters that come in
|
||||||
|
pairs. There are (at present) four non-one-to-one mappings in the Unicode data.
|
||||||
|
These are ignored. They are:
|
||||||
|
|
||||||
|
1FBE Greek Prosgegrammeni (lower, with upper -> capital iota)
|
||||||
|
2126 Ohm
|
||||||
|
212A Kelvin
|
||||||
|
212B Angstrom
|
||||||
|
|
||||||
|
Certainly for the last three, having an alternate case would seem to be a
|
||||||
|
mistake. I don't know any Greek, so cannot comment on the first one.
|
||||||
|
|
||||||
|
|
||||||
|
When searching the tree, proceed as follows:
|
||||||
|
|
||||||
|
(1) Start at the first node.
|
||||||
|
|
||||||
|
(2) Extract the character value from f1 and the bottom 8 bits of f0;
|
||||||
|
|
||||||
|
(3) Compare with the character being sought. If equal, we are done.
|
||||||
|
|
||||||
|
(4) If the test character is smaller, inspect the f0_leftexists flag. If it is
|
||||||
|
not set, the character is not in the tree. If it is set, move to the next
|
||||||
|
node, and go to (2).
|
||||||
|
|
||||||
|
(5) If the test character is bigger, extract the f2_rightmask bits from f2, and
|
||||||
|
shift them right by f2_rightshift. If the result is zero, the character is
|
||||||
|
not in the tree. Otherwise, calculate the number of nodes to skip by
|
||||||
|
shifting the value 1 left by this number minus one. Go to (2).
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* End of ucpinternal.h */
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue