Update pcre2 to 10.36

This performs *most* of the pcreectomy of b418e36f226b754b.

It removes the tests and docs and all the large files, but it does
*not* touch any of the files except for making Find_Package
quiet (783a895b1163149d) or remove the AUTHORS and similar files as
they are very small.

This seems much easier, cleaner, nicer and has 90% of the effect of
the old - the size now is 2.7MB instead of 2.1MB, down from 10MB.

Fixes 
This commit is contained in:
Fabian Homborg 2021-02-01 17:33:04 +01:00
parent 2d78c9a0d9
commit 97be837ff5
49 changed files with 20684 additions and 100 deletions

314
pcre2/132html vendored Executable file

@ -0,0 +1,314 @@
#! /usr/bin/perl -w
# Script to turn PCRE2 man pages into HTML
# Subroutine to handle font changes and other escapes
sub do_line {
my($s) = $_[0];
$s =~ s/</&#60;/g; # Deal with < and >
$s =~ s/>/&#62;/g;
$s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
$s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
$s =~ s"\\e"\\"g;
$s =~ s/(?<=Copyright )\(c\)/&copy;/g;
$s;
}
# Subroutine to ensure not in a paragraph
sub end_para {
if ($inpara)
{
print TEMP "</PRE>\n" if ($inpre);
print TEMP "</P>\n";
}
$inpara = $inpre = 0;
$wrotetext = 0;
}
# Subroutine to start a new paragraph
sub new_para {
&end_para();
print TEMP "<P>\n";
$inpara = 1;
}
# Main program
$innf = 0;
$inpara = 0;
$inpre = 0;
$wrotetext = 0;
$toc = 0;
$ref = 1;
while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
{
$toc = 1 if $ARGV[0] eq "-toc";
shift;
}
# Initial output to STDOUT
print <<End ;
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
End
print "<ul>\n" if ($toc);
open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
while (<STDIN>)
{
# Handle lines beginning with a dot
if (/^\./)
{
# Some of the PCRE2 man pages used to contain instances of .br. However,
# they should have all been removed because they cause trouble in some
# (other) automated systems that translate man pages to HTML. Complain if
# we find .br or .in (another macro that is deprecated).
if (/^\.br/ || /^\.in/)
{
print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
print STDERR "*** $_\n";
die "*** Processing abandoned\n";
}
# Instead of .br, relevent "literal" sections are enclosed in .nf/.fi.
elsif (/^\.nf/)
{
$innf = 1;
}
elsif (/^\.fi/)
{
$innf = 0;
}
# Handling .sp is subtle. If it is inside a literal section, do nothing if
# the next line is a non literal text line; similarly, if not inside a
# literal section, do nothing if a literal follows, unless we are inside
# a .nf/.fi section or about to enter one. The point being that the <pre>
# and </pre> that delimit literal sections will do the spacing. Always skip
# if no previous output.
elsif (/^\.sp/)
{
if ($wrotetext)
{
$_ = <STDIN>;
if ($inpre)
{
print TEMP "\n" if (/^[\s.]/);
}
else
{
print TEMP "<br>\n<br>\n" if ($innf || /^\.nf/ || !/^[\s.]/);
}
redo; # Now process the lookahead line we just read
}
}
elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
{
&new_para();
}
elsif (/^\.SH\s*("?)(.*)\1/)
{
# Ignore the NAME section
if ($2 =~ /^NAME\b/)
{
<STDIN>;
next;
}
&end_para();
my($title) = &do_line($2);
if ($toc)
{
printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
$ref, $ref);
printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
$ref);
$ref++;
}
else
{
print TEMP "<br><b>\n$title\n</b><br>\n";
}
}
elsif (/^\.SS\s*("?)(.*)\1/)
{
&end_para();
my($title) = &do_line($2);
print TEMP "<br><b>\n$title\n</b><br>\n";
}
elsif (/^\.B\s*(.*)/)
{
&new_para() if (!$inpara);
$_ = &do_line($1);
s/"(.*?)"/$1/g;
print TEMP "<b>$_</b>\n";
$wrotetext = 1;
}
elsif (/^\.I\s*(.*)/)
{
&new_para() if (!$inpara);
$_ = &do_line($1);
s/"(.*?)"/$1/g;
print TEMP "<i>$_</i>\n";
$wrotetext = 1;
}
# A comment that starts "HREF" takes the next line as a name that
# is turned into a hyperlink, using the text given, which might be
# in a special font. If it ends in () or (digits) or punctuation, they
# aren't part of the link.
elsif (/^\.\\"\s*HREF/)
{
$_=<STDIN>;
chomp;
$_ = &do_line($_);
$_ =~ s/\s+$//;
$_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
print TEMP "<a href=\"$1.html\">$_</a>\n";
}
# A comment that starts "HTML" inserts literal HTML
elsif (/^\.\\"\s*HTML\s*(.*)/)
{
print TEMP $1;
}
# A comment that starts < inserts that HTML at the end of the
# *next* input line - so as not to get a newline between them.
elsif (/^\.\\"\s*(<.*>)/)
{
my($markup) = $1;
$_=<STDIN>;
chomp;
$_ = &do_line($_);
$_ =~ s/\s+$//;
print TEMP "$_$markup\n";
}
# A comment that starts JOIN joins the next two lines together, with one
# space between them. Then that line is processed. This is used in some
# displays where two lines are needed for the "man" version. JOINSH works
# the same, except that it assumes this is a shell command, so removes
# continuation backslashes.
elsif (/^\.\\"\s*JOIN(SH)?/)
{
my($one,$two);
$one = <STDIN>;
$two = <STDIN>;
$one =~ s/\s*\\e\s*$// if (defined($1));
chomp($one);
$two =~ s/^\s+//;
$_ = "$one $two";
redo; # Process the joined lines
}
# .EX/.EE are used in the pcre2demo page to bracket the entire program,
# which is unmodified except for turning backslash into "\e".
elsif (/^\.EX\s*$/)
{
print TEMP "<PRE>\n";
while (<STDIN>)
{
last if /^\.EE\s*$/;
s/\\e/\\/g;
s/&/&amp;/g;
s/</&lt;/g;
s/>/&gt;/g;
print TEMP;
}
}
# Ignore anything not recognized
next;
}
# Line does not begin with a dot. Replace blank lines with new paragraphs
if (/^\s*$/)
{
&end_para() if ($wrotetext);
next;
}
# Convert fonts changes and output an ordinary line. Ensure that indented
# lines are marked as literal.
$_ = &do_line($_);
&new_para() if (!$inpara);
if (/^\s/)
{
if (!$inpre)
{
print TEMP "<pre>\n";
$inpre = 1;
}
}
elsif ($inpre)
{
print TEMP "</pre>\n";
$inpre = 0;
}
# Add <br> to the end of a non-literal line if we are within .nf/.fi
$_ .= "<br>\n" if (!$inpre && $innf);
print TEMP;
$wrotetext = 1;
}
# The TOC, if present, will have been written - terminate it
print "</ul>\n" if ($toc);
# Copy the remainder to the standard output
close(TEMP);
open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
print while (<TEMP>);
print <<End ;
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
End
close(TEMP);
unlink("/tmp/$$");
# End

36
pcre2/AUTHORS vendored Normal file

@ -0,0 +1,36 @@
THE MAIN PCRE2 LIBRARY CODE
---------------------------
Written by: Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2020 University of Cambridge
All rights reserved
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
--------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2020 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2020 Zoltan Herczeg
All rights reserved.
####

67
pcre2/CMakeLists.txt vendored

@ -92,11 +92,13 @@
# library versioning.
# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
# 2020-04-28 PH added function check for memfd_create based on Carlo's patch
# 2020-05-25 PH added a check for Intel CET
# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel
PROJECT(PCRE2 C)
# Increased minimum to 2.8.0 to support newer add_test features.
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
# Increased minimum to 2.8.5 to support GNUInstallDirs.
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.5)
# Set policy CMP0026 to avoid warnings for the use of LOCATION in
# GET_TARGET_PROPERTY. This should no longer be required.
@ -108,7 +110,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I\"${PROJECT_SOURCE_DIR}/src\"")
# external packages
FIND_PACKAGE( BZip2 QUIET )
@ -123,6 +125,7 @@ INCLUDE(CheckFunctionExists)
INCLUDE(CheckSymbolExists)
INCLUDE(CheckIncludeFile)
INCLUDE(CheckTypeSize)
INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
@ -132,11 +135,11 @@ CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
CHECK_FUNCTION_EXISTS(memfd_create HAVE_MEMFD_CREATE)
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
CHECK_FUNCTION_EXISTS(secure_getenv HAVE_SECURE_GETENV)
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
CHECK_SYMBOL_EXISTS(bcopy "strings.h" HAVE_BCOPY)
CHECK_SYMBOL_EXISTS(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)
CHECK_SYMBOL_EXISTS(memmove "string.h" HAVE_MEMMOVE)
CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)
CHECK_SYMBOL_EXISTS(strerror "string.h" HAVE_STRERROR)
set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
@ -146,6 +149,24 @@ CHECK_C_SOURCE_COMPILES(
)
set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
# code was written by PH, trying to imitate the logic from the autotools
# configuration.
CHECK_C_SOURCE_COMPILES(
"#ifndef __CET__
#error CET is not enabled
#endif
int main() { return 0; }"
INTEL_CET_ENABLED
)
IF (INTEL_CET_ENABLED)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
ENDIF(INTEL_CET_ENABLED)
# User-configurable options
#
# Note: CMakeSetup displays these in alphabetical order, regardless of
@ -445,7 +466,7 @@ foreach(configure_line ${configure_lines})
foreach(_substitution_variable ${SEARCHED_VARIABLES})
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
if (NOT ${_substitution_variable_upper})
string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MACTHED_STRING ${configure_line})
string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line})
if (CMAKE_MATCH_1)
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
endif()
@ -475,14 +496,23 @@ CONFIGURE_FILE(src/pcre2.h.in
${PROJECT_BINARY_DIR}/pcre2.h
@ONLY)
# Make sure to not link debug libs
# against release libs and vice versa
IF(WIN32)
SET(CMAKE_DEBUG_POSTFIX "d")
ENDIF(WIN32)
# Generate pkg-config files
SET(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
SET(prefix ${CMAKE_INSTALL_PREFIX})
SET(exec_prefix "\${prefix}")
SET(libdir "\${exec_prefix}/lib")
SET(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
SET(includedir "\${prefix}/include")
IF(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug))
SET(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX})
ENDIF()
CONFIGURE_FILE(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
@ -622,11 +652,6 @@ IF(MSVC)
ENDIF(MSVC)
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
# needed to make sure to not link debug libs
# against release libs and vice versa
IF(WIN32)
SET(CMAKE_DEBUG_POSTFIX "d")
ENDIF(WIN32)
SET(targets)
@ -827,7 +852,9 @@ if test \"$?\" != \"0\"; then exit 1; fi
\@echo off
setlocal
SET srcdir=\"${winsrc}\"
SET pcre2test=\"${winexe}\"
# The next line was replaced by the following one after a user comment.
# SET pcre2test=\"${winexe}\"
SET pcre2test=\"${winbin}\\pcre2test.exe\"
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
call %srcdir%\\RunTest.Bat
if errorlevel 1 exit /b 1
@ -863,9 +890,9 @@ SET(CMAKE_INSTALL_ALWAYS 1)
INSTALL(TARGETS ${targets}
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
INSTALL(FILES ${pkg_config_files} DESTINATION lib/pkgconfig)
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
INSTALL(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
DESTINATION bin
# Set 0755 permissions
@ -916,7 +943,7 @@ IF(PCRE2_SHOW_REPORT)
ENDIF(CMAKE_C_FLAGS)
MESSAGE(STATUS "")
MESSAGE(STATUS "")
MESSAGE(STATUS "PCRE2 configuration summary:")
MESSAGE(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:")
MESSAGE(STATUS "")
MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")

5
pcre2/COPYING vendored Normal file

@ -0,0 +1,5 @@
PCRE2 LICENCE
Please see the file LICENCE in the PCRE2 distribution for licensing details.
End

2434
pcre2/ChangeLog vendored Normal file

File diff suppressed because it is too large Load Diff

78
pcre2/CheckMan vendored Executable file

@ -0,0 +1,78 @@
#! /usr/bin/perl
# A script to scan PCRE2's man pages to check for typos in the control
# sequences. I use only a small set of the available repertoire, so it is
# straightforward to check that nothing else has slipped in by mistake. This
# script should be called in the doc directory.
$yield = 0;
while (scalar(@ARGV) > 0)
{
$line = 0;
$file = shift @ARGV;
open (IN, $file) || die "Failed to open $file\n";
while (<IN>)
{
$count = 0;
$line++;
if (/^\s*$/)
{
printf "Empty line $line of $file\n";
$yield = 1;
}
elsif (/^\./)
{
if (!/^\.\s*$|
^\.B\s+\S|
^\.TH\s\S|
^\.SH\s\S|
^\.SS\s\S|
^\.TP(?:\s?\d+)?\s*$|
^\.SM\s*$|
^\.br\s*$|
^\.rs\s*$|
^\.sp\s*$|
^\.nf\s*$|
^\.fi\s*$|
^\.P\s*$|
^\.PP\s*$|
^\.\\"(?:\ HREF)?\s*$|
^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
^\.\\"\s<\/a>\s*$|
^\.\\"\sJOINSH\s*$|
^\.\\"\sJOIN\s*$/x
)
{
printf "Bad control line $line of $file\n";
$yield = 1;
}
}
elsif (/\\[^ef]|\\f[^IBP]/)
{
printf "Bad backslash in line $line of $file\n";
$yield = 1;
}
while (/\\f[BI]/g)
{
$count++;
}
while (/\\fP/g)
{
$count--;
}
if ($count != 0)
{
printf "Mismatching formatting in line $line of $file\n";
$yield = 1;
}
}
close(IN);
}
exit $yield;
# End

113
pcre2/CleanTxt vendored Executable file

@ -0,0 +1,113 @@
#! /usr/bin/perl -w
# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
$blankcount = 0;
$lastwascut = 0;
$firstheader = 1;
# Input on STDIN; output to STDOUT.
while (<STDIN>)
{
s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
s/.\x8//g; # Remove "char, backspace"
# Handle header lines. Retain only the first one we encounter, but remove
# the blank line that follows. Any others (e.g. at end of document) and the
# following blank line are dropped.
if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
{
if ($firstheader)
{
$firstheader = 0;
print;
$lastprinted = $_;
$lastwascut = 0;
}
$_=<STDIN>; # Remove a blank that follows
next;
}
# Count runs of empty lines
if (/^\s*$/)
{
$blankcount++;
$lastwascut = 0;
next;
}
# If a chunk of lines has been cut out (page footer) and the next line
# has a different indentation, put back one blank line.
if ($lastwascut && $blankcount < 1 && defined($lastprinted))
{
($a) = $lastprinted =~ /^(\s*)/;
($b) = $_ =~ /^(\s*)/;
$blankcount++ if ($a ne $b);
}
# We get here only when we have a non-blank line in hand. If it was preceded
# by 3 or more blank lines, read the next 3 lines and see if they are blank.
# If so, remove all 7 lines, and remember that we have just done a cut.
if ($blankcount >= 3)
{
for ($i = 0; $i < 3; $i++)
{
$next[$i] = <STDIN>;
$next[$i] = "" if !defined $next[$i];
$next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
$next[$i] =~ s/.\x8//g; # Remove "char, backspace"
}
# Cut out chunks of the form <3 blanks><non-blank><3 blanks>
if ($next[0] =~ /^\s*$/ &&
$next[1] =~ /^\s*$/ &&
$next[2] =~ /^\s*$/)
{
$blankcount -= 3;
$lastwascut = 1;
}
# Otherwise output the saved blanks, the current, and the next three
# lines. Remember the last printed line.
else
{
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
print;
for ($i = 0; $i < 3; $i++)
{
$next[$i] =~ s/.\x8//g;
print $next[$i];
$lastprinted = $_;
}
$lastwascut = 0;
$blankcount = 0;
}
}
# This non-blank line is not preceded by 3 or more blank lines. Output
# any blanks there are, and the line. Remember it. Force two blank lines
# before headings.
else
{
$blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
defined($lastprinted);
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
print;
$lastprinted = $_;
$lastwascut = 0;
$blankcount = 0;
}
}
# End

35
pcre2/Detrail vendored Executable file

@ -0,0 +1,35 @@
#!/usr/bin/perl
# This is a script for removing trailing whitespace from lines in files that
# are listed on the command line.
# This subroutine does the work for one file.
sub detrail {
my($file) = $_[0];
my($changed) = 0;
open(IN, "$file") || die "Can't open $file for input";
@lines = <IN>;
close(IN);
foreach (@lines)
{
if (/\s+\n$/)
{
s/\s+\n$/\n/;
$changed = 1;
}
}
if ($changed)
{
open(OUT, ">$file") || die "Can't open $file for output";
print OUT @lines;
close(OUT);
}
}
# This is the main program
$, = ""; # Output field separator
for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); }
# End

830
pcre2/HACKING vendored Normal file

@ -0,0 +1,830 @@
Technical Notes about PCRE2
---------------------------
These are very rough technical notes that record potentially useful information
about PCRE2 internals. PCRE2 is a library based on the original PCRE library,
but with a revised (and incompatible) API. To avoid confusion, the original
library is referred to as PCRE1 below. For information about testing PCRE2, see
the pcre2test documentation and the comment at the head of the RunTest file.
PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix
releases remain in the 8.xx series. PCRE2 releases started at 10.00 to avoid
confusion with PCRE1.
Historical note 1
-----------------
Many years ago I implemented some regular expression functions to an algorithm
suggested by Martin Richards. The rather simple patterns were not Unix-like in
form, and were quite restricted in what they could do by comparison with Perl.
The interesting part about the algorithm was that the amount of space required
to hold the compiled form of an expression was known in advance. The code to
apply an expression did not operate by backtracking, as the original Henry
Spencer code and current PCRE2 and Perl code does, but instead checked all
possibilities simultaneously by keeping a list of current states and checking
all of them as it advanced through the subject string. In the terminology of
Jeffrey Friedl's book, it was a "DFA algorithm", though it was not a
traditional Finite State Machine (FSM). When the pattern was all used up, all
remaining states were possible matches, and the one matching the longest subset
of the subject string was chosen. This did not necessarily maximize the
individual wild portions of the pattern, as is expected in Unix and Perl-style
regular expressions.
Historical note 2
-----------------
By contrast, the code originally written by Henry Spencer (which was
subsequently heavily modified for Perl) compiles the expression twice: once in
a dummy mode in order to find out how much store will be needed, and then for
real. (The Perl version probably doesn't do this any more; I'm talking about
the original library.) The execution function operates by backtracking and
maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
matches individual wild portions of the pattern. This is an "NFA algorithm" in
Friedl's terminology.
OK, here's the real stuff
-------------------------
For the set of functions that formed the original PCRE1 library in 1997 (which
are unrelated to those mentioned above), I tried at first to invent an
algorithm that used an amount of store bounded by a multiple of the number of
characters in the pattern, to save on compiling time. However, because of the
greater complexity in Perl regular expressions, I couldn't do this, even though
the then current Perl 5.004 patterns were much simpler than those supported
nowadays. In any case, a first pass through the pattern is helpful for other
reasons.
Support for 16-bit and 32-bit data strings
-------------------------------------------
The PCRE2 library can be compiled in any combination of 8-bit, 16-bit or 32-bit
modes, creating up to three different libraries. In the description that
follows, the word "short" is used for a 16-bit data quantity, and the phrase
"code unit" is used for a quantity that is a byte in 8-bit mode, a short in
16-bit mode and a 32-bit word in 32-bit mode. The names of PCRE2 functions are
given in generic form, without the _8, _16, or _32 suffix.
Computing the memory requirement: how it was
--------------------------------------------
Up to and including release 6.7, PCRE1 worked by running a very degenerate
first pass to calculate a maximum memory requirement, and then a second pass to
do the real compile - which might use a bit less than the predicted amount of
memory. The idea was that this would turn out faster than the Henry Spencer
code because the first pass is degenerate and the second pass can just store
stuff straight into memory, which it knows is big enough.
Computing the memory requirement: how it is
-------------------------------------------
By the time I was working on a potential 6.8 release, the degenerate first pass
had become very complicated and hard to maintain. Indeed one of the early
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
I had a flash of inspiration as to how I could run the real compile function in
a "fake" mode that enables it to compute how much memory it would need, while
in most cases only ever using a small amount of working memory, and without too
many tests of the mode that might slow it down. So I refactored the compiling
functions to work this way. This got rid of about 600 lines of source and made
further maintenance and development easier. As this was such a major change, I
never released 6.8, instead upping the number to 7.0 (other quite major changes
were also present in the 7.0 release).
A side effect of this work was that the previous limit of 200 on the nesting
depth of parentheses was removed. However, there was a downside: compiling ran
more slowly than before (30% or more, depending on the pattern) because it now
did a full analysis of the pattern. My hope was that this would not be a big
issue, and in the event, nobody has commented on it.
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
(default 250, settable at build time) so as to put a limit on the amount of
system stack used by the compile function, which uses recursive function calls
for nested parenthesized groups. This is a safety feature for environments with
small stacks where the patterns are provided by users.
Yet another pattern scan
------------------------
History repeated itself for PCRE2 release 10.20. A number of bugs relating to
named subpatterns had been discovered by fuzzers. Most of these were related to
the handling of forward references when it was not known if the named group was
unique. (References to non-unique names use a different opcode and more
memory.) The use of duplicate group numbers (the (?| facility) also caused
issues.
To get around these problems I adopted a new approach by adding a third pass
over the pattern (really a "pre-pass"), which did nothing other than identify
all the named subpatterns and their corresponding group numbers. This means
that the actual compile (both the memory-computing dummy run and the real
compile) has full knowledge of group names and numbers throughout. Several
dozen lines of messy code were eliminated, though the new pre-pass was not
short. In particular, parsing and skipping over [] classes is complicated.
While working on 10.22 I realized that I could simplify yet again by moving
more of the parsing into the pre-pass, thus avoiding doing it in two places, so
after 10.22 was released, the code underwent yet another big refactoring. This
is how it is from 10.23 onwards:
The function called parse_regex() scans the pattern characters, parsing them
into literal data and meta characters. It converts escapes such as \x{123}
into literals, handles \Q...\E, and skips over comments and non-significant
white space. The result of the scanning is put into a vector of 32-bit unsigned
integers. Values less than 0x80000000 are literal data. Higher values represent
meta-characters. The top 16-bits of such values identify the meta-character,
and these are given names such as META_CAPTURE. The lower 16-bits are available
for data, for example, the capturing group number. The only situation in which
literal data values greater than 0x7fffffff can appear is when the 32-bit
library is running in non-UTF mode. This is handled by having a special
meta-character that is followed by the 32-bit data value.
The size of the parsed pattern vector, when auto-callouts are not enabled, is
bounded by the length of the pattern (with one exception). The code is written
so that each item in the pattern uses no more vector elements than the number
of code units in the item itself. The exception is the aforementioned large
32-bit number handling. For this reason, 32-bit non-UTF patterns are scanned in
advance to check for such values. When auto-callouts are enabled, the generous
assumption is made that there will be a callout for each pattern code unit
(which of course is only actually true if all code units are literals) plus one
at the end. There is a default parsed pattern vector on the system stack, but
if this is not big enough, heap memory is used.
As before, the actual compiling function is run twice, the first time to
determine the amount of memory needed for the final compiled pattern. It
now processes the parsed pattern vector, not the pattern itself, although some
of the parsed items refer to strings in the pattern - for example, group
names. As escapes and comments have already been processed, the code is a bit
simpler than before.
Most errors can be diagnosed during the parsing scan. For those that cannot
(for example, "lookbehind assertion is not fixed length"), the parsed code
contains offsets into the pattern so that the actual compiling code can
report where errors are.
The elements of the parsed pattern vector
-----------------------------------------
The word "offset" below means a code unit offset into the pattern. When
PCRE2_SIZE (which is usually size_t) is no bigger than uint32_t, an offset is
stored in a single parsed pattern element. Otherwise (typically on 64-bit
systems) it occupies two elements. The following meta items occupy just one
element, with no data:
META_ACCEPT (*ACCEPT)
META_ASTERISK *
META_ASTERISK_PLUS *+
META_ASTERISK_QUERY *?
META_ATOMIC (?> start of atomic group
META_CIRCUMFLEX ^ metacharacter
META_CLASS [ start of non-empty class
META_CLASS_EMPTY [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS
META_CLASS_EMPTY_NOT [^] negative empty class - ditto
META_CLASS_END ] end of non-empty class
META_CLASS_NOT [^ start non-empty negative class
META_COMMIT (*COMMIT)
META_COND_ASSERT (?(?assertion)
META_DOLLAR $ metacharacter
META_DOT . metacharacter
META_END End of pattern (this value is 0x80000000)
META_FAIL (*FAIL)
META_KET ) closing parenthesis
META_LOOKAHEAD (?= start of lookahead
META_LOOKAHEAD_NA (*napla: start of non-atomic lookahead
META_LOOKAHEADNOT (?! start of negative lookahead
META_NOCAPTURE (?: no capture parens
META_PLUS +
META_PLUS_PLUS ++
META_PLUS_QUERY +?
META_PRUNE (*PRUNE) - no argument
META_QUERY ?
META_QUERY_PLUS ?+
META_QUERY_QUERY ??
META_RANGE_ESCAPED hyphen in class range with at least one escape
META_RANGE_LITERAL hyphen in class range defined literally
META_SKIP (*SKIP) - no argument
META_THEN (*THEN) - no argument
The two RANGE values occur only in character classes. They are positioned
between two literals that define the start and end of the range. In an EBCDIC
evironment it is necessary to know whether either of the range values was
specified as an escape. In an ASCII/Unicode environment the distinction is not
relevant.
The following have data in the lower 16 bits, and may be followed by other data
elements:
META_ALT | alternation
META_BACKREF back reference
META_CAPTURE start of capturing group
META_ESCAPE non-literal escape sequence
META_RECURSE recursion call
If the data for META_ALT is non-zero, it is inside a lookbehind, and the data
is the length of its branch, for which OP_REVERSE must be generated.
META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as
their data in the lower 16 bits of the element.
META_BACKREF is followed by an offset if the back reference group number is 10
or more. The offsets of the first ocurrences of references to groups whose
numbers are less than 10 are put in cb->small_ref_offset[] (only the first
occurrence is useful). On 64-bit systems this avoids using more than two parsed
pattern elements for items such as \3. The offset is used when an error occurs
because the reference is to a non-existent group.
META_RECURSE is always followed by an offset, for use in error messages.
META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next
element contains the 16-bit type and data property values, packed together.
ESC_g and ESC_k are used only for named references - numerical ones are turned
into META_RECURSE or META_BACKREF as appropriate. ESC_g and ESC_k are followed
by a length and an offset into the pattern to specify the name.
The following have one data item that follows in the next vector element:
META_BIGVALUE Next is a literal >= META_END
META_OPTIONS (?i) and friends (data is new option bits)
META_POSIX POSIX class item (data identifies the class)
META_POSIX_NEG negative POSIX class item (ditto)
The following are followed by a length element, then a number of character code
values (which should match with the length):
META_MARK (*MARK:xxxx)
META_COMMIT_ARG )*COMMIT:xxxx)
META_PRUNE_ARG (*PRUNE:xxx)
META_SKIP_ARG (*SKIP:xxxx)
META_THEN_ARG (*THEN:xxxx)
The following are followed by a length element, then an offset in the pattern
that identifies the name:
META_COND_NAME (?(<name>) or (?('name') or (?(name)
META_COND_RNAME (?(R&name)
META_COND_RNUMBER (?(Rdigits)
META_RECURSE_BYNAME (?&name)
META_BACKREF_BYNAME \k'name'
META_COND_RNUMBER is used for names that start with R and continue with digits,
because this is an ambiguous case. It could be a back reference to a group with
that name, or it could be a recursion test on a numbered group.
This one is followed by an offset, for use in error messages, then a number:
META_COND_NUMBER (?([+-]digits)
The following is followed just by an offset, for use in error messages:
META_COND_DEFINE (?(DEFINE)
The following are also followed just by an offset, but also the lower 16 bits
of the main word contain the length of the first branch of the lookbehind
group; this is used when generating OP_REVERSE for that branch.
META_LOOKBEHIND (?<= start of lookbehind
META_LOOKBEHIND_NA (*naplb: start of non-atomic lookbehind
META_LOOKBEHINDNOT (?<! start of negative lookbehind
The following are followed by two elements, the minimum and maximum. Repeat
values are limited to 65535 (MAX_REPEAT). A maximum value of "unlimited" is
represented by UNLIMITED_REPEAT, which is bigger than MAX_REPEAT:
META_MINMAX {n,m} repeat
META_MINMAX_PLUS {n,m}+ repeat
META_MINMAX_QUERY {n,m}? repeat
This one is followed by three elements. The first is 0 for '>' and 1 for '>=';
the next two are the major and minor numbers:
META_COND_VERSION (?(VERSION<op>x.y)
Callouts are converted into one of two items:
META_CALLOUT_NUMBER (?C with numerical argument
META_CALLOUT_STRING (?C with string argument
In both cases, the next two elements contain the offset and length of the next
item in the pattern. Then there is either one callout number, or a length and
an offset for the string argument. The length includes both delimiters.
Traditional matching function
-----------------------------
The "traditional", and original, matching function is called pcre2_match(), and
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
and the way that Perl works. This is not surprising, since it is intended to be
as compatible with Perl as possible. This is the function most users of PCRE2
will use most of the time. If PCRE2 is compiled with just-in-time (JIT)
support, and studying a compiled pattern with JIT is successful, the JIT code
is run instead of the normal pcre2_match() code, but the result is the same.
Supplementary matching function
-------------------------------
There is also a supplementary matching function called pcre2_dfa_match(). This
implements a DFA matching algorithm that searches simultaneously for all
possible matches that start at one point in the subject string. (Going back to
my roots: see Historical Note 1 above.) This function intreprets the same
compiled pattern data as pcre2_match(); however, not all the facilities are
available, and those that are do not always work in quite the same way. See the
user documentation for details.
The algorithm that is used for pcre2_dfa_match() is not a traditional FSM,
because it may have a number of states active at one time. More work would be
needed at compile time to produce a traditional FSM where only one state is
ever active at once. I believe some other regex matchers work this way. JIT
support is not available for this kind of matching.
Changeable options
------------------
The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
others) may be changed in the middle of patterns by items such as (?i). Their
processing is handled entirely at compile time by generating different opcodes
for the different settings. The runtime functions do not need to keep track of
an option's state.
PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
are tracked and processed during the parsing pre-pass. The others are handled
from META_OPTIONS items during the main compile phase.
Format of compiled patterns
---------------------------
The compiled form of a pattern is a vector of unsigned code units (bytes in
8-bit mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing
items of variable length. The first code unit in an item contains an opcode,
and the length of the item is either implicit in the opcode or contained in the
data that follows it.
In many cases listed below, LINK_SIZE data values are specified for offsets
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
default value for LINK_SIZE is 2, except for the 32-bit library, where it can
only be 4. The 8-bit library can be compiled to used 3-byte or 4-byte values,
and the 16-bit library can be compiled to use 4-byte values, though this
impairs performance. Specifing a LINK_SIZE larger than 2 for these libraries is
necessary only when patterns whose compiled length is greater than 65535 code
units are going to be processed. When a LINK_SIZE value uses more than one code
unit, the most significant unit is first.
In this description, we assume the "normal" compilation options. Data values
that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
(most significant byte first), and one code unit in 16-bit and 32-bit modes.
Opcodes with no following data
------------------------------
These items are all just one unit long:
OP_END end of pattern
OP_ANY match any one character other than newline
OP_ALLANY match any one character, including newline
OP_ANYBYTE match any single code unit, even in UTF-8/16 mode
OP_SOD match start of data: \A
OP_SOM, start of match (subject + offset): \G
OP_SET_SOM, set start of match (\K)
OP_CIRC ^ (start of data)
OP_CIRCM ^ multiline mode (start of data or after newline)
OP_NOT_WORD_BOUNDARY \W
OP_WORD_BOUNDARY \w
OP_NOT_DIGIT \D
OP_DIGIT \d
OP_NOT_HSPACE \H
OP_HSPACE \h
OP_NOT_WHITESPACE \S
OP_WHITESPACE \s
OP_NOT_VSPACE \V
OP_VSPACE \v
OP_NOT_WORDCHAR \W
OP_WORDCHAR \w
OP_EODN match end of data or newline at end: \Z
OP_EOD match end of data: \z
OP_DOLL $ (end of data, or before final newline)
OP_DOLLM $ multiline mode (end of data or before newline)
OP_EXTUNI match an extended Unicode grapheme cluster
OP_ANYNL match any Unicode newline sequence
OP_ASSERT_ACCEPT )
OP_ACCEPT ) These are Perl 5.10's "backtracking control
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
OP_FAIL ) parentheses, it may be preceded by one or more
OP_PRUNE ) OP_CLOSE, each followed by a number that
OP_SKIP ) indicates which parentheses must be closed.
OP_THEN )
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
This ends the assertion, not the entire pattern match. The assertion (?!) is
always optimized to OP_FAIL.
OP_ALLANY is used for '.' when PCRE2_DOTALL is set. It is also used for \C in
non-UTF modes and in UTF-32 mode (since one code unit still equals one
character). Another use is for [^] when empty classes are permitted
(PCRE2_ALLOW_EMPTY_CLASS is set).
Backtracking control verbs
--------------------------
Verbs with no arguments generate opcodes with no following data (as listed
in the section above).
(*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a
length in one code unit, and followed by a binary zero. The name length is
limited by the size of the code unit.
(*ACCEPT:NAME) and (*FAIL:NAME) are compiled as (*MARK:NAME)(*ACCEPT) and
(*MARK:NAME)(*FAIL) respectively.
For (*COMMIT:NAME), (*PRUNE:NAME), (*SKIP:NAME), and (*THEN:NAME), the opcodes
OP_COMMIT_ARG, OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the
name following in the same format as for OP_MARK.
Matching literal characters
---------------------------
The OP_CHAR opcode is followed by a single character that is to be matched
casefully. For caseless matching of characters that have at most two
case-equivalent code points, OP_CHARI is used. In UTF-8 or UTF-16 modes, the
character may be more than one code unit long. In UTF-32 mode, characters are
always exactly one code unit long.
If there is only one character in a character class, OP_CHAR or OP_CHARI is
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
for something like [^a]).
Caseless matching (positive or negative) of characters that have more than two
case-equivalent code points (which is possible only in UTF mode) is handled by
compiling a Unicode property item (see below), with the pseudo-property
PT_CLIST. The value of this property is an offset in a vector called
"ucd_caseless_sets" which identifies the start of a short list of equivalent
characters, terminated by the value NOTACHAR (0xffffffff).
Repeating single characters
---------------------------
The common repeats (*, +, ?), when applied to a single character, use the
following opcodes, which come in caseful and caseless versions:
Caseful Caseless
OP_STAR OP_STARI
OP_MINSTAR OP_MINSTARI
OP_POSSTAR OP_POSSTARI
OP_PLUS OP_PLUSI
OP_MINPLUS OP_MINPLUSI
OP_POSPLUS OP_POSPLUSI
OP_QUERY OP_QUERYI
OP_MINQUERY OP_MINQUERYI
OP_POSQUERY OP_POSQUERYI
Each opcode is followed by the character that is to be repeated. In ASCII or
UTF-32 modes, these are two-code-unit items; in UTF-8 or UTF-16 modes, the
length is variable. Those with "MIN" in their names are the minimizing
versions. Those with "POS" in their names are possessive versions. Other kinds
of repeat make use of these opcodes:
Caseful Caseless
OP_UPTO OP_UPTOI
OP_MINUPTO OP_MINUPTOI
OP_POSUPTO OP_POSUPTOI
OP_EXACT OP_EXACTI
Each of these is followed by a count and then the repeated character. The count
is two bytes long in 8-bit mode (most significant byte first), or one code unit
in 16-bit and 32-bit modes.
OP_UPTO matches from 0 to the given number. A repeat with a non-zero minimum
and a fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or
OP_MINUPTO or OPT_POSUPTO).
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
etc.) are used for repeated, negated, single-character classes such as [^a]*.
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
positive single-character classes.
Repeating character types
-------------------------
Repeats of things like \d are done exactly as for single characters, except
that instead of a character, the opcode for the type (e.g. OP_DIGIT) is stored
in the next code unit. The opcodes are:
OP_TYPESTAR
OP_TYPEMINSTAR
OP_TYPEPOSSTAR
OP_TYPEPLUS
OP_TYPEMINPLUS
OP_TYPEPOSPLUS
OP_TYPEQUERY
OP_TYPEMINQUERY
OP_TYPEPOSQUERY
OP_TYPEUPTO
OP_TYPEMINUPTO
OP_TYPEPOSUPTO
OP_TYPEEXACT
Match by Unicode property
-------------------------
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
character by testing its Unicode property (the \p and \P escape sequences).
Each is followed by two code units that encode the desired property as a type
and a value. The types are a set of #defines of the form PT_xxx, and the values
are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
The value is relevant only for PT_GC (General Category), PT_PC (Particular
Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to
identify a list of case-equivalent characters when there are three or more.
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
value.
Character classes
-----------------
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
something like [^a]), except when caselessly matching a character that has more
than two case-equivalent code points (which can happen only in UTF mode). In
this case a Unicode property item is used, as described above in "Matching
literal characters".
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
negated, single-character classes. The normal single-character opcodes
(OP_STAR, etc.) are used for repeated positive single-character classes.
When there is more than one character in a class, and all the code points are
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
negative one. In either case, the opcode is followed by a 32-byte (16-short,
8-word) bit map containing a 1 bit for every character that is acceptable. The
bits are counted from the least significant end of each unit. In caseless mode,
bits for both cases are set.
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 and
16-bit and 32-bit modes, subject characters with values greater than 255 can be
handled correctly. For OP_CLASS they do not match, whereas for OP_NCLASS they
do.
For classes containing characters with values greater than 255 or that contain
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable
code points are less than 256, followed by a list of pairs (for a range) and/or
single characters and/or properties. In caseless mode, all equivalent
characters are explicitly listed.
OP_XCLASS is followed by a LINK_SIZE value containing the total length of the
opcode and its data. This is followed by a code unit containing flag bits:
XCL_NOT indicates that this is a negative class, and XCL_MAP indicates that a
bit map is present. There follows the bit map, if XCL_MAP is set, and then a
sequence of items coded as follows:
XCL_END marks the end of the list
XCL_SINGLE one character follows
XCL_RANGE two characters follow
XCL_PROP a Unicode property (type, value) follows
XCL_NOTPROP a Unicode property (type, value) follows
If a range starts with a code point less than 256 and ends with one greater
than 255, it is split into two ranges, with characters less than 256 being
indicated in the bit map, and the rest with XCL_RANGE.
When XCL_NOT is set, the bit map, if present, contains bits for characters that
are allowed (exactly as for OP_NCLASS), but the list of items that follow it
specifies characters and properties that are not allowed.
Back references
---------------
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
reference number when the reference is to a unique capturing group (either by
number or by name). When named groups are used, there may be more than one
group with the same name. In this case, a reference to such a group by name
generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index
(not the byte offset) in the group name table of the first entry for the
required name, followed by the number of groups with the same name. The
matching code can then search for the first one that is set.
Repeating character classes and back references
-----------------------------------------------
Single-character classes are handled specially (see above). This section
applies to other classes and also to back references. In both cases, the repeat
information follows the base item. The matching code looks at the following
opcode to see if it is one of these:
OP_CRSTAR
OP_CRMINSTAR
OP_CRPOSSTAR
OP_CRPLUS
OP_CRMINPLUS
OP_CRPOSPLUS
OP_CRQUERY
OP_CRMINQUERY
OP_CRPOSQUERY
OP_CRRANGE
OP_CRMINRANGE
OP_CRPOSRANGE
All but the last three are single-code-unit items, with no data. The range
opcodes are followed by the minimum and maximum repeat counts.
Brackets and alternation
------------------------
A pair of non-capturing round brackets is wrapped round each expression at
compile time, so alternation always happens in the context of brackets.
[Note for North Americans: "bracket" to some English speakers, including
myself, can be round, square, curly, or pointy. Hence this usage rather than
"parentheses".]
Non-capturing brackets use the opcode OP_BRA, capturing brackets use OP_CBRA. A
bracket opcode is followed by a LINK_SIZE value which gives the offset to the
next alternative OP_ALT or, if there aren't any branches, to the terminating
opcode. Each OP_ALT is followed by a LINK_SIZE value giving the offset to the
next one, or to the final opcode. For capturing brackets, the bracket number is
a count that immediately follows the offset.
There are several opcodes that mark the end of a subpattern group. OP_KET is
used for subpatterns that do not repeat indefinitely, OP_KETRMIN and
OP_KETRMAX are used for indefinite repetitions, minimally or maximally
respectively, and OP_KETRPOS for possessive repetitions (see below for more
details). All four are followed by a LINK_SIZE value giving (as a positive
number) the offset back to the matching bracket opcode.
If a subpattern is quantified such that it is permitted to match zero times, it
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
single-unit opcodes that tell the matcher that skipping the following
subpattern entirely is a valid match. In the case of the first two, not
skipping the pattern is also valid (greedy and non-greedy). The third is used
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
because it may be called as a subroutine from elsewhere in the pattern.
A subpattern with an indefinite maximum repetition is replicated in the
compiled data its minimum number of times (or once with OP_BRAZERO if the
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
as appropriate.
A subpattern with a bounded maximum repetition is replicated in a nested
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
before each replication after the minimum, so that, for example, (abc){2,5} is
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
has the same number.
When a repeated subpattern has an unbounded upper limit, it is checked to see
whether it could match an empty string. If this is the case, the opcode in the
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
that it needs to check for matching an empty string when it hits OP_KETRMIN or
OP_KETRMAX, and if so, to break the loop.
Possessive brackets
-------------------
When a repeated group (capturing or non-capturing) is marked as possessive by
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
repetition is zero, the group is preceded by OP_BRAPOSZERO.
Once-only (atomic) groups
-------------------------
These are just like other subpatterns, but they start with the opcode OP_ONCE.
The check for matching an empty string in an unbounded repeat is handled
entirely at runtime, so there is just this one opcode for atomic groups.
Assertions
----------
Forward assertions are also just like other subpatterns, but starting with one
of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or
OP_ASSERT_NOT. Backward assertions use the opcodes OP_ASSERTBACK,
OP_ASSERTBACK_NA, and OP_ASSERTBACK_NOT, and the first opcode inside the
assertion is OP_REVERSE, followed by a count of the number of characters to
move back the pointer in the subject string. In ASCII or UTF-32 mode, the count
is also the number of code units, but in UTF-8/16 mode each character may
occupy more than one code unit. A separate count is present in each alternative
of a lookbehind assertion, allowing each branch to have a different (but fixed)
length.
Conditional subpatterns
-----------------------
These are like other subpatterns, but they start with the opcode OP_COND, or
OP_SCOND for one that might match an empty string in an unbounded repeat.
If the condition is a back reference, this is stored at the start of the
subpattern using the opcode OP_CREF followed by a count containing the
reference number, provided that the reference is to a unique capturing group.
If the reference was by name and there is more than one group with that name,
OP_DNCREF is used instead. It is followed by two counts: the index in the group
names table, and the number of groups with the same name. The allows the
matcher to check if any group with the given name is set.
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
subpattern using the opcode OP_RREF (with a value of RREF_ANY (0xffff) for "the
whole pattern") or OP_DNRREF (with data as for OP_DNCREF).
For a DEFINE condition, OP_FALSE is used (with no associated data). During
compilation, however, a DEFINE condition is coded as OP_DEFINE so that, when
the conditional group is complete, there can be a check to ensure that it
contains only one top-level branch. Once this has happened, the opcode is
changed to OP_FALSE, so the matcher never sees OP_DEFINE.
There is a special PCRE2-specific condition of the form (VERSION[>]=x.y), which
tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE
or OP_FALSE.
If a condition is not a back reference, recursion test, DEFINE, or VERSION, it
must start with a parenthesized atomic assertion, whose opcode normally
immediately follows OP_COND or OP_SCOND. However, if automatic callouts are
enabled, a callout is inserted immediately before the assertion. It is also
possible to insert a manual callout at this point. Only assertion conditions
may have callouts preceding the condition.
A condition that is the negative assertion (?!) is optimized to OP_FAIL in all
parts of the pattern, so this is another opcode that may appear as a condition.
It is treated the same as OP_FALSE.
Recursion
---------
Recursion either matches the current pattern, or some subexpression. The opcode
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
bracket from the start of the whole pattern. OP_RECURSE is also used for
"subroutine" calls, even though they are not strictly a recursion. Up till
release 10.30 recursions were treated as atomic groups, making them
incompatible with Perl (but PCRE had them well before Perl did). From 10.30,
backtracking into recursions is supported.
Repeated recursions used to be wrapped inside OP_ONCE brackets, which not only
forced no backtracking, but also allowed repetition to be handled as for other
bracketed groups. From 10.30 onwards, repeated recursions are duplicated for
their minimum repetitions, and then wrapped in non-capturing brackets for the
remainder. For example, (?1){3} is treated as (?1)(?1)(?1), and (?1){2,4} is
treated as (?1)(?1)(?:(?1)){0,2}.
Callouts
--------
A callout may have either a numerical argument or a string argument. These use
OP_CALLOUT or OP_CALLOUT_STR, respectively. In each case these are followed by
two LINK_SIZE values giving the offset in the pattern string to the start of
the following item, and another count giving the length of this item. These
values make it possible for pcre2test to output useful tracing information
using callouts.
In the case of a numeric callout, after these two values there is a single code
unit containing the callout number, in the range 0-255, with 255 being used for
callouts that are automatically inserted as a result of the PCRE2_AUTO_CALLOUT
option. Thus, this opcode item is of fixed length:
[OP_CALLOUT] [PATTERN_OFFSET] [PATTERN_LENGTH] [NUMBER]
For callouts with string arguments, OP_CALLOUT_STR has three more data items:
a LINK_SIZE value giving the complete length of the entire opcode item, a
LINK_SIZE item containing the offset within the pattern string to the start of
the string argument, and the string itself, preceded by its starting delimiter
and followed by a binary zero. When a callout function is called, a pointer to
the actual string is passed, but the delimiter can be accessed as string[-1] if
the application needs it. In the 8-bit library, the callout in /X(?C'abc')Y/ is
compiled as the following bytes (decimal numbers represent binary values):
[OP_CALLOUT_STR] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0]
-------- ------- -------- -------
| | | |
------- LINK_SIZE items ------
Opcode table checking
---------------------
The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is
not a real opcode, but is used to check at compile time that tables indexed by
opcode are the correct length, in order to catch updating errors.
Philip Hazel
12 July 2019

368
pcre2/INSTALL vendored Normal file

@ -0,0 +1,368 @@
Installation Instructions
*************************
Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software
Foundation, Inc.
Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. This file is offered as-is,
without warranty of any kind.
Basic Installation
==================
Briefly, the shell command './configure && make && make install'
should configure, build, and install this package. The following
more-detailed instructions are generic; see the 'README' file for
instructions specific to this package. Some packages provide this
'INSTALL' file but do not implement all of the features documented
below. The lack of an optional feature in a given package is not
necessarily a bug. More recommendations for GNU packages can be found
in *note Makefile Conventions: (standards)Makefile Conventions.
The 'configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses
those values to create a 'Makefile' in each directory of the package.
It may also create one or more '.h' files containing system-dependent
definitions. Finally, it creates a shell script 'config.status' that
you can run in the future to recreate the current configuration, and a
file 'config.log' containing compiler output (useful mainly for
debugging 'configure').
It can also use an optional file (typically called 'config.cache' and
enabled with '--cache-file=config.cache' or simply '-C') that saves the
results of its tests to speed up reconfiguring. Caching is disabled by
default to prevent problems with accidental use of stale cache files.
If you need to do unusual things to compile the package, please try
to figure out how 'configure' could check whether to do them, and mail
diffs or instructions to the address given in the 'README' so they can
be considered for the next release. If you are using the cache, and at
some point 'config.cache' contains results you don't want to keep, you
may remove or edit it.
The file 'configure.ac' (or 'configure.in') is used to create
'configure' by a program called 'autoconf'. You need 'configure.ac' if
you want to change it or regenerate 'configure' using a newer version of
'autoconf'.
The simplest way to compile this package is:
1. 'cd' to the directory containing the package's source code and type
'./configure' to configure the package for your system.
Running 'configure' might take a while. While running, it prints
some messages telling which features it is checking for.
2. Type 'make' to compile the package.
3. Optionally, type 'make check' to run any self-tests that come with
the package, generally using the just-built uninstalled binaries.
4. Type 'make install' to install the programs and any data files and
documentation. When installing into a prefix owned by root, it is
recommended that the package be configured and built as a regular
user, and only the 'make install' phase executed with root
privileges.
5. Optionally, type 'make installcheck' to repeat any self-tests, but
this time using the binaries in their final installed location.
This target does not install anything. Running this target as a
regular user, particularly if the prior 'make install' required
root privileges, verifies that the installation completed
correctly.
6. You can remove the program binaries and object files from the
source code directory by typing 'make clean'. To also remove the
files that 'configure' created (so you can compile the package for
a different kind of computer), type 'make distclean'. There is
also a 'make maintainer-clean' target, but that is intended mainly
for the package's developers. If you use it, you may have to get
all sorts of other programs in order to regenerate files that came
with the distribution.
7. Often, you can also type 'make uninstall' to remove the installed
files again. In practice, not all packages have tested that
uninstallation works correctly, even though it is required by the
GNU Coding Standards.
8. Some packages, particularly those that use Automake, provide 'make
distcheck', which can by used by developers to test that all other
targets like 'make install' and 'make uninstall' work correctly.
This target is generally not run by end users.
Compilers and Options
=====================
Some systems require unusual options for compilation or linking that
the 'configure' script does not know about. Run './configure --help'
for details on some of the pertinent environment variables.
You can give 'configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here is
an example:
./configure CC=c99 CFLAGS=-g LIBS=-lposix
*Note Defining Variables::, for more details.
Compiling For Multiple Architectures
====================================
You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory. To do this, you can use GNU 'make'. 'cd' to the
directory where you want the object files and executables to go and run
the 'configure' script. 'configure' automatically checks for the source
code in the directory that 'configure' is in and in '..'. This is known
as a "VPATH" build.
With a non-GNU 'make', it is safer to compile the package for one
architecture at a time in the source code directory. After you have
installed the package for one architecture, use 'make distclean' before
reconfiguring for another architecture.
On MacOS X 10.5 and later systems, you can create libraries and
executables that work on multiple system types--known as "fat" or
"universal" binaries--by specifying multiple '-arch' options to the
compiler but only a single '-arch' option to the preprocessor. Like
this:
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CPP="gcc -E" CXXCPP="g++ -E"
This is not guaranteed to produce working output in all cases, you
may have to build one architecture at a time and combine the results
using the 'lipo' tool if you have problems.
Installation Names
==================
By default, 'make install' installs the package's commands under
'/usr/local/bin', include files under '/usr/local/include', etc. You
can specify an installation prefix other than '/usr/local' by giving
'configure' the option '--prefix=PREFIX', where PREFIX must be an
absolute file name.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
pass the option '--exec-prefix=PREFIX' to 'configure', the package uses
PREFIX as the prefix for installing programs and libraries.
Documentation and other data files still use the regular prefix.
In addition, if you use an unusual directory layout you can give
options like '--bindir=DIR' to specify different values for particular
kinds of files. Run 'configure --help' for a list of the directories
you can set and what kinds of files go in them. In general, the default
for these options is expressed in terms of '${prefix}', so that
specifying just '--prefix' will affect all of the other directory
specifications that were not explicitly provided.
The most portable way to affect installation locations is to pass the
correct locations to 'configure'; however, many packages provide one or
both of the following shortcuts of passing variable assignments to the
'make install' command line to change installation locations without
having to reconfigure or recompile.
The first method involves providing an override variable for each
affected directory. For example, 'make install
prefix=/alternate/directory' will choose an alternate location for all
directory configuration variables that were expressed in terms of
'${prefix}'. Any directories that were specified during 'configure',
but not in terms of '${prefix}', must each be overridden at install time
for the entire installation to be relocated. The approach of makefile
variable overrides for each directory variable is required by the GNU
Coding Standards, and ideally causes no recompilation. However, some
platforms have known limitations with the semantics of shared libraries
that end up requiring recompilation when using this method, particularly
noticeable in packages that use GNU Libtool.
The second method involves providing the 'DESTDIR' variable. For
example, 'make install DESTDIR=/alternate/directory' will prepend
'/alternate/directory' before all installation names. The approach of
'DESTDIR' overrides is not required by the GNU Coding Standards, and
does not work on platforms that have drive letters. On the other hand,
it does better at avoiding recompilation issues, and works well even
when some directory options were not specified in terms of '${prefix}'
at 'configure' time.
Optional Features
=================
If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving 'configure' the
option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'.
Some packages pay attention to '--enable-FEATURE' options to
'configure', where FEATURE indicates an optional part of the package.
They may also pay attention to '--with-PACKAGE' options, where PACKAGE
is something like 'gnu-as' or 'x' (for the X Window System). The
'README' should mention any '--enable-' and '--with-' options that the
package recognizes.
For packages that use the X Window System, 'configure' can usually
find the X include and library files automatically, but if it doesn't,
you can use the 'configure' options '--x-includes=DIR' and
'--x-libraries=DIR' to specify their locations.
Some packages offer the ability to configure how verbose the
execution of 'make' will be. For these packages, running './configure
--enable-silent-rules' sets the default to minimal output, which can be
overridden with 'make V=1'; while running './configure
--disable-silent-rules' sets the default to verbose, which can be
overridden with 'make V=0'.
Particular systems
==================
On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC
is not installed, it is recommended to use the following options in
order to use an ANSI C compiler:
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
HP-UX 'make' updates targets which have the same time stamps as their
prerequisites, which makes it generally unusable when shipped generated
files such as 'configure' are involved. Use GNU 'make' instead.
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
parse its '<wchar.h>' header file. The option '-nodtk' can be used as a
workaround. If GNU CC is not installed, it is therefore recommended to
try
./configure CC="cc"
and if that doesn't work, try
./configure CC="cc -nodtk"
On Solaris, don't put '/usr/ucb' early in your 'PATH'. This
directory contains several dysfunctional programs; working variants of
these programs are available in '/usr/bin'. So, if you need '/usr/ucb'
in your 'PATH', put it _after_ '/usr/bin'.
On Haiku, software installed for all users goes in '/boot/common',
not '/usr/local'. It is recommended to use the following options:
./configure --prefix=/boot/common
Specifying the System Type
==========================
There may be some features 'configure' cannot figure out
automatically, but needs to determine by the type of machine the package
will run on. Usually, assuming the package is built to be run on the
_same_ architectures, 'configure' can figure that out, but if it prints
a message saying it cannot guess the machine type, give it the
'--build=TYPE' option. TYPE can either be a short name for the system
type, such as 'sun4', or a canonical name which has the form:
CPU-COMPANY-SYSTEM
where SYSTEM can have one of these forms:
OS
KERNEL-OS
See the file 'config.sub' for the possible values of each field. If
'config.sub' isn't included in this package, then this package doesn't
need to know the machine type.
If you are _building_ compiler tools for cross-compiling, you should
use the option '--target=TYPE' to select the type of system they will
produce code for.
If you want to _use_ a cross compiler, that generates code for a
platform different from the build platform, you should specify the
"host" platform (i.e., that on which the generated programs will
eventually be run) with '--host=TYPE'.
Sharing Defaults
================
If you want to set default values for 'configure' scripts to share,
you can create a site shell script called 'config.site' that gives
default values for variables like 'CC', 'cache_file', and 'prefix'.
'configure' looks for 'PREFIX/share/config.site' if it exists, then
'PREFIX/etc/config.site' if it exists. Or, you can set the
'CONFIG_SITE' environment variable to the location of the site script.
A warning: not all 'configure' scripts look for a site script.
Defining Variables
==================
Variables not defined in a site shell script can be set in the
environment passed to 'configure'. However, some packages may run
configure again during the build, and the customized values of these
variables may be lost. In order to avoid this problem, you should set
them in the 'configure' command line, using 'VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
causes the specified 'gcc' to be used as the C compiler (unless it is
overridden in the site shell script).
Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an
Autoconf limitation. Until the limitation is lifted, you can use this
workaround:
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
'configure' Invocation
======================
'configure' recognizes the following options to control how it
operates.
'--help'
'-h'
Print a summary of all of the options to 'configure', and exit.
'--help=short'
'--help=recursive'
Print a summary of the options unique to this package's
'configure', and exit. The 'short' variant lists options used only
in the top level, while the 'recursive' variant lists options also
present in any nested packages.
'--version'
'-V'
Print the version of Autoconf used to generate the 'configure'
script, and exit.
'--cache-file=FILE'
Enable the cache: use and save the results of the tests in FILE,
traditionally 'config.cache'. FILE defaults to '/dev/null' to
disable caching.
'--config-cache'
'-C'
Alias for '--cache-file=config.cache'.
'--quiet'
'--silent'
'-q'
Do not print messages saying which checks are being made. To
suppress all normal output, redirect it to '/dev/null' (any error
messages will still be shown).
'--srcdir=DIR'
Look for the package's source code in directory DIR. Usually
'configure' can determine that directory automatically.
'--prefix=DIR'
Use DIR as the installation prefix. *note Installation Names:: for
more details, including other options available for fine-tuning the
installation locations.
'--no-create'
'-n'
Run the configure checks, but stop before creating any output
files.
'configure' also accepts some other, not widely useful, options. Run
'configure --help' for more details.

4
pcre2/LICENCE vendored

@ -20,8 +20,8 @@ THE BASIC LIBRARY FUNCTIONS
---------------------------
Written by: Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
Email local part: Philip.Hazel
Email domain: gmail.com
University of Cambridge Computing Service,
Cambridge, England.

347
pcre2/NEWS vendored Normal file

@ -0,0 +1,347 @@
News about PCRE2 releases
-------------------------
Version 10.36 04-December-2020
------------------------------
Again, mainly bug fixes and tidies. The only enhancements are the addition of
GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the
handling of substitution strings for both -O and callouts in pcre2grep, with
the addition of $x{...} and $o{...} to allow for characters whose code points
are greater than 255 in Unicode mode.
NOTE: there is an outstanding issue with JIT support for MacOS on arm64
hardware. For details, please see Bugzilla issue #2618.
Version 10.35 15-April-2020
---------------------------
Bugfixes, tidies, and a few new enhancements.
1. Capturing groups that contain recursive backreferences to themselves are no
longer automatically atomic, because the restriction is no longer necessary
as a result of the 10.30 restructuring.
2. Several new options for pcre2_substitute().
3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode
character properties are used for upper/lower case computations on characters
whose code points are greater than 127.
4. The character tables (for low-valued characters) can now more easily be
saved and restored in binary.
5. Updated to Unicode 13.0.0.
Version 10.34 21-November-2019
------------------------------
Another release with a few enhancements as well as bugfixes and tidies. The
main new features are:
1. There is now some support for matching in invalid UTF strings.
2. Non-atomic positive lookarounds are implemented in the pcre2_match()
interpreter, but not in JIT.
3. Added two new functions: pcre2_get_match_data_size() and
pcre2_maketables_free().
4. Upgraded to Unicode 12.1.0.
Version 10.33 16-April-2019
---------------------------
Yet more bugfixes, tidies, and a few enhancements, summarized here (see
ChangeLog for the full list):
1. Callouts from pcre2_substitute() are now available.
2. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
functions that use the standard POSIX names. However, in pcre2posix.h the POSIX
names are defined as macros. This should help avoid linking with the wrong
library in some environments, while still exporting the POSIX names for
pre-existing programs that use them.
3. Some new options:
(a) PCRE2_EXTRA_ESCAPED_CR_IS_LF makes \r behave as \n.
(b) PCRE2_EXTRA_ALT_BSUX enables support for ECMAScript 6's \u{hh...}
construct.
(c) PCRE2_COPY_MATCHED_SUBJECT causes a copy of a matched subject to be
made, instead of just remembering a pointer.
4. Some new Perl features:
(a) Perl 5.28's experimental alphabetic names for atomic groups and
lookaround assertions, for example, (*pla:...) and (*atomic:...).
(b) The new Perl "script run" features (*script_run:...) and
(*atomic_script_run:...) aka (*sr:...) and (*asr:...).
(c) When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in
capture group names.
5. --disable-percent-zt disables the use of %zu and %td in formatting strings
in pcre2test. They were already automatically disabled for VC and older C
compilers.
6. Some changes related to callouts in pcre2grep:
(a) Support for running an external program under VMS has been added, in
addition to Windows and fork() support.
(b) --disable-pcre2grep-callout-fork restricts the callout support in
to the inbuilt echo facility.
Version 10.32 10-September-2018
-------------------------------
This is another mainly bugfix and tidying release with a few minor
enhancements. These are the main ones:
1. pcre2grep now supports the inclusion of binary zeros in patterns that are
read from files via the -f option.
2. ./configure now supports --enable-jit=auto, which automatically enables JIT
if the hardware supports it.
3. In pcre2_dfa_match(), internal recursive calls no longer use the stack for
local workspace and local ovectors. Instead, an initial block of stack is
reserved, but if this is insufficient, heap memory is used. The heap limit
parameter now applies to pcre2_dfa_match().
4. Updated to Unicode version 11.0.0.
5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
6. Added support for \N{U+dddd}, but only in Unicode mode.
7. Added support for (?^) to unset all imnsx options.
Version 10.31 12-February-2018
------------------------------
This is mainly a bugfix and tidying release (see ChangeLog for full details).
However, there are some minor enhancements.
1. New pcre2_config() options: PCRE2_CONFIG_NEVER_BACKSLASH_C and
PCRE2_CONFIG_COMPILED_WIDTHS.
2. New pcre2_pattern_info() option PCRE2_INFO_EXTRAOPTIONS to retrieve the
extra compile time options.
3. There are now public names for all the pcre2_compile() error numbers.
4. Added PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK bits to a new
field callout_flags in callout blocks.
Version 10.30 14-August-2017
----------------------------
The full list of changes that includes bugfixes and tidies is, as always, in
ChangeLog. These are the most important new features:
1. The main interpreter, pcre2_match(), has been refactored into a new version
that does not use recursive function calls (and therefore the system stack) for
remembering backtracking positions. This makes --disable-stack-for-recursion a
NOOP. The new implementation allows backtracking into recursive group calls in
patterns, making it more compatible with Perl, and also fixes some other
previously hard-to-do issues. For patterns that have a lot of backtracking, the
heap is now used, and there is an explicit limit on the amount, settable by
pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). The "recursion limit" is retained,
but is renamed as "depth limit" (though the old names remain for
compatibility).
There is also a change in the way callouts from pcre2_match() are handled. The
offset_vector field in the callout block is no longer a pointer to the
actual ovector that was passed to the matching function in the match data
block. Instead it points to an internal ovector of a size large enough to hold
all possible captured substrings in the pattern.
2. The new option PCRE2_ENDANCHORED insists that a pattern match must end at
the end of the subject.
3. The new option PCRE2_EXTENDED_MORE implements Perl's /xx feature, and
pcre2test is upgraded to support it. Setting within the pattern by (?xx) is
also supported.
4. (?n) can be used to set PCRE2_NO_AUTO_CAPTURE, because Perl now has this.
5. Additional compile options in the compile context are now available, and the
first two are: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES and
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
6. The newline type PCRE2_NEWLINE_NUL is now available.
7. The match limit value now also applies to pcre2_dfa_match() as there are
patterns that can use up a lot of resources without necessarily recursing very
deeply.
8. The option REG_PEND (a GNU extension) is now available for the POSIX
wrapper. Also there is a new option PCRE2_LITERAL which is used to support
REG_NOSPEC.
9. PCRE2_EXTRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD are implemented for the
benefit of pcre2grep, and pcre2grep's -F, -w, and -x options are re-implemented
using PCRE2_LITERAL, PCRE2_EXTRA_MATCH_WORD, and PCRE2_EXTRA_MATCH_LINE. This
is tidier and also fixes some bugs.
10. The Unicode tables are upgraded from Unicode 8.0.0 to Unicode 10.0.0.
11. There are some experimental functions for converting foreign patterns
(globs and POSIX patterns) into PCRE2 patterns.
Version 10.23 14-February-2017
------------------------------
1. ChangeLog has the details of a lot of bug fixes and tidies.
2. There has been a major re-factoring of the pcre2_compile.c file. Most syntax
checking is now done in the pre-pass that identifies capturing groups. This has
reduced the amount of duplication and made the code tidier. While doing this,
some minor bugs and Perl incompatibilities were fixed (see ChangeLog for
details.)
3. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the
reference is by name, there is only one group of that name. The referenced
group must, of course be of fixed length.
4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back
reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does
not recognize this syntax.
5. pcre2grep now automatically expands its buffer up to a maximum set by
--max-buffer-size.
6. The -t option (grand total) has been added to pcre2grep.
7. A new function called pcre2_code_copy_with_tables() exists to copy a
compiled pattern along with a private copy of the character tables that is
uses.
8. A user supplied a number of patches to upgrade pcre2grep under Windows and
tidy the code.
9. Several updates have been made to pcre2test and test scripts (see
ChangeLog).
Version 10.22 29-July-2016
--------------------------
1. ChangeLog has the details of a number of bug fixes.
2. The POSIX wrapper function regcomp() did not used to support back references
and subroutine calls if called with the REG_NOSUB option. It now does.
3. A new function, pcre2_code_copy(), is added, to make a copy of a compiled
pattern.
4. Support for string callouts is added to pcre2grep.
5. Added the PCRE2_NO_JIT option to pcre2_match().
6. The pcre2_get_error_message() function now returns with a negative error
code if the error number it is given is unknown.
7. Several updates have been made to pcre2test and test scripts (see
ChangeLog).
Version 10.21 12-January-2016
-----------------------------
1. Many bugs have been fixed. A large number of them were provoked only by very
strange pattern input, and were discovered by fuzzers. Some others were
discovered by code auditing. See ChangeLog for details.
2. The Unicode tables have been updated to Unicode version 8.0.0.
3. For Perl compatibility in EBCDIC environments, ranges such as a-z in a
class, where both values are literal letters in the same case, omit the
non-letter EBCDIC code points within the range.
4. There have been a number of enhancements to the pcre2_substitute() function,
giving more flexibility to replacement facilities. It is now also possible to
cause the function to return the needed buffer size if the one given is too
small.
5. The PCRE2_ALT_VERBNAMES option causes the "name" parts of special verbs such
as (*THEN:name) to be processed for backslashes and to take note of
PCRE2_EXTENDED.
6. PCRE2_INFO_HASBACKSLASHC makes it possible for a client to find out if a
pattern uses \C, and --never-backslash-C makes it possible to compile a version
PCRE2 in which the use of \C is always forbidden.
7. A limit to the length of pattern that can be handled can now be set by
calling pcre2_set_max_pattern_length().
8. When matching an unanchored pattern, a match can be required to begin within
a given number of code units after the start of the subject by calling
pcre2_set_offset_limit().
9. The pcre2test program has been extended to test new facilities, and it can
now run the tests when LF on its own is not a valid newline sequence.
10. The RunTest script has also been updated to enable more tests to be run.
11. There have been some minor performance enhancements.
Version 10.20 30-June-2015
--------------------------
1. Callouts with string arguments and the pcre2_callout_enumerate() function
have been implemented.
2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added.
3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a
subject in multiline mode.
4. The way named subpatterns are handled has been refactored. The previous
approach had several bugs.
5. The handling of \c in EBCDIC environments has been changed to conform to the
perlebcdic document. This is an incompatible change.
6. Bugs have been mended, many of them discovered by fuzzers.
Version 10.10 06-March-2015
---------------------------
1. Serialization and de-serialization functions have been added to the API,
making it possible to save and restore sets of compiled patterns, though
restoration must be done in the same environment that was used for compilation.
2. The (*NO_JIT) feature has been added; this makes it possible for a pattern
creator to specify that JIT is not to be used.
3. A number of bugs have been fixed. In particular, bugs that caused building
on Windows using CMake to fail have been mended.
Version 10.00 05-January-2015
-----------------------------
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
API, up to item 20 for release 8.36. New programs are recommended to use the
new library. Programs that use the original (PCRE1) API will need changing
before linking with the new library.
****

406
pcre2/NON-AUTOTOOLS-BUILD vendored Normal file

@ -0,0 +1,406 @@
Building PCRE2 without using autotools
--------------------------------------
This document contains the following sections:
General
Generic instructions for the PCRE2 C library
Stack size in Windows environments
Linking programs in Windows environments
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE2 on Windows with CMake
Building PCRE2 on Windows with Visual Studio
Testing with RunTest.bat
Building PCRE2 on native z/OS and z/VM
GENERAL
The basic PCRE2 library consists entirely of code written in Standard C, and so
should compile successfully on any system that has a Standard C compiler and
library.
The PCRE2 distribution includes a "configure" file for use by the
configure/make (autotools) build system, as found in many Unix-like
environments. The README file contains information about the options for
"configure".
There is also support for CMake, which some users prefer, especially in Windows
environments, though it can also be run in Unix-like environments. See the
section entitled "Building PCRE2 on Windows with CMake" below.
Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
under the names src/config.h.generic and src/pcre2.h.generic. These are
provided for those who build PCRE2 without using "configure" or CMake. If you
use "configure" or CMake, the .generic versions are not used.
GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
The following are generic instructions for building the PCRE2 C library "by
hand". If you are going to use CMake, this section does not apply to you; you
can skip ahead to the CMake section.
(1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
macro settings that it contains to whatever is appropriate for your
environment. In particular, you can alter the definition of the NEWLINE
macro to specify what character(s) you want to be interpreted as line
terminators by default.
When you subsequently compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources.
An alternative approach is not to edit src/config.h, but to use -D on the
compiler command line to make any changes that you need to the
configuration options. In this case -DHAVE_CONFIG_H must not be set.
NOTE: There have been occasions when the way in which certain parameters
in src/config.h are used has changed between releases. (In the
configure/make world, this is handled automatically.) When upgrading to a
new release, you are strongly advised to review src/config.h.generic
before re-using what you had previously.
Note also that the src/config.h.generic file is created from a config.h
that was generated by Autotools, which automatically includes settings of
a number of macros that are not actually used by PCRE2 (for example,
HAVE_MEMORY_H).
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
(3) EITHER:
Copy or rename file src/pcre2_chartables.c.dist as
src/pcre2_chartables.c.
OR:
Compile src/pcre2_dftables.c as a stand-alone program (using
-DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
the single argument "src/pcre2_chartables.c". This generates a set of
standard character tables and writes them to that file. The tables are
generated using the default C locale for your system. If you want to use
a locale that is specified by LC_xxx environment variables, add the -L
option to the pcre2_dftables command. You must use this method if you
are building on a system that uses EBCDIC code.
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
specify alternative tables at run time.
(4) For an 8-bit library, compile the following source files from the src
directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
set -DHAVE_CONFIG_H if you have set up src/config.h with your
configuration, or else use other -D settings to change the configuration
as required.
pcre2_auto_possess.c
pcre2_chartables.c
pcre2_compile.c
pcre2_config.c
pcre2_context.c
pcre2_convert.c
pcre2_dfa_match.c
pcre2_error.c
pcre2_extuni.c
pcre2_find_bracket.c
pcre2_jit_compile.c
pcre2_maketables.c
pcre2_match.c
pcre2_match_data.c
pcre2_newline.c
pcre2_ord2utf.c
pcre2_pattern_info.c
pcre2_script_run.c
pcre2_serialize.c
pcre2_string_utils.c
pcre2_study.c
pcre2_substitute.c
pcre2_substring.c
pcre2_tables.c
pcre2_ucd.c
pcre2_valid_utf.c
pcre2_xclass.c
Make sure that you include -I. in the compiler command (or equivalent for
an unusual compiler) so that all included PCRE2 header files are first
sought in the src directory under the current directory. Otherwise you run
the risk of picking up a previously-installed file from somewhere else.
Note that you must compile pcre2_jit_compile.c, even if you have not
defined SUPPORT_JIT in src/config.h, because when JIT support is not
configured, dummy functions are compiled. When JIT support IS configured,
pcre2_jit_compile.c #includes other files from the sljit subdirectory,
all of whose names begin with "sljit". It also #includes
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
these yourself.
Note also that the pcre2_fuzzsupport.c file contains special code that is
useful to those who want to run fuzzing tests on the PCRE2 library. Unless
you are doing that, you can ignore it.
(5) Now link all the compiled code into an object library in whichever form
your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
If your system has static and shared libraries, you may have to do this
once for each type.
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
instead of the 8-bit library) just supply 16 or 32 as the value of
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
(7) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the src/pcre2posix.h file and then
compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
library.
(8) The pcre2test program can be linked with any combination of the 8-bit,
16-bit and 32-bit libraries (depending on what you selected in
src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
appropriate library/ies. If you compiled an 8-bit library, pcre2test also
needs the pcre2posix wrapper library.
(9) Run pcre2test on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. There are
comments about what each test does in the section entitled "Testing PCRE2"
in the README file. If you compiled more than one of the 8-bit, 16-bit and
32-bit libraries, you need to run pcre2test with the -16 option to do
16-bit tests and with the -32 option to do 32-bit tests.
Some tests are relevant only when certain build-time options are selected.
For example, test 4 is for Unicode support, and will not run if you have
built PCRE2 without it. See the comments at the start of each testinput
file. If you have a suitable Unix-like shell, the RunTest script will run
the appropriate tests for you. The command "RunTest list" will output a
list of all the tests.
Note that the supplied files are in Unix format, with just LF characters
as line terminators. You may need to edit them to change this if your
system uses a different convention.
(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
by running pcre2test with the -jit option. This is done automatically by
the RunTest script. You might also like to build and run the freestanding
JIT test program, src/pcre2_jit_test.c.
(11) If you want to use the pcre2grep command, compile and link
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
need the pcre2posix library). If you have built the PCRE2 library with JIT
support by defining SUPPORT_JIT in src/config.h, you can also define
SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless
it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without
defining SUPPORT_JIT, pcre2grep does not try to make use of JIT.
STACK SIZE IN WINDOWS ENVIRONMENTS
Prior to release 10.30 the default system stack size of 1MiB in some Windows
environments caused issues with some tests. This should no longer be the case
for 10.30 and later releases.
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
If you want to statically link a program against a PCRE2 library in the form of
a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
It is possible to compile programs to use different calling conventions using
MSVC. Search the web for "calling conventions" for more information. To make it
easier to change the calling convention for the exported functions in the
PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
not set, it defaults to empty; the default calling convention is then used
(which is what is wanted most of the time).
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
There are two ways of building PCRE2 using the "configure, make, make install"
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
the same thing; they are completely different from each other. There is also
support for building using CMake, which some users find a more straightforward
way of building PCRE2 under Windows.
The MinGW home page (http://www.mingw.org/) says this:
MinGW: A collection of freely available and freely distributable Windows
specific header files and import libraries combined with GNU toolsets that
allow one to produce native Windows programs that do not rely on any
3rd-party C runtime DLLs.
The Cygwin home page (http://www.cygwin.com/) says this:
Cygwin is a Linux-like environment for Windows. It consists of two parts:
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
substantial Linux API functionality
. A collection of tools which provide Linux look and feel.
On both MinGW and Cygwin, PCRE2 should build correctly using:
./configure && make && make install
This should create two libraries called libpcre2-8 and libpcre2-posix. These
are independent libraries: when you link with libpcre2-posix you must also link
with libpcre2-8, which contains the basic functions.
Using Cygwin's compiler generates libraries and executables that depend on
cygwin1.dll. If a library that is generated this way is distributed,
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
licence, this forces not only PCRE2 to be under the GPL, but also the entire
application. A distributor who wants to keep their own code proprietary must
purchase an appropriate Cygwin licence.
MinGW has no such restrictions. The MinGW compiler generates a library or
executable that can run standalone on Windows without any third party dll or
licensing issues.
But there is more complication:
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
gcc and MinGW's gcc). So, a user can:
. Build native binaries by using MinGW or by getting Cygwin and using
-mno-cygwin.
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
compiler flags.
The test files that are supplied with PCRE2 are in UNIX format, with LF
characters as line terminators. Unless your PCRE2 library uses a default
newline option that includes LF as a valid newline, it may be necessary to
change the line terminators in the test files to get some of the tests to work.
BUILDING PCRE2 ON WINDOWS WITH CMAKE
CMake is an alternative configuration facility that can be used instead of
"configure". CMake creates project files (make files, solution files, etc.)
tailored to numerous development environments, including Visual Studio,
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build
directories.
The following instructions were contributed by a PCRE1 user, but they should
also work for PCRE2. If they are not followed exactly, errors may occur. In the
event that errors do occur, it is recommended that you delete the CMake cache
before attempting to repeat the CMake build process. In the CMake GUI, the
cache can be deleted by selecting "File > Delete Cache".
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
directory such as C:\pcre2. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre2\pcre2-xx\build.
4. Run cmake-gui from the Shell envirornment of your build tool, for example,
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
to start Cmake from the Windows Start menu, as this can lead to errors.
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
build directories, respectively.
6. Hit the "Configure" button.
7. Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
8. The GUI will then list several configuration options. This is where
you can disable Unicode support or select other PCRE2 optional features.
9. Hit "Configure" again. The adjacent "Generate" button should now be
active.
10. Hit "Generate".
11. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
12. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
The code currently cannot be compiled without a stdint.h header, which is
available only in relatively recent versions of Visual Studio. However, this
portable and permissively-licensed implementation of the header worked without
issue:
http://www.azillionmonkeys.com/qed/pstdint.h
Just rename it and drop it into the top level of the build tree.
TESTING WITH RUNTEST.BAT
If configured with CMake, building the test project ("make test" or building
ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
on your configuration options, possibly other test programs) in the build
directory. The pcre2_test.bat script runs RunTest.bat with correct source and
exe paths.
For manual testing with RunTest.bat, provided the build dir is a subdirectory
of the source directory: Open command shell window. Chdir to the location
of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
To run only a particular test with RunTest.Bat provide a test number argument.
Otherwise:
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
have been created.
2. Edit RunTest.bat to indentify the full or relative location of
the pcre2 source (wherein which the testdata folder resides), e.g.:
set srcdir=C:\pcre2\pcre2-10.00
3. In a Windows command environment, chdir to the location of your bat and
exe programs.
4. Run RunTest.bat. Test outputs will automatically be compared to expected
results, and discrepancies will be identified in the console output.
To independently test the just-in-time compiler, run pcre2_jit_test.exe.
BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
applications can be supported through UNIX System Services, and in such an
environment it should be possible to build PCRE2 in the same way as in other
systems, with the EBCDIC related configuration settings, but it is not known if
anybody has tried this.
In native z/OS (without UNIX System Services) and in z/VM, special ports are
required. For details, please see file 939 on this web site:
http://www.cbttape.org
Everything in that location, source and executable, is in EBCDIC and native
z/OS file formats. The port provides an API for LE languages such as COBOL and
for the z/OS and z/VM versions of the Rexx languages.
==============================
Last Updated: 14 November 2018
==============================

236
pcre2/PrepareRelease vendored Executable file

@ -0,0 +1,236 @@
#/bin/sh
# Script to prepare the files for building a PCRE2 release. It does some
# processing of the documentation, detrails files, and creates pcre2.h.generic
# and config.h.generic (for use by builders who can't run ./configure).
# You must run this script before runnning "make dist". If its first argument
# is "doc", it stops after preparing the documentation. There are no other
# arguments. The script makes use of the following files:
# 132html A Perl script that converts a .1 or .3 man page into HTML. It
# "knows" the relevant troff constructs that are used in the PCRE2
# man pages.
# CheckMan A Perl script that checks man pages for typos in the mark up.
# CleanTxt A Perl script that cleans up the output of "nroff -man" by
# removing backspaces and other redundant text so as to produce
# a readable .txt file.
# Detrail A Perl script that removes trailing spaces from files.
# doc/index.html.src
# A file that is copied as index.html into the doc/html directory
# when the HTML documentation is built. It works like this so that
# doc/html can be deleted and re-created from scratch.
# README & NON-AUTOTOOLS-BUILD
# These files are copied into the doc/html directory, with .txt
# extensions so that they can by hyperlinked from the HTML
# documentation, because some people just go to the HTML without
# looking for text files.
# First, sort out the documentation. Remove pcre2demo.3 first because it won't
# pass the markup check (it is created below, using markup that none of the
# other pages use).
cd doc
echo Processing documentation
/bin/rm -f pcre2demo.3
# Check the remaining man pages
perl ../CheckMan *.1 *.3
if [ $? != 0 ] ; then exit 1; fi
# Make Text form of the documentation. It needs some mangling to make it
# tidy for online reading. Concatenate all the .3 stuff, but omit the
# individual function pages.
cat <<End >pcre2.txt
-----------------------------------------------------------------------------
This file contains a concatenation of the PCRE2 man pages, converted to plain
text format for ease of searching with a text editor, or for use on systems
that do not have a man page processor. The small individual files that give
synopses of each function in the library have not been included. Neither has
the pcre2demo program. There are separate text files for the pcre2grep and
pcre2test commands.
-----------------------------------------------------------------------------
End
echo "Making pcre2.txt"
for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform \
pcre2posix pcre2sample pcre2serialize pcre2syntax \
pcre2unicode ; do
echo " Processing $file.3"
nroff -c -man $file.3 >$file.rawtxt
perl ../CleanTxt <$file.rawtxt >>pcre2.txt
/bin/rm $file.rawtxt
echo "------------------------------------------------------------------------------" >>pcre2.txt
if [ "$file" != "pcre2sample" ] ; then
echo " " >>pcre2.txt
echo " " >>pcre2.txt
fi
done
# The three commands
for file in pcre2test pcre2grep pcre2-config ; do
echo Making $file.txt
nroff -c -man $file.1 >$file.rawtxt
perl ../CleanTxt <$file.rawtxt >$file.txt
/bin/rm $file.rawtxt
done
# Make pcre2demo.3 from the pcre2demo.c source file
echo "Making pcre2demo.3"
perl <<"END" >pcre2demo.3
open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n";
open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n";
print OUT ".\\\" Start example.\n" .
".de EX\n" .
". nr mE \\\\n(.f\n" .
". nf\n" .
". nh\n" .
". ft CW\n" .
"..\n" .
".\n" .
".\n" .
".\\\" End example.\n" .
".de EE\n" .
". ft \\\\n(mE\n" .
". fi\n" .
". hy \\\\n(HY\n" .
"..\n" .
".\n" .
".EX\n" ;
while (<IN>)
{
s/\\/\\e/g;
print OUT;
}
print OUT ".EE\n";
close(IN);
close(OUT);
END
if [ $? != 0 ] ; then exit 1; fi
# Make HTML form of the documentation.
echo "Making HTML documentation"
/bin/rm html/*
cp index.html.src html/index.html
cp ../README html/README.txt
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
for file in *.1 ; do
base=`basename $file .1`
echo " Making $base.html"
perl ../132html -toc $base <$file >html/$base.html
done
# Exclude table of contents for function summaries. It seems that expr
# forces an anchored regex. Also exclude them for small pages that have
# only one section.
for file in *.3 ; do
base=`basename $file .3`
toc=-toc
if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
if [ "$base" = "pcre2sample" ] || \
[ "$base" = "pcre2compat" ] || \
[ "$base" = "pcre2limits" ] || \
[ "$base" = "pcre2unicode" ] ; then
toc=""
fi
echo " Making $base.html"
perl ../132html $toc $base <$file >html/$base.html
if [ $? != 0 ] ; then exit 1; fi
done
# End of documentation processing; stop if only documentation required.
cd ..
echo Documentation done
if [ "$1" = "doc" ] ; then exit; fi
# These files are detrailed; do not detrail the test data because there may be
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
# line endings and the detrail script removes all trailing white space. The
# configure files are also omitted from the detrailing.
files="\
Makefile.am \
configure.ac \
README \
LICENCE \
COPYING \
AUTHORS \
NEWS \
NON-AUTOTOOLS-BUILD \
INSTALL \
132html \
CleanTxt \
Detrail \
ChangeLog \
CMakeLists.txt \
RunGrepTest \
RunTest \
pcre2-config.in \
perltest.sh \
libpcre2-8.pc.in \
libpcre2-16.pc.in \
libpcre2-32.pc.in \
libpcre2-posix.pc.in \
src/pcre2_dftables.c \
src/pcre2.h.in \
src/pcre2_auto_possess.c \
src/pcre2_compile.c \
src/pcre2_config.c \
src/pcre2_context.c \
src/pcre2_convert.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
src/pcre2_extuni.c \
src/pcre2_find_bracket.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_compile.c \
src/pcre2_jit_match.c \
src/pcre2_jit_misc.c \
src/pcre2_jit_test.c \
src/pcre2_maketables.c \
src/pcre2_match.c \
src/pcre2_match_data.c \
src/pcre2_newline.c \
src/pcre2_ord2utf.c \
src/pcre2_pattern_info.c \
src/pcre2_printint.c \
src/pcre2_string_utils.c \
src/pcre2_study.c \
src/pcre2_substring.c \
src/pcre2_tables.c \
src/pcre2_ucd.c \
src/pcre2_ucp.h \
src/pcre2_valid_utf.c \
src/pcre2_xclass.c \
src/pcre2demo.c \
src/pcre2grep.c \
src/pcre2posix.c \
src/pcre2posix.h \
src/pcre2test.c"
echo Detrailing
perl ./Detrail $files doc/p* doc/html/*
echo Done
#End

906
pcre2/README vendored Normal file

@ -0,0 +1,906 @@
README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------
PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
API. Since its initial release in 2015, there has been further development of
the code and it now differs from PCRE1 in more than just the API. There are new
features and the internals have been improved. The latest release of PCRE2 is
available in three alternative formats from:
https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.gz
https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.bz2
https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.zip
There is a mailing list for discussion about the development of PCRE (both the
original and new APIs) at pcre-dev@exim.org. You can access the archives and
subscribe or manage your subscription here:
https://lists.exim.org/mailman/listinfo/pcre-dev
Please read the NEWS file if you are upgrading from a previous release. The
contents of this README file are:
The PCRE2 APIs
Documentation for PCRE2
Contributions by users of PCRE2
Building PCRE2 on non-Unix-like systems
Building PCRE2 without using autotools
Building PCRE2 using autotools
Retrieving configuration information
Shared libraries
Cross-compiling using autotools
Making new tarballs
Testing PCRE2
Character tables
File manifest
The PCRE2 APIs
--------------
PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there
are no C++ wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit
library that are based on the POSIX regular expression API (see the pcre2posix
man page). These are built into a library called libpcre2-posix. Note that this
just provides a POSIX calling interface to PCRE2; the regular expressions
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
and does not give full access to all of PCRE2's facilities.
The header file for the POSIX-style functions is called pcre2posix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems
with existing files of that name by distributing it that way. To use PCRE2 with
an existing program that uses the POSIX API, pcre2posix.h will have to be
renamed or pointed at by a link (or the program modified, of course). See the
pcre2posix documentation for more details.
Documentation for PCRE2
-----------------------
If you install PCRE2 in the normal way on a Unix-like system, you will end up
with a set of man pages whose names all start with "pcre2". The one that is
just called "pcre2" lists all the others. In addition to these man pages, the
PCRE2 documentation is supplied in two other forms:
1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
doc/pcre2test.txt in the source distribution. The first of these is a
concatenation of the text forms of all the section 3 man pages except the
listing of pcre2demo.c and those that summarize individual functions. The
other two are the text forms of the section 1 man pages for the pcre2grep
and pcre2test commands. These text forms are provided for ease of scanning
with text editors or similar tools. They are installed in
<prefix>/share/doc/pcre2, where <prefix> is the installation prefix
(defaulting to /usr/local).
2. A set of files containing all the documentation in HTML form, hyperlinked
in various ways, and rooted in a file called index.html, is distributed in
doc/html and installed in <prefix>/share/doc/pcre2/html.
Building PCRE2 on non-Unix-like systems
---------------------------------------
For a non-Unix-like system, please read the file NON-AUTOTOOLS-BUILD, though if
your system supports the use of "configure" and "make" you may be able to build
PCRE2 using autotools in the same way as for many Unix-like systems.
PCRE2 can also be configured using CMake, which can be run in various ways
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake.
PCRE2 has been compiled on many different operating systems. It should be
straightforward to build PCRE2 on any system that has a Standard C compiler and
library, because it uses only Standard C functions.
Building PCRE2 without using autotools
--------------------------------------
The use of autotools (in particular, libtool) is problematic in some
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
file for ways of building PCRE2 without using autotools.
Building PCRE2 using autotools
------------------------------
The following instructions assume the use of the widely used "configure; make;
make install" (autotools) process.
To build PCRE2 on system that supports autotools, first run the "configure"
command from the PCRE2 distribution directory, with your current directory set
to the directory where you want the files to be created. This command is a
standard GNU "autoconf" configuration script, for which generic instructions
are supplied in the file INSTALL.
Most commonly, people build PCRE2 within its own distribution directory, and in
this case, on many systems, just running "./configure" is sufficient. However,
the usual methods of changing standard defaults are available. For example:
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
This command specifies that the C compiler should be run with the flags '-O2
-Wall' instead of the default, and that "make install" should install PCRE2
under /opt/local instead of the default /usr/local.
If you want to build in a different directory, just run "configure" with that
directory as current. For example, suppose you have unpacked the PCRE2 source
into /source/pcre2/pcre2-xxx, but you want to build it in
/build/pcre2/pcre2-xxx:
cd /build/pcre2/pcre2-xxx
/source/pcre2/pcre2-xxx/configure
PCRE2 is written in C and is normally compiled as a C library. However, it is
possible to build it as a C++ library, though the provided building apparatus
does not have any features to support this.
There are some optional features that can be included or omitted from the PCRE2
library. They are also documented in the pcre2build man page.
. By default, both shared and static libraries are built. You can change this
by adding one of these options to the "configure" command:
--disable-shared
--disable-static
(See also "Shared libraries on Unix-like systems" below.)
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
the "configure" command, the 16-bit library is also built. If you add
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
to disable building the 8-bit library.
. If you want to include support for just-in-time (JIT) compiling, which can
give large performance improvements on certain platforms, add --enable-jit to
the "configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
will be a compile time error. If in doubt, use --enable-jit=auto, which
enables JIT only if the current hardware is supported.
. If you are enabling JIT under SELinux environment you may also want to add
--enable-jit-sealloc, which enables the use of an executable memory allocator
that is compatible with SELinux. Warning: this allocator is experimental!
It does not support fork() operation and may crash when no disk space is
available. This option has no effect if JIT is disabled.
. If you do not want to make use of the default support for UTF-8 Unicode
character strings in the 8-bit library, UTF-16 Unicode character strings in
the 16-bit library, or UTF-32 Unicode character strings in the 32-bit
library, you can add --disable-unicode to the "configure" command. This
reduces the size of the libraries. It is not possible to configure one
library with Unicode support, and another without, in the same configuration.
It is also not possible to use --enable-ebcdic (see below) with Unicode
support, so if this option is set, you must also use --disable-unicode.
When Unicode support is available, the use of a UTF encoding still has to be
enabled by setting the PCRE2_UTF option at run time or starting a pattern
with (*UTF). When PCRE2 is compiled with Unicode support, its input can only
either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms.
As well as supporting UTF strings, Unicode support includes support for the
\P, \p, and \X sequences that recognize Unicode character properties.
However, only the basic two-letter properties such as Lu are supported.
Escape sequences such as \d and \w in patterns do not by default make use of
Unicode properties, but can be made to do so by setting the PCRE2_UCP option
or starting a pattern with (*UCP).
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
character as indicating the end of a line. Whatever you specify at build time
is the default; the caller of PCRE2 can change the selection at run time. The
default newline indicator is a single LF character (the Unix standard). You
can specify the default newline indicator by adding --enable-newline-is-cr,
--enable-newline-is-lf, --enable-newline-is-crlf,
--enable-newline-is-anycrlf, --enable-newline-is-any, or
--enable-newline-is-nul to the "configure" command, respectively.
. By default, the sequence \R in a pattern matches any Unicode line ending
sequence. This is independent of the option specifying what PCRE2 considers
to be the end of a line (see above). However, the caller of PCRE2 can
restrict \R to match only CR, LF, or CRLF. You can make this the default by
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
. In a pattern, the escape sequence \C matches a single code unit, even in a
UTF mode. This can be dangerous because it breaks up multi-code-unit
characters. You can build PCRE2 with the use of \C permanently locked out by
adding --enable-never-backslash-C (note the upper case C) to the "configure"
command. When \C is allowed by the library, individual applications can lock
it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option.
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
pattern. This limits the amount of system stack that a pattern uses when it
is compiled. The default is 250, but you can change it by setting, for
example,
--with-parens-nest-limit=500
. PCRE2 has a counter that can be set to limit the amount of computing resource
it uses when matching a pattern. If the limit is exceeded during a match, the
match fails. The default is ten million. You can change the default by
setting, for example,
--with-match-limit=500000
on the "configure" command. This is just the default; individual calls to
pcre2_match() or pcre2_dfa_match() can supply their own value. There is more
discussion in the pcre2api man page (search for pcre2_set_match_limit).
. There is a separate counter that limits the depth of nested backtracking
(pcre2_match()) or nested function calls (pcre2_dfa_match()) during a
matching process, which indirectly limits the amount of heap memory that is
used, and in the case of pcre2_dfa_match() the amount of stack as well. This
counter also has a default of ten million, which is essentially "unlimited".
You can change the default by setting, for example,
--with-match-limit-depth=5000
There is more discussion in the pcre2api man page (search for
pcre2_set_depth_limit).
. You can also set an explicit limit on the amount of heap memory used by
the pcre2_match() and pcre2_dfa_match() interpreters:
--with-heap-limit=500
The units are kibibytes (units of 1024 bytes). This limit does not apply when
the JIT optimization (which has its own memory control features) is used.
There is more discussion on the pcre2api man page (search for
pcre2_set_heap_limit).
. In the 8-bit library, the default maximum compiled pattern size is around
64 kibibytes. You can increase this by adding --with-link-size=3 to the
"configure" command. PCRE2 then uses three bytes instead of two for offsets
to different parts of the compiled pattern. In the 16-bit library,
--with-link-size=3 is the same as --with-link-size=4, which (in both
libraries) uses four-byte offsets. Increasing the internal link size reduces
performance in the 8-bit and 16-bit libraries. In the 32-bit library, the
link size setting is ignored, as 4-byte offsets are always used.
. For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, it uses a set of
tables for ASCII encoding that is part of the distribution. If you specify
--enable-rebuild-chartables
a program called pcre2_dftables is compiled and run in the default C locale
when you obey "make". It builds a source file called pcre2_chartables.c. If
you do not specify this option, pcre2_chartables.c is created as a copy of
pcre2_chartables.c.dist. See "Character tables" below for further
information.
. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
character code (as opposed to ASCII/Unicode) by specifying
--enable-ebcdic --disable-unicode
This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
. If you specify --enable-debug, additional debugging code is included in the
build. This option is intended for use by the PCRE2 maintainers.
. In environments where valgrind is installed, if you specify
--enable-valgrind
PCRE2 will use valgrind annotations to mark certain memory regions as
unaddressable. This allows it to detect invalid memory accesses, and is
mostly useful for debugging PCRE2 itself.
. In environments where the gcc compiler is used and lcov is installed, if you
specify
--enable-coverage
the build process implements a code coverage report for the test suite. The
report is generated by running "make coverage". If ccache is installed on
your system, it must be disabled when building PCRE2 for coverage reporting.
You can do this by setting the environment variable CCACHE_DISABLE=1 before
running "make" to build PCRE2. There is more information about coverage
reporting in the "pcre2build" documentation.
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
you add --disable-pcre2grep-jit to the "configure" command.
. There is support for calling external programs during matching in the
pcre2grep command, using PCRE2's callout facility with string arguments. This
support can be disabled by adding --disable-pcre2grep-callout to the
"configure" command. There are two kinds of callout: one that generates
output from inbuilt code, and another that calls an external program. The
latter has special support for Windows and VMS; otherwise it assumes the
existence of the fork() function. This facility can be disabled by adding
--disable-pcre2grep-callout-fork to the "configure" command.
. The pcre2grep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
specifying one or both of
--enable-pcre2grep-libz
--enable-pcre2grep-libbz2
Of course, the relevant libraries must be installed on your system.
. The default starting size (in bytes) of the internal buffer used by pcre2grep
can be set by, for example:
--with-pcre2grep-bufsize=51200
The value must be a plain integer. The default is 20480. The amount of memory
used by pcre2grep is actually three times this number, to allow for "before"
and "after" lines. If very long lines are encountered, the buffer is
automatically enlarged, up to a fixed maximum size.
. The default maximum size of pcre2grep's internal buffer can be set by, for
example:
--with-pcre2grep-max-bufsize=2097152
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
whichever is the larger.
. It is possible to compile pcre2test so that it links with the libreadline
or libedit libraries, by specifying, respectively,
--enable-pcre2test-libreadline or --enable-pcre2test-libedit
If this is done, when pcre2test's input is from a terminal, it reads it using
the readline() function. This provides line-editing and history facilities.
Note that libreadline is GPL-licenced, so if you distribute a binary of
pcre2test linked in this way, there may be licensing issues. These can be
avoided by linking with libedit (which has a BSD licence) instead.
Enabling libreadline causes the -lreadline option to be added to the
pcre2test build. In many operating environments with a sytem-installed
readline library this is sufficient. However, in some environments (e.g. if
an unmodified distribution version of readline is in use), it may be
necessary to specify something like LIBS="-lncurses" as well. This is
because, to quote the readline INSTALL, "Readline uses the termcap functions,
but does not link with the termcap or curses library itself, allowing
applications which link with readline the to choose an appropriate library."
If you get error messages about missing functions tgetstr, tgetent, tputs,
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
should fix it.
. The C99 standard defines formatting modifiers z and t for size_t and
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
defined and has a value greater than or equal to 199901L (indicating C99).
However, there is at least one environment that claims to be C99 but does not
support these modifiers. If --disable-percent-zt is specified, no use is made
of the z or t modifiers. Instead or %td or %zu, %lu is used, with a cast for
size_t values.
. There is a special option called --enable-fuzz-support for use by people who
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
be built, but not installed. This contains a single function called
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
length of the string. When called, this function tries to compile the string
as a pattern, and if that succeeds, to match it. This is done both with no
options and with some random options bits that are generated from the string.
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
be created. This is normally run under valgrind or used when PCRE2 is
compiled with address sanitizing enabled. It calls the fuzzing function and
outputs information about it is doing. The input strings are specified by
arguments: if an argument starts with "=" the rest of it is a literal input
string. Otherwise, it is assumed to be a file name, and the contents of the
file are the test string.
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
which caused pcre2_match() to use individual blocks on the heap for
backtracking instead of recursive function calls (which use the stack). This
is now obsolete since pcre2_match() was refactored always to use the heap (in
a much more efficient way than before). This option is retained for backwards
compatibility, but has no effect other than to output a warning.
The "configure" script builds the following files for the basic C library:
. Makefile the makefile that builds the library
. src/config.h build-time configuration options for the library
. src/pcre2.h the public PCRE2 header file
. pcre2-config script that shows the building settings such as CFLAGS
that were set for "configure"
. libpcre2-8.pc )
. libpcre2-16.pc ) data for the pkg-config command
. libpcre2-32.pc )
. libpcre2-posix.pc )
. libtool script that builds shared and/or static libraries
Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
tarballs under the names config.h.generic and pcre2.h.generic. These are
provided for those who have to build PCRE2 without using "configure" or CMake.
If you use "configure" or CMake, the .generic versions are not used.
The "configure" script also creates config.status, which is an executable
script that can be run to recreate the configuration, and config.log, which
contains compiler output from tests that "configure" runs.
Once "configure" has run, you can run "make". This builds whichever of the
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
program called pcre2test. If you enabled JIT support with --enable-jit, another
test program called pcre2_jit_test is built as well. If the 8-bit library is
built, libpcre2-posix and the pcre2grep command are also built. Running
"make" with the -j option may speed up compilation on multiprocessor systems.
The command "make check" runs all the appropriate tests. Details of the PCRE2
tests are given below in a separate section of this document. The -j option of
"make" can also be used when running the tests.
You can use "make install" to install PCRE2 into live directories on your
system. The following are installed (file names are all relative to the
<prefix> that is set when "configure" is run):
Commands (bin):
pcre2test
pcre2grep (if 8-bit support is enabled)
pcre2-config
Libraries (lib):
libpcre2-8 (if 8-bit support is enabled)
libpcre2-16 (if 16-bit support is enabled)
libpcre2-32 (if 32-bit support is enabled)
libpcre2-posix (if 8-bit support is enabled)
Configuration information (lib/pkgconfig):
libpcre2-8.pc
libpcre2-16.pc
libpcre2-32.pc
libpcre2-posix.pc
Header files (include):
pcre2.h
pcre2posix.h
Man pages (share/man/man{1,3}):
pcre2grep.1
pcre2test.1
pcre2-config.1
pcre2.3
pcre2*.3 (lots more pages, all starting "pcre2")
HTML documentation (share/doc/pcre2/html):
index.html
*.html (lots more pages, hyperlinked from index.html)
Text file documentation (share/doc/pcre2):
AUTHORS
COPYING
ChangeLog
LICENCE
NEWS
README
pcre2.txt (a concatenation of the man(3) pages)
pcre2test.txt the pcre2test man page
pcre2grep.txt the pcre2grep man page
pcre2-config.txt the pcre2-config man page
If you want to remove PCRE2 from your system, you can run "make uninstall".
This removes all the files that "make install" installed. However, it does not
remove any directories, because these are often shared with other programs.
Retrieving configuration information
------------------------------------
Running "make install" installs the command pcre2-config, which can be used to
recall information about the PCRE2 configuration and installation. For example:
pcre2-config --version
prints the version number, and
pcre2-config --libs8
outputs information about where the 8-bit library is installed. This command
can be included in makefiles for programs that use PCRE2, saving the programmer
from having to remember too many details. Run pcre2-config with no arguments to
obtain a list of possible arguments.
The pkg-config command is another system for saving and retrieving information
about installed libraries. Instead of separate commands for each library, a
single command is used. For example:
pkg-config --libs libpcre2-16
The data is held in *.pc files that are installed in a directory called
<prefix>/lib/pkgconfig.
Shared libraries
----------------
The default distribution builds PCRE2 as shared libraries and static libraries,
as long as the operating system supports shared libraries. Shared library
support relies on the "libtool" script which is built as part of the
"configure" process.
The libtool script is used to compile and link both shared and static
libraries. They are placed in a subdirectory called .libs when they are newly
built. The programs pcre2test and pcre2grep are built to use these uninstalled
libraries (by means of wrapper scripts in the case of shared libraries). When
you use "make install" to install shared libraries, pcre2grep and pcre2test are
automatically re-built to use the newly installed shared libraries before being
installed themselves. However, the versions left in the build directory still
use the uninstalled libraries.
To build PCRE2 using static libraries only you must use --disable-shared when
configuring it. For example:
./configure --prefix=/usr/gnu --disable-shared
Then run "make" in the usual way. Similarly, you can use --disable-static to
build only shared libraries.
Cross-compiling using autotools
-------------------------------
You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE2 for some other host. However, you should NOT
specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
source file is compiled and run on the local host, in order to generate the
inbuilt character tables (the pcre2_chartables.c file). This will probably not
work, because pcre2_dftables.c needs to be compiled with the local compiler,
not the cross compiler.
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
created by making a copy of pcre2_chartables.c.dist, which is a default set of
tables that assumes ASCII code. Cross-compiling with the default tables should
not be a problem.
If you need to modify the character tables when cross-compiling, you should
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
hand and run it on the local host to make a new version of
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
at build time" for more details.
Making new tarballs
-------------------
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
zip formats. The command "make distcheck" does the same, but then does a trial
build of the new distribution to ensure that it works.
If you have modified any of the man page sources in the doc directory, you
should first run the PrepareRelease script before making a distribution. This
script creates the .txt and HTML forms of the documentation from the man pages.
Testing PCRE2
-------------
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
There is another script called RunGrepTest that tests the pcre2grep command.
When JIT support is enabled, a third test program called pcre2_jit_test is
built. Both the scripts and all the program tests are run if you obey "make
check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD.
The RunTest script runs the pcre2test test program (which is documented in its
own man page) on each of the relevant testinput files in the testdata
directory, and compares the output with the contents of the corresponding
testoutput files. RunTest uses a file called testtry to hold the main output
from pcre2test. Other files whose names begin with "test" are used as working
files in some tests.
Some tests are relevant only when certain build-time options were selected. For
example, the tests for UTF-8/16/32 features are run only when Unicode support
is available. RunTest outputs a comment when it skips a test.
Many (but not all) of the tests that are not skipped are run twice if JIT
support is available. On the second run, JIT compilation is forced. This
testing can be suppressed by putting "nojit" on the RunTest command line.
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
libraries that are enabled. If you want to run just one set of tests, call
RunTest with either the -8, -16 or -32 option.
If valgrind is installed, you can run the tests under it by putting "valgrind"
on the RunTest command line. To run pcre2test on just one or more specific test
files, give their numbers as arguments to RunTest, for example:
RunTest 2 7 11
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
end), or a number preceded by ~ to exclude a test. For example:
Runtest 3-15 ~10
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
except test 13. Whatever order the arguments are in, the tests are always run
in numerical order.
You can also call RunTest with the single argument "list" to cause it to output
a list of tests.
The test sequence starts with "test 0", which is a special test that has no
input file, and whose output is not checked. This is because it will be
different on different hardware and with different configurations. The test
exists in order to exercise some of pcre2test's code that would not otherwise
be run.
Tests 1 and 2 can always be run, as they expect only plain text strings (not
UTF) and make no use of Unicode properties. The first test file can be fed
directly into the perltest.sh script to check that Perl gives the same results.
The only difference you should see is in the first few lines, where the Perl
version is given instead of the PCRE2 version. The second set of tests check
auxiliary functions, error detection, and run-time flags that are specific to
PCRE2. It also uses the debugging flags to check some of the internals of
pcre2_compile().
If you build PCRE2 with a locale setting that is not the standard C locale, the
character tables may be different (see next paragraph). In some cases, this may
cause failures in the second set of tests. For example, in a locale where the
isprint() function yields TRUE for characters in the range 128-255, the use of
[:isascii:] inside a character class defines a different set of characters, and
this shows up in this test as a difference in the compiled code, which is being
listed for checking. For example, where the comparison test output contains
[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other
cases. This is not a bug in PCRE2.
Test 3 checks pcre2_maketables(), the facility for building a set of character
tables for a specific locale and using them instead of the default tables. The
script uses the "locale" command to check for the availability of the "fr_FR",
"french", or "fr" locale, and uses the first one that it finds. If the "locale"
command fails, or if its output doesn't include "fr_FR", "french", or "fr" in
the list of available locales, the third test cannot be run, and a comment is
output to say why. If running this test produces an error like this:
** Failed to set locale "fr_FR"
it means that the given locale is not available on your system, despite being
listed by "locale". This does not mean that PCRE2 is broken. There are three
alternative output files for the third test, because three different versions
of the French locale have been encountered. The test passes if its output
matches any one of them.
Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible
with the perltest.sh script, and test 5 checking PCRE2-specific things.
Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in
non-UTF mode and UTF-mode with Unicode property support, respectively.
Test 8 checks some internal offsets and code size features, but it is run only
when Unicode support is enabled. The output is different in 8-bit, 16-bit, and
32-bit modes and for different link sizes, so there are different output files
for each mode and link size.
Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in
16-bit and 32-bit modes. These are tests that generate different output in
8-bit mode. Each pair are for general cases and Unicode support, respectively.
Test 13 checks the handling of non-UTF characters greater than 255 by
pcre2_dfa_match() in 16-bit and 32-bit modes.
Test 14 contains some special UTF and UCP tests that give different output for
different code unit widths.
Test 15 contains a number of tests that must not be run with JIT. They check,
among other non-JIT things, the match-limiting features of the intepretive
matcher.
Test 16 is run only when JIT support is not available. It checks that an
attempt to use JIT has the expected behaviour.
Test 17 is run only when JIT support is available. It checks JIT complete and
partial modes, match-limiting under JIT, and other JIT-specific features.
Tests 18 and 19 are run only in 8-bit mode. They check the POSIX interface to
the 8-bit library, without and with Unicode support, respectively.
Test 20 checks the serialization functions by writing a set of compiled
patterns to a file, and then reloading and checking them.
Tests 21 and 22 test \C support when the use of \C is not locked out, without
and with UTF support, respectively. Test 23 tests \C when it is locked out.
Tests 24 and 25 test the experimental pattern conversion functions, without and
with UTF support, respectively.
Character tables
----------------
For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, a set of tables that is
built into the library is used. The pcre2_maketables() function can be called
by an application to create a new set of tables in the current locale. This are
passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
compile context.
The source file called pcre2_chartables.c contains the default set of tables.
By default, this is created as a copy of pcre2_chartables.c.dist, which
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
specified for ./configure, a new version of pcre2_chartables.c is built by the
program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
character handling functions such as isalnum(), isalpha(), isupper(),
islower(), etc. to build the table sources. This means that the default C
locale that is set for your system will control the contents of these default
tables. You can change the default tables by editing pcre2_chartables.c and
then re-building PCRE2. If you do this, you should take care to ensure that the
file does not get automatically re-generated. The best way to do this is to
move pcre2_chartables.c.dist out of the way and replace it with your customized
tables.
When the pcre2_dftables program is run as a result of specifying
--enable-rebuild-chartables, it uses the default C locale that is set on your
system. It does not pay attention to the LC_xxx environment variables. In other
words, it uses the system's default locale rather than whatever the compiling
user happens to have set. If you really do want to build a source set of
character tables in a locale that is specified by the LC_xxx variables, you can
run the pcre2_dftables program by hand with the -L option. For example:
./pcre2_dftables -L pcre2_chartables.c.special
The second argument names the file where the source code for the tables is
written. The first two 256-byte tables provide lower casing and case flipping
functions, respectively. The next table consists of a number of 32-byte bit
maps which identify certain character classes such as digits, "word"
characters, white space, etc. These are used when building 32-byte bit maps
that represent character classes for code points less than 256. The final
256-byte table has bits indicating various character types, as follows:
1 white space character
2 letter
4 lower case letter
8 decimal digit
16 alphanumeric or '_'
You can also specify -b (with or without -L) when running pcre2_dftables. This
causes the tables to be written in binary instead of as source code. A set of
binary tables can be loaded into memory by an application and passed to
pcre2_compile() in the same way as tables created dynamically by calling
pcre2_maketables(). The tables are just a string of bytes, independent of
hardware characteristics such as endianness. This means they can be bundled
with an application that runs in different environments, to ensure consistent
behaviour.
See also the pcre2build section "Creating character tables at build time".
File manifest
-------------
The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in
the src directory:
src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
when --enable-rebuild-chartables is specified
src/pcre2_chartables.c.dist a default set of character tables that assume
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_compile.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_convert.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_extuni.c )
src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_script_run.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substitute.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_valid_utf.c )
src/pcre2_xclass.c )
src/pcre2_printint.c debugging function that is used by pcre2test,
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
src/config.h.in template for config.h, when built by "configure"
src/pcre2.h.in template for pcre2.h when built by "configure"
src/pcre2posix.h header for the external POSIX wrapper API
src/pcre2_internal.h header for internal use
src/pcre2_intmodedep.h a mode-specific internal header
src/pcre2_ucp.h header for Unicode property handling
sljit/* source files for the JIT compiler
(B) Source files for programs that use PCRE2:
src/pcre2demo.c simple demonstration of coding calls to PCRE2
src/pcre2grep.c source of a grep utility that uses PCRE2
src/pcre2test.c comprehensive test program
src/pcre2_jit_test.c JIT test program
(C) Auxiliary files:
132html script to turn "man" pages into HTML
AUTHORS information about the author of PCRE2
ChangeLog log of changes to the code
CleanTxt script to clean nroff output for txt man pages
Detrail script to remove trailing spaces
HACKING some notes about the internals of PCRE2
INSTALL generic installation instructions
LICENCE conditions for the use of PCRE2
COPYING the same, using GNU's standard name
Makefile.in ) template for Unix Makefile, which is built by
) "configure"
Makefile.am ) the automake input that was used to create
) Makefile.in
NEWS important changes in this release
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
PrepareRelease script to make preparations for "make dist"
README this file
RunTest a Unix shell script for running tests
RunGrepTest a Unix shell script for pcre2grep tests
aclocal.m4 m4 macros (generated by "aclocal")
config.guess ) files used by libtool,
config.sub ) used only when building a shared library
configure a configuring shell script (built by autoconf)
configure.ac ) the autoconf input that was used to build
) "configure" and config.h
depcomp ) script to find program dependencies, generated by
) automake
doc/*.3 man page sources for PCRE2
doc/*.1 man page sources for pcre2grep and pcre2test
doc/index.html.src the base HTML page
doc/html/* HTML documentation
doc/pcre2.txt plain text version of the man pages
doc/pcre2test.txt plain text documentation of test program
install-sh a shell script for installing files
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
ltmain.sh file used to build a libtool script
missing ) common stub for a few missing GNU programs while
) installing, generated by automake
mkinstalldirs script for making install directories
perltest.sh Script for running a Perl test program
pcre2-config.in source of script which retains PCRE2 information
testdata/testinput* test data for main library tests
testdata/testoutput* expected test results
testdata/grep* input and output for pcre2grep tests
testdata/* other supporting test files
(D) Auxiliary files for cmake support
cmake/COPYING-CMAKE-SCRIPTS
cmake/FindPackageHandleStandardArgs.cmake
cmake/FindEditline.cmake
cmake/FindReadline.cmake
CMakeLists.txt
config-cmake.h.in
(E) Auxiliary files for building PCRE2 "by hand"
src/pcre2.h.generic ) a version of the public PCRE2 header file
) for use in non-"configure" environments
src/config.h.generic ) a version of config.h for use in non-"configure"
) environments
Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
Last updated: 04 December 2020

1548
pcre2/aclocal.m4 vendored Normal file

File diff suppressed because it is too large Load Diff

271
pcre2/ar-lib vendored Executable file

@ -0,0 +1,271 @@
#! /bin/sh
# Wrapper for Microsoft lib.exe
me=ar-lib
scriptversion=2019-07-04.01; # UTC
# Copyright (C) 2010-2020 Free Software Foundation, Inc.
# Written by Peter Rosin <peda@lysator.liu.se>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# func_error message
func_error ()
{
echo "$me: $1" 1>&2
exit 1
}
file_conv=
# func_file_conv build_file
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts.
func_file_conv ()
{
file=$1
case $file in
/ | /[!/]*) # absolute file, and not a UNC file
if test -z "$file_conv"; then
# lazily determine how to convert abs files
case `uname -s` in
MINGW*)
file_conv=mingw
;;
CYGWIN* | MSYS*)
file_conv=cygwin
;;
*)
file_conv=wine
;;
esac
fi
case $file_conv in
mingw)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
cygwin | msys)
file=`cygpath -m "$file" || echo "$file"`
;;
wine)
file=`winepath -w "$file" || echo "$file"`
;;
esac
;;
esac
}
# func_at_file at_file operation archive
# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE
# for each of them.
# When interpreting the content of the @FILE, do NOT use func_file_conv,
# since the user would need to supply preconverted file names to
# binutils ar, at least for MinGW.
func_at_file ()
{
operation=$2
archive=$3
at_file_contents=`cat "$1"`
eval set x "$at_file_contents"
shift
for member
do
$AR -NOLOGO $operation:"$member" "$archive" || exit $?
done
}
case $1 in
'')
func_error "no command. Try '$0 --help' for more information."
;;
-h | --h*)
cat <<EOF
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
Members may be specified in a file named with @FILE.
EOF
exit $?
;;
-v | --v*)
echo "$me, version $scriptversion"
exit $?
;;
esac
if test $# -lt 3; then
func_error "you must specify a program, an action and an archive"
fi
AR=$1
shift
while :
do
if test $# -lt 2; then
func_error "you must specify a program, an action and an archive"
fi
case $1 in
-lib | -LIB \
| -ltcg | -LTCG \
| -machine* | -MACHINE* \
| -subsystem* | -SUBSYSTEM* \
| -verbose | -VERBOSE \
| -wx* | -WX* )
AR="$AR $1"
shift
;;
*)
action=$1
shift
break
;;
esac
done
orig_archive=$1
shift
func_file_conv "$orig_archive"
archive=$file
# strip leading dash in $action
action=${action#-}
delete=
extract=
list=
quick=
replace=
index=
create=
while test -n "$action"
do
case $action in
d*) delete=yes ;;
x*) extract=yes ;;
t*) list=yes ;;
q*) quick=yes ;;
r*) replace=yes ;;
s*) index=yes ;;
S*) ;; # the index is always updated implicitly
c*) create=yes ;;
u*) ;; # TODO: don't ignore the update modifier
v*) ;; # TODO: don't ignore the verbose modifier
*)
func_error "unknown action specified"
;;
esac
action=${action#?}
done
case $delete$extract$list$quick$replace,$index in
yes,* | ,yes)
;;
yesyes*)
func_error "more than one action specified"
;;
*)
func_error "no action specified"
;;
esac
if test -n "$delete"; then
if test ! -f "$orig_archive"; then
func_error "archive not found"
fi
for member
do
case $1 in
@*)
func_at_file "${1#@}" -REMOVE "$archive"
;;
*)
func_file_conv "$1"
$AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
;;
esac
done
elif test -n "$extract"; then
if test ! -f "$orig_archive"; then
func_error "archive not found"
fi
if test $# -gt 0; then
for member
do
case $1 in
@*)
func_at_file "${1#@}" -EXTRACT "$archive"
;;
*)
func_file_conv "$1"
$AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
;;
esac
done
else
$AR -NOLOGO -LIST "$archive" | tr -d '\r' | sed -e 's/\\/\\\\/g' \
| while read member
do
$AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
done
fi
elif test -n "$quick$replace"; then
if test ! -f "$orig_archive"; then
if test -z "$create"; then
echo "$me: creating $orig_archive"
fi
orig_archive=
else
orig_archive=$archive
fi
for member
do
case $1 in
@*)
func_file_conv "${1#@}"
set x "$@" "@$file"
;;
*)
func_file_conv "$1"
set x "$@" "$file"
;;
esac
shift
shift
done
if test -n "$orig_archive"; then
$AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
else
$AR -NOLOGO -OUT:"$archive" "$@" || exit $?
fi
elif test -n "$list"; then
if test ! -f "$orig_archive"; then
func_error "archive not found"
fi
$AR -NOLOGO -LIST "$archive" || exit $?
fi

@ -0,0 +1,22 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,17 @@
# Modified from FindReadline.cmake (PH Feb 2012)
if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
set(EDITLINE_FOUND TRUE)
else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
FIND_PATH(EDITLINE_INCLUDE_DIR readline.h
/usr/include/editline
/usr/include/edit/readline
/usr/include/readline
)
FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit)
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY )
MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)

@ -0,0 +1,58 @@
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
# This macro is intended to be used in FindXXX.cmake modules files.
# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and
# it also sets the <UPPERCASED_NAME>_FOUND variable.
# The package is found if all variables listed are TRUE.
# Example:
#
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
#
# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and
# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
# If it is not found and REQUIRED was used, it fails with FATAL_ERROR,
# independent whether QUIET was used or not.
# If it is found, the location is reported using the VAR1 argument, so
# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
# If the second argument is DEFAULT_MSG, the message in the failure case will
# be "Could NOT find LibXml2", if you don't like this message you can specify
# your own custom failure message there.
MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 )
IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
IF (${_NAME}_FIND_REQUIRED)
SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
ELSE (${_NAME}_FIND_REQUIRED)
SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
ENDIF (${_NAME}_FIND_REQUIRED)
ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
SET(_FAIL_MESSAGE "${_FAIL_MSG}")
ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
STRING(TOUPPER ${_NAME} _NAME_UPPER)
SET(${_NAME_UPPER}_FOUND TRUE)
IF(NOT ${_VAR1})
SET(${_NAME_UPPER}_FOUND FALSE)
ENDIF(NOT ${_VAR1})
FOREACH(_CURRENT_VAR ${ARGN})
IF(NOT ${_CURRENT_VAR})
SET(${_NAME_UPPER}_FOUND FALSE)
ENDIF(NOT ${_CURRENT_VAR})
ENDFOREACH(_CURRENT_VAR)
IF (${_NAME_UPPER}_FOUND)
IF (NOT ${_NAME}_FIND_QUIETLY)
MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}")
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
ELSE (${_NAME_UPPER}_FOUND)
IF (${_NAME}_FIND_REQUIRED)
MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}")
ELSE (${_NAME}_FIND_REQUIRED)
IF (NOT ${_NAME}_FIND_QUIETLY)
MESSAGE(STATUS "${_FAIL_MESSAGE}")
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
ENDIF (${_NAME}_FIND_REQUIRED)
ENDIF (${_NAME_UPPER}_FOUND)
ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS)

@ -0,0 +1,29 @@
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
# --> BSD licensed
#
# GNU Readline library finder
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
set(READLINE_FOUND TRUE)
else(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h
/usr/include/readline
)
# 2008-04-22 The next clause used to read like this:
#
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
# include(FindPackageHandleStandardArgs)
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
#
# I was advised to modify it such that it will find an ncurses library if
# required, but not if one was explicitly given, that is, it allows the
# default to be overridden. PH
FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY )
MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY)
endif(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)

348
pcre2/compile vendored Executable file

@ -0,0 +1,348 @@
#! /bin/sh
# Wrapper for compilers which do not understand '-c -o'.
scriptversion=2018-03-07.03; # UTC
# Copyright (C) 1999-2020 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
nl='
'
# We need space, tab and new line, in precisely that order. Quoting is
# there to prevent tools from complaining about whitespace usage.
IFS=" "" $nl"
file_conv=
# func_file_conv build_file lazy
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts. If the determined conversion
# type is listed in (the comma separated) LAZY, no conversion will
# take place.
func_file_conv ()
{
file=$1
case $file in
/ | /[!/]*) # absolute file, and not a UNC file
if test -z "$file_conv"; then
# lazily determine how to convert abs files
case `uname -s` in
MINGW*)
file_conv=mingw
;;
CYGWIN* | MSYS*)
file_conv=cygwin
;;
*)
file_conv=wine
;;
esac
fi
case $file_conv/,$2, in
*,$file_conv,*)
;;
mingw/*)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
cygwin/* | msys/*)
file=`cygpath -m "$file" || echo "$file"`
;;
wine/*)
file=`winepath -w "$file" || echo "$file"`
;;
esac
;;
esac
}
# func_cl_dashL linkdir
# Make cl look for libraries in LINKDIR
func_cl_dashL ()
{
func_file_conv "$1"
if test -z "$lib_path"; then
lib_path=$file
else
lib_path="$lib_path;$file"
fi
linker_opts="$linker_opts -LIBPATH:$file"
}
# func_cl_dashl library
# Do a library search-path lookup for cl
func_cl_dashl ()
{
lib=$1
found=no
save_IFS=$IFS
IFS=';'
for dir in $lib_path $LIB
do
IFS=$save_IFS
if $shared && test -f "$dir/$lib.dll.lib"; then
found=yes
lib=$dir/$lib.dll.lib
break
fi
if test -f "$dir/$lib.lib"; then
found=yes
lib=$dir/$lib.lib
break
fi
if test -f "$dir/lib$lib.a"; then
found=yes
lib=$dir/lib$lib.a
break
fi
done
IFS=$save_IFS
if test "$found" != yes; then
lib=$lib.lib
fi
}
# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
func_cl_wrapper ()
{
# Assume a capable shell
lib_path=
shared=:
linker_opts=
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
eat=1
case $2 in
*.o | *.[oO][bB][jJ])
func_file_conv "$2"
set x "$@" -Fo"$file"
shift
;;
*)
func_file_conv "$2"
set x "$@" -Fe"$file"
shift
;;
esac
;;
-I)
eat=1
func_file_conv "$2" mingw
set x "$@" -I"$file"
shift
;;
-I*)
func_file_conv "${1#-I}" mingw
set x "$@" -I"$file"
shift
;;
-l)
eat=1
func_cl_dashl "$2"
set x "$@" "$lib"
shift
;;
-l*)
func_cl_dashl "${1#-l}"
set x "$@" "$lib"
shift
;;
-L)
eat=1
func_cl_dashL "$2"
;;
-L*)
func_cl_dashL "${1#-L}"
;;
-static)
shared=false
;;
-Wl,*)
arg=${1#-Wl,}
save_ifs="$IFS"; IFS=','
for flag in $arg; do
IFS="$save_ifs"
linker_opts="$linker_opts $flag"
done
IFS="$save_ifs"
;;
-Xlinker)
eat=1
linker_opts="$linker_opts $2"
;;
-*)
set x "$@" "$1"
shift
;;
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
func_file_conv "$1"
set x "$@" -Tp"$file"
shift
;;
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
func_file_conv "$1" mingw
set x "$@" "$file"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -n "$linker_opts"; then
linker_opts="-link$linker_opts"
fi
exec "$@" $linker_opts
exit 1
}
eat=
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]
Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.
If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "compile $scriptversion"
exit $?
;;
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \
icl | *[/\\]icl | icl.exe | *[/\\]icl.exe )
func_cl_wrapper "$@" # Doesn't return...
;;
esac
ofile=
cfile=
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
# So we strip '-o arg' only if arg is an object.
eat=1
case $2 in
*.o | *.obj)
ofile=$2
;;
*)
set x "$@" -o "$2"
shift
;;
esac
;;
*.c)
cfile=$1
set x "$@" "$1"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -z "$ofile" || test -z "$cfile"; then
# If no '-o' option was seen then we might have been invoked from a
# pattern rule where we don't need one. That is ok -- this is a
# normal compilation that the losing compiler can handle. If no
# '.c' file was seen then we are probably linking. That is also
# ok.
exec "$@"
fi
# Name of file we expect compiler to create.
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file. Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
while true; do
if mkdir "$lockdir" >/dev/null 2>&1; then
break
fi
sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
trap "rmdir '$lockdir'; exit 1" 1 2 15
# Run the compile.
"$@"
ret=$?
if test -f "$cofile"; then
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
elif test -f "${cofile}bj"; then
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
fi
rmdir "$lockdir"
exit $ret
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:

1667
pcre2/config.guess vendored Executable file

File diff suppressed because it is too large Load Diff

1793
pcre2/config.sub vendored Executable file

File diff suppressed because it is too large Load Diff

1114
pcre2/configure.ac vendored

File diff suppressed because it is too large Load Diff

791
pcre2/depcomp vendored Executable file

@ -0,0 +1,791 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
scriptversion=2018-03-07.03; # UTC
# Copyright (C) 1999-2020 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
Run PROGRAMS ARGS to compile a file, generating dependencies
as side-effects.
Environment variables:
depmode Dependency tracking mode.
source Source file read by 'PROGRAMS ARGS'.
object Object file output by 'PROGRAMS ARGS'.
DEPDIR directory where to store dependencies.
depfile Dependency file to output.
tmpdepfile Temporary file to use when outputting dependencies.
libtool Whether libtool is used (yes/no).
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "depcomp $scriptversion"
exit $?
;;
esac
# Get the directory component of the given path, and save it in the
# global variables '$dir'. Note that this directory component will
# be either empty or ending with a '/' character. This is deliberate.
set_dir_from ()
{
case $1 in
*/*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
*) dir=;;
esac
}
# Get the suffix-stripped basename of the given path, and save it the
# global variable '$base'.
set_base_from ()
{
base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
}
# If no dependency file was actually created by the compiler invocation,
# we still have to create a dummy depfile, to avoid errors with the
# Makefile "include basename.Plo" scheme.
make_dummy_depfile ()
{
echo "#dummy" > "$depfile"
}
# Factor out some common post-processing of the generated depfile.
# Requires the auxiliary global variable '$tmpdepfile' to be set.
aix_post_process_depfile ()
{
# If the compiler actually managed to produce a dependency file,
# post-process it.
if test -f "$tmpdepfile"; then
# Each line is of the form 'foo.o: dependency.h'.
# Do two passes, one to just change these to
# $object: dependency.h
# and one to simply output
# dependency.h:
# which is needed to avoid the deleted-header problem.
{ sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
} > "$depfile"
rm -f "$tmpdepfile"
else
make_dummy_depfile
fi
}
# A tabulation character.
tab=' '
# A newline character.
nl='
'
# Character ranges might be problematic outside the C locale.
# These definitions help.
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
lower=abcdefghijklmnopqrstuvwxyz
digits=0123456789
alpha=${upper}${lower}
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
echo "depcomp: Variables source, object and depmode must be set" 1>&2
exit 1
fi
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
depfile=${depfile-`echo "$object" |
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
rm -f "$tmpdepfile"
# Avoid interferences from the environment.
gccflag= dashmflag=
# Some modes work just like other modes, but use different flags. We
# parameterize here, but still list the modes in the big case below,
# to make depend.m4 easier to write. Note that we *cannot* use a case
# here, because this file can only contain one case statement.
if test "$depmode" = hp; then
# HP compiler uses -M and no extra arg.
gccflag=-M
depmode=gcc
fi
if test "$depmode" = dashXmstdout; then
# This is just like dashmstdout with a different argument.
dashmflag=-xM
depmode=dashmstdout
fi
cygpath_u="cygpath -u -f -"
if test "$depmode" = msvcmsys; then
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
fi
if test "$depmode" = msvc7msys; then
# This is just like msvc7 but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvc7
fi
if test "$depmode" = xlc; then
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
gccflag=-qmakedep=gcc,-MF
depmode=gcc
fi
case "$depmode" in
gcc3)
## gcc 3 implements dependency tracking that does exactly what
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
## it if -MD -MP comes after the -MF stuff. Hmm.
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
## the command line argument order; so add the flags where they
## appear in depend2.am. Note that the slowdown incurred here
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
for arg
do
case $arg in
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
*) set fnord "$@" "$arg" ;;
esac
shift # fnord
shift # $arg
done
"$@"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
mv "$tmpdepfile" "$depfile"
;;
gcc)
## Note that this doesn't just cater to obsosete pre-3.x GCC compilers.
## but also to in-use compilers like IMB xlc/xlC and the HP C compiler.
## (see the conditional assignment to $gccflag above).
## There are various ways to get dependency output from gcc. Here's
## why we pick this rather obscure method:
## - Don't want to use -MD because we'd like the dependencies to end
## up in a subdir. Having to rename by hand is ugly.
## (We might end up doing this anyway to support other compilers.)
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
## -MM, not -M (despite what the docs say). Also, it might not be
## supported by the other compilers which use the 'gcc' depmode.
## - Using -M directly means running the compiler twice (even worse
## than renaming).
if test -z "$gccflag"; then
gccflag=-MD,
fi
"$@" -Wp,"$gccflag$tmpdepfile"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The second -e expression handles DOS-style file names with drive
# letters.
sed -e 's/^[^:]*: / /' \
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
## This next piece of magic avoids the "deleted header file" problem.
## The problem is that when a header file which appears in a .P file
## is deleted, the dependency causes make to die (because there is
## typically no way to rebuild the header). We avoid this by adding
## dummy dependencies for each header file. Too bad gcc doesn't do
## this for us directly.
## Some versions of gcc put a space before the ':'. On the theory
## that the space means something, we add a space to the output as
## well. hp depmode also adds that space, but also prefixes the VPATH
## to the object. Take care to not repeat it in the output.
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
sgi)
if test "$libtool" = yes; then
"$@" "-Wp,-MDupdate,$tmpdepfile"
else
"$@" -MDupdate "$tmpdepfile"
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
echo "$object : \\" > "$depfile"
# Clip off the initial element (the dependent). Don't try to be
# clever and replace this with sed code, as IRIX sed won't handle
# lines with more than a fixed number of characters (4096 in
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
# the IRIX cc adds comments like '#:fec' to the end of the
# dependency line.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
| tr "$nl" ' ' >> "$depfile"
echo >> "$depfile"
# The second pass generates a dummy entry for each header file.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile"
;;
xlc)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
aix)
# The C for AIX Compiler uses -M and outputs the dependencies
# in a .u file. In older versions, this file always lives in the
# current directory. Also, the AIX compiler puts '$object:' at the
# start of each line; $object doesn't have directory information.
# Version 6 uses the directory in both cases.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.u
tmpdepfile2=$base.u
tmpdepfile3=$dir.libs/$base.u
"$@" -Wc,-M
else
tmpdepfile1=$dir$base.u
tmpdepfile2=$dir$base.u
tmpdepfile3=$dir$base.u
"$@" -M
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
aix_post_process_depfile
;;
tcc)
# tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26
# FIXME: That version still under development at the moment of writing.
# Make that this statement remains true also for stable, released
# versions.
# It will wrap lines (doesn't matter whether long or short) with a
# trailing '\', as in:
#
# foo.o : \
# foo.c \
# foo.h \
#
# It will put a trailing '\' even on the last line, and will use leading
# spaces rather than leading tabs (at least since its commit 0394caf7
# "Emit spaces for -MD").
"$@" -MD -MF "$tmpdepfile"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
# We have to change lines of the first kind to '$object: \'.
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
# And for each line of the second kind, we have to emit a 'dep.h:'
# dummy dependency, to avoid the deleted-header problem.
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
rm -f "$tmpdepfile"
;;
## The order of this option in the case statement is important, since the
## shell code in configure will try each of these formats in the order
## listed in this file. A plain '-MD' option would be understood by many
## compilers, so we must ensure this comes after the gcc and icc options.
pgcc)
# Portland's C compiler understands '-MD'.
# Will always output deps to 'file.d' where file is the root name of the
# source file under compilation, even if file resides in a subdirectory.
# The object file name does not affect the name of the '.d' file.
# pgcc 10.2 will output
# foo.o: sub/foo.c sub/foo.h
# and will wrap long lines using '\' :
# foo.o: sub/foo.c ... \
# sub/foo.h ... \
# ...
set_dir_from "$object"
# Use the source, not the object, to determine the base name, since
# that's sadly what pgcc will do too.
set_base_from "$source"
tmpdepfile=$base.d
# For projects that build the same source file twice into different object
# files, the pgcc approach of using the *source* file root name can cause
# problems in parallel builds. Use a locking strategy to avoid stomping on
# the same $tmpdepfile.
lockdir=$base.d-lock
trap "
echo '$0: caught signal, cleaning up...' >&2
rmdir '$lockdir'
exit 1
" 1 2 13 15
numtries=100
i=$numtries
while test $i -gt 0; do
# mkdir is a portable test-and-set.
if mkdir "$lockdir" 2>/dev/null; then
# This process acquired the lock.
"$@" -MD
stat=$?
# Release the lock.
rmdir "$lockdir"
break
else
# If the lock is being held by a different process, wait
# until the winning process is done or we timeout.
while test -d "$lockdir" && test $i -gt 0; do
sleep 1
i=`expr $i - 1`
done
fi
i=`expr $i - 1`
done
trap - 1 2 13 15
if test $i -le 0; then
echo "$0: failed to acquire lock after $numtries attempts" >&2
echo "$0: check lockdir '$lockdir'" >&2
exit 1
fi
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each line is of the form `foo.o: dependent.h',
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
# Do two passes, one to just change these to
# `$object: dependent.h' and one to simply `dependent.h:'.
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp2)
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
# compilers, which have integrated preprocessors. The correct option
# to use with these is +Maked; it writes dependencies to a file named
# 'foo.d', which lands next to the object file, wherever that
# happens to be.
# Much of this is similar to the tru64 case; see comments there.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir.libs/$base.d
"$@" -Wc,+Maked
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
"$@" +Maked
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
# Add 'dependent.h:' lines.
sed -ne '2,${
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile" "$tmpdepfile2"
;;
tru64)
# The Tru64 compiler uses -MD to generate dependencies as a side
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
# dependencies in 'foo.d' instead, so we check for that too.
# Subdirectories are respected.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
# Libtool generates 2 separate objects for the 2 libraries. These
# two compilations output dependencies in $dir.libs/$base.o.d and
# in $dir$base.o.d. We have to check for both files, because
# one of the two compilations can be disabled. We should prefer
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
# automatically cleaned when .libs/ is deleted, while ignoring
# the former would cause a distcleancheck panic.
tmpdepfile1=$dir$base.o.d # libtool 1.5
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
"$@" -Wc,-MD
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
tmpdepfile3=$dir$base.d
"$@" -MD
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
# Same post-processing that is required for AIX mode.
aix_post_process_depfile
;;
msvc7)
if test "$libtool" = yes; then
showIncludes=-Wc,-showIncludes
else
showIncludes=-showIncludes
fi
"$@" $showIncludes > "$tmpdepfile"
stat=$?
grep -v '^Note: including file: ' "$tmpdepfile"
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The first sed program below extracts the file names and escapes
# backslashes for cygpath. The second sed program outputs the file
# name when reading, but also accumulates all include files in the
# hold buffer in order to output them again at the end. This only
# works with sed implementations that can handle large buffers.
sed < "$tmpdepfile" -n '
/^Note: including file: *\(.*\)/ {
s//\1/
s/\\/\\\\/g
p
}' | $cygpath_u | sort -u | sed -n '
s/ /\\ /g
s/\(.*\)/'"$tab"'\1 \\/p
s/.\(.*\) \\/\1:/
H
$ {
s/.*/'"$tab"'/
G
p
}' >> "$depfile"
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
rm -f "$tmpdepfile"
;;
msvc7msys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
#nosideeffect)
# This comment above is used by automake to tell side-effect
# dependency tracking mechanisms from slower ones.
dashmstdout)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout, regardless of -o.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
test -z "$dashmflag" && dashmflag=-M
# Require at least two characters before searching for ':'
# in the target name. This is to cope with DOS-style filenames:
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
"$@" $dashmflag |
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
rm -f "$depfile"
cat < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this sed invocation
# correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
dashXmstdout)
# This case only exists to satisfy depend.m4. It is never actually
# run, as this mode is specially recognized in the preamble.
exit 1
;;
makedepend)
"$@" || exit $?
# Remove any Libtool call
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# X makedepend
shift
cleared=no eat=no
for arg
do
case $cleared in
no)
set ""; shift
cleared=yes ;;
esac
if test $eat = yes; then
eat=no
continue
fi
case "$arg" in
-D*|-I*)
set fnord "$@" "$arg"; shift ;;
# Strip any option that makedepend may not understand. Remove
# the object too, otherwise makedepend will parse it as a source file.
-arch)
eat=yes ;;
-*|$object)
;;
*)
set fnord "$@" "$arg"; shift ;;
esac
done
obj_suffix=`echo "$object" | sed 's/^.*\././'`
touch "$tmpdepfile"
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
rm -f "$depfile"
# makedepend may prepend the VPATH from the source file name to the object.
# No need to regex-escape $object, excess matching of '.' is harmless.
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process the last invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed '1,2d' "$tmpdepfile" \
| tr ' ' "$nl" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile" "$tmpdepfile".bak
;;
cpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
"$@" -E \
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
| sed '$ s: \\$::' > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
cat < "$tmpdepfile" >> "$depfile"
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvisualcpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
IFS=" "
for arg
do
case "$arg" in
-o)
shift
;;
$object)
shift
;;
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
set fnord "$@"
shift
shift
;;
*)
set fnord "$@" "$arg"
shift
shift
;;
esac
done
"$@" -E 2>/dev/null |
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
echo "$tab" >> "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvcmsys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
none)
exec "$@"
;;
*)
echo "Unknown depmode $depmode" 1>&2
exit 1
;;
esac
exit 0
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:

529
pcre2/install-sh vendored Executable file

@ -0,0 +1,529 @@
#!/bin/sh
# install - install a program, script, or datafile
scriptversion=2018-03-11.20; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
#
# Copyright (C) 1994 X Consortium
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of the X Consortium shall not
# be used in advertising or otherwise to promote the sale, use or other deal-
# ings in this Software without prior written authorization from the X Consor-
# tium.
#
#
# FSF changes to this file are in the public domain.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# 'make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.
tab=' '
nl='
'
IFS=" $tab$nl"
# Set DOITPROG to "echo" to test this script.
doit=${DOITPROG-}
doit_exec=${doit:-exec}
# Put in absolute file names if you don't have them in your path;
# or use environment vars.
chgrpprog=${CHGRPPROG-chgrp}
chmodprog=${CHMODPROG-chmod}
chownprog=${CHOWNPROG-chown}
cmpprog=${CMPPROG-cmp}
cpprog=${CPPROG-cp}
mkdirprog=${MKDIRPROG-mkdir}
mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}
posix_mkdir=
# Desired mode of installed file.
mode=0755
chgrpcmd=
chmodcmd=$chmodprog
chowncmd=
mvcmd=$mvprog
rmcmd="$rmprog -f"
stripcmd=
src=
dst=
dir_arg=
dst_arg=
copy_on_change=false
is_target_a_directory=possibly
usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
or: $0 [OPTION]... SRCFILES... DIRECTORY
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
or: $0 [OPTION]... -d DIRECTORIES...
In the 1st form, copy SRCFILE to DSTFILE.
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
In the 4th, create DIRECTORIES.
Options:
--help display this help and exit.
--version display version info and exit.
-c (ignored)
-C install only if different (preserve the last data modification time)
-d create directories instead of installing files.
-g GROUP $chgrpprog installed files to GROUP.
-m MODE $chmodprog installed files to MODE.
-o USER $chownprog installed files to USER.
-s $stripprog installed files.
-t DIRECTORY install into DIRECTORY.
-T report an error if DSTFILE is a directory.
Environment variables override the default commands:
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
RMPROG STRIPPROG
"
while test $# -ne 0; do
case $1 in
-c) ;;
-C) copy_on_change=true;;
-d) dir_arg=true;;
-g) chgrpcmd="$chgrpprog $2"
shift;;
--help) echo "$usage"; exit $?;;
-m) mode=$2
case $mode in
*' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*)
echo "$0: invalid mode: $mode" >&2
exit 1;;
esac
shift;;
-o) chowncmd="$chownprog $2"
shift;;
-s) stripcmd=$stripprog;;
-t)
is_target_a_directory=always
dst_arg=$2
# Protect names problematic for 'test' and other utilities.
case $dst_arg in
-* | [=\(\)!]) dst_arg=./$dst_arg;;
esac
shift;;
-T) is_target_a_directory=never;;
--version) echo "$0 $scriptversion"; exit $?;;
--) shift
break;;
-*) echo "$0: invalid option: $1" >&2
exit 1;;
*) break;;
esac
shift
done
# We allow the use of options -d and -T together, by making -d
# take the precedence; this is for compatibility with GNU install.
if test -n "$dir_arg"; then
if test -n "$dst_arg"; then
echo "$0: target directory not allowed when installing a directory." >&2
exit 1
fi
fi
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
# When -d is used, all remaining arguments are directories to create.
# When -t is used, the destination is already specified.
# Otherwise, the last argument is the destination. Remove it from $@.
for arg
do
if test -n "$dst_arg"; then
# $@ is not empty: it contains at least $arg.
set fnord "$@" "$dst_arg"
shift # fnord
fi
shift # arg
dst_arg=$arg
# Protect names problematic for 'test' and other utilities.
case $dst_arg in
-* | [=\(\)!]) dst_arg=./$dst_arg;;
esac
done
fi
if test $# -eq 0; then
if test -z "$dir_arg"; then
echo "$0: no input file specified." >&2
exit 1
fi
# It's OK to call 'install-sh -d' without argument.
# This can happen when creating conditional directories.
exit 0
fi
if test -z "$dir_arg"; then
if test $# -gt 1 || test "$is_target_a_directory" = always; then
if test ! -d "$dst_arg"; then
echo "$0: $dst_arg: Is not a directory." >&2
exit 1
fi
fi
fi
if test -z "$dir_arg"; then
do_exit='(exit $ret); exit $ret'
trap "ret=129; $do_exit" 1
trap "ret=130; $do_exit" 2
trap "ret=141; $do_exit" 13
trap "ret=143; $do_exit" 15
# Set umask so as not to create temps with too-generous modes.
# However, 'strip' requires both read and write access to temps.
case $mode in
# Optimize common cases.
*644) cp_umask=133;;
*755) cp_umask=22;;
*[0-7])
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw='% 200'
fi
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
*)
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw=,u+rw
fi
cp_umask=$mode$u_plus_rw;;
esac
fi
for src
do
# Protect names problematic for 'test' and other utilities.
case $src in
-* | [=\(\)!]) src=./$src;;
esac
if test -n "$dir_arg"; then
dst=$src
dstdir=$dst
test -d "$dstdir"
dstdir_status=$?
else
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if test ! -f "$src" && test ! -d "$src"; then
echo "$0: $src does not exist." >&2
exit 1
fi
if test -z "$dst_arg"; then
echo "$0: no destination specified." >&2
exit 1
fi
dst=$dst_arg
# If destination is a directory, append the input filename.
if test -d "$dst"; then
if test "$is_target_a_directory" = never; then
echo "$0: $dst_arg: Is a directory" >&2
exit 1
fi
dstdir=$dst
dstbase=`basename "$src"`
case $dst in
*/) dst=$dst$dstbase;;
*) dst=$dst/$dstbase;;
esac
dstdir_status=0
else
dstdir=`dirname "$dst"`
test -d "$dstdir"
dstdir_status=$?
fi
fi
case $dstdir in
*/) dstdirslash=$dstdir;;
*) dstdirslash=$dstdir/;;
esac
obsolete_mkdir_used=false
if test $dstdir_status != 0; then
case $posix_mkdir in
'')
# Create intermediate dirs using mode 755 as modified by the umask.
# This is like FreeBSD 'install' as of 1997-10-28.
umask=`umask`
case $stripcmd.$umask in
# Optimize common cases.
*[2367][2367]) mkdir_umask=$umask;;
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
*[0-7])
mkdir_umask=`expr $umask + 22 \
- $umask % 100 % 40 + $umask % 20 \
- $umask % 10 % 4 + $umask % 2
`;;
*) mkdir_umask=$umask,go-w;;
esac
# With -d, create the new directory with the user-specified mode.
# Otherwise, rely on $mkdir_umask.
if test -n "$dir_arg"; then
mkdir_mode=-m$mode
else
mkdir_mode=
fi
posix_mkdir=false
case $umask in
*[123567][0-7][0-7])
# POSIX mkdir -p sets u+wx bits regardless of umask, which
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
;;
*)
# Note that $RANDOM variable is not portable (e.g. dash); Use it
# here however when possible just to lower collision chance.
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0
# Because "mkdir -p" follows existing symlinks and we likely work
# directly in world-writeable /tmp, make sure that the '$tmpdir'
# directory is successfully created first before we actually test
# 'mkdir -p' feature.
if (umask $mkdir_umask &&
$mkdirprog $mkdir_mode "$tmpdir" &&
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1
then
if test -z "$dir_arg" || {
# Check for POSIX incompatibilities with -m.
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
# other-writable bit of parent directory when it shouldn't.
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
test_tmpdir="$tmpdir/a"
ls_ld_tmpdir=`ls -ld "$test_tmpdir"`
case $ls_ld_tmpdir in
d????-?r-*) different_mode=700;;
d????-?--*) different_mode=755;;
*) false;;
esac &&
$mkdirprog -m$different_mode -p -- "$test_tmpdir" && {
ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"`
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
}
}
then posix_mkdir=:
fi
rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir"
else
# Remove any dirs left behind by ancient mkdir implementations.
rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null
fi
trap '' 0;;
esac;;
esac
if
$posix_mkdir && (
umask $mkdir_umask &&
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
)
then :
else
# The umask is ridiculous, or mkdir does not conform to POSIX,
# or it failed possibly due to a race condition. Create the
# directory the slow way, step by step, checking for races as we go.
case $dstdir in
/*) prefix='/';;
[-=\(\)!]*) prefix='./';;
*) prefix='';;
esac
oIFS=$IFS
IFS=/
set -f
set fnord $dstdir
shift
set +f
IFS=$oIFS
prefixes=
for d
do
test X"$d" = X && continue
prefix=$prefix$d
if test -d "$prefix"; then
prefixes=
else
if $posix_mkdir; then
(umask=$mkdir_umask &&
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
# Don't fail if two instances are running concurrently.
test -d "$prefix" || exit 1
else
case $prefix in
*\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
*) qprefix=$prefix;;
esac
prefixes="$prefixes '$qprefix'"
fi
fi
prefix=$prefix/
done
if test -n "$prefixes"; then
# Don't fail if two instances are running concurrently.
(umask $mkdir_umask &&
eval "\$doit_exec \$mkdirprog $prefixes") ||
test -d "$dstdir" || exit 1
obsolete_mkdir_used=true
fi
fi
fi
if test -n "$dir_arg"; then
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
else
# Make a couple of temp file names in the proper directory.
dsttmp=${dstdirslash}_inst.$$_
rmtmp=${dstdirslash}_rm.$$_
# Trap to clean up those temp files at exit.
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
# Copy the file name to the temp name.
(umask $cp_umask &&
{ test -z "$stripcmd" || {
# Create $dsttmp read-write so that cp doesn't create it read-only,
# which would cause strip to fail.
if test -z "$doit"; then
: >"$dsttmp" # No need to fork-exec 'touch'.
else
$doit touch "$dsttmp"
fi
}
} &&
$doit_exec $cpprog "$src" "$dsttmp") &&
# and set any options; do chmod last to preserve setuid bits.
#
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $cpprog $src $dsttmp" command.
#
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
# If -C, don't bother to copy if it wouldn't change the file.
if $copy_on_change &&
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
set -f &&
set X $old && old=:$2:$4:$5:$6 &&
set X $new && new=:$2:$4:$5:$6 &&
set +f &&
test "$old" = "$new" &&
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
then
rm -f "$dsttmp"
else
# Rename the file to the real destination.
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
# The rename failed, perhaps because mv can't rename something else
# to itself, or perhaps because mv is so ancient that it does not
# support -f.
{
# Now remove or move aside any old file at destination location.
# We try this two ways since rm can't unlink itself on some
# systems and the destination file might be busy for other
# reasons. In this case, the final cleanup might fail but the new
# file should still install successfully.
{
test ! -f "$dst" ||
$doit $rmcmd -f "$dst" 2>/dev/null ||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
} ||
{ echo "$0: cannot unlink or rename $dst" >&2
(exit 1); exit 1
}
} &&
# Now rename the file to the real destination.
$doit $mvcmd "$dsttmp" "$dst"
}
fi || exit 1
trap '' 0
fi
done
# Local variables:
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:

@ -8,6 +8,6 @@ includedir=@includedir@
Name: libpcre2-16
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support
Version: @PACKAGE_VERSION@
Libs: -L${libdir} -lpcre2-16
Libs: -L${libdir} -lpcre2-16@LIB_POSTFIX@
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@

@ -8,6 +8,6 @@ includedir=@includedir@
Name: libpcre2-32
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support
Version: @PACKAGE_VERSION@
Libs: -L${libdir} -lpcre2-32
Libs: -L${libdir} -lpcre2-32@LIB_POSTFIX@
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@

@ -8,6 +8,6 @@ includedir=@includedir@
Name: libpcre2-8
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support
Version: @PACKAGE_VERSION@
Libs: -L${libdir} -lpcre2-8
Libs: -L${libdir} -lpcre2-8@LIB_POSTFIX@
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@

@ -8,6 +8,6 @@ includedir=@includedir@
Name: libpcre2-posix
Description: Posix compatible interface to libpcre2-8
Version: @PACKAGE_VERSION@
Libs: -L${libdir} -lpcre2-posix
Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
Requires.private: libpcre2-8

215
pcre2/missing vendored Executable file

@ -0,0 +1,215 @@
#! /bin/sh
# Common wrapper for a few potentially missing GNU programs.
scriptversion=2018-03-07.03; # UTC
# Copyright (C) 1996-2020 Free Software Foundation, Inc.
# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
if test $# -eq 0; then
echo 1>&2 "Try '$0 --help' for more information"
exit 1
fi
case $1 in
--is-lightweight)
# Used by our autoconf macros to check whether the available missing
# script is modern enough.
exit 0
;;
--run)
# Back-compat with the calling convention used by older automake.
shift
;;
-h|--h|--he|--hel|--help)
echo "\
$0 [OPTION]... PROGRAM [ARGUMENT]...
Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
to PROGRAM being missing or too old.
Options:
-h, --help display this help and exit
-v, --version output version information and exit
Supported PROGRAM values:
aclocal autoconf autoheader autom4te automake makeinfo
bison yacc flex lex help2man
Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
'g' are ignored when checking the name.
Send bug reports to <bug-automake@gnu.org>."
exit $?
;;
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
echo "missing $scriptversion (GNU Automake)"
exit $?
;;
-*)
echo 1>&2 "$0: unknown '$1' option"
echo 1>&2 "Try '$0 --help' for more information"
exit 1
;;
esac
# Run the given program, remember its exit status.
"$@"; st=$?
# If it succeeded, we are done.
test $st -eq 0 && exit 0
# Also exit now if we it failed (or wasn't found), and '--version' was
# passed; such an option is passed most likely to detect whether the
# program is present and works.
case $2 in --version|--help) exit $st;; esac
# Exit code 63 means version mismatch. This often happens when the user
# tries to use an ancient version of a tool on a file that requires a
# minimum version.
if test $st -eq 63; then
msg="probably too old"
elif test $st -eq 127; then
# Program was missing.
msg="missing on your system"
else
# Program was found and executed, but failed. Give up.
exit $st
fi
perl_URL=https://www.perl.org/
flex_URL=https://github.com/westes/flex
gnu_software_URL=https://www.gnu.org/software
program_details ()
{
case $1 in
aclocal|automake)
echo "The '$1' program is part of the GNU Automake package:"
echo "<$gnu_software_URL/automake>"
echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:"
echo "<$gnu_software_URL/autoconf>"
echo "<$gnu_software_URL/m4/>"
echo "<$perl_URL>"
;;
autoconf|autom4te|autoheader)
echo "The '$1' program is part of the GNU Autoconf package:"
echo "<$gnu_software_URL/autoconf/>"
echo "It also requires GNU m4 and Perl in order to run:"
echo "<$gnu_software_URL/m4/>"
echo "<$perl_URL>"
;;
esac
}
give_advice ()
{
# Normalize program name to check for.
normalized_program=`echo "$1" | sed '
s/^gnu-//; t
s/^gnu//; t
s/^g//; t'`
printf '%s\n' "'$1' is $msg."
configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
case $normalized_program in
autoconf*)
echo "You should only need it if you modified 'configure.ac',"
echo "or m4 files included by it."
program_details 'autoconf'
;;
autoheader*)
echo "You should only need it if you modified 'acconfig.h' or"
echo "$configure_deps."
program_details 'autoheader'
;;
automake*)
echo "You should only need it if you modified 'Makefile.am' or"
echo "$configure_deps."
program_details 'automake'
;;
aclocal*)
echo "You should only need it if you modified 'acinclude.m4' or"
echo "$configure_deps."
program_details 'aclocal'
;;
autom4te*)
echo "You might have modified some maintainer files that require"
echo "the 'autom4te' program to be rebuilt."
program_details 'autom4te'
;;
bison*|yacc*)
echo "You should only need it if you modified a '.y' file."
echo "You may want to install the GNU Bison package:"
echo "<$gnu_software_URL/bison/>"
;;
lex*|flex*)
echo "You should only need it if you modified a '.l' file."
echo "You may want to install the Fast Lexical Analyzer package:"
echo "<$flex_URL>"
;;
help2man*)
echo "You should only need it if you modified a dependency" \
"of a man page."
echo "You may want to install the GNU Help2man package:"
echo "<$gnu_software_URL/help2man/>"
;;
makeinfo*)
echo "You should only need it if you modified a '.texi' file, or"
echo "any other file indirectly affecting the aspect of the manual."
echo "You might want to install the Texinfo package:"
echo "<$gnu_software_URL/texinfo/>"
echo "The spurious makeinfo call might also be the consequence of"
echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
echo "want to install GNU make:"
echo "<$gnu_software_URL/make/>"
;;
*)
echo "You might have modified some files without having the proper"
echo "tools for further handling them. Check the 'README' file, it"
echo "often tells you about the needed prerequisites for installing"
echo "this package. You may also peek at any GNU archive site, in"
echo "case some other package contains this missing '$1' program."
;;
esac
}
give_advice "$1" | sed -e '1s/^/WARNING: /' \
-e '2,$s/^/ /' >&2
# Propagate the correct exit status (expected to be 127 for a program
# not found, 63 for a program that failed due to version mismatch).
exit $st
# Local variables:
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:

@ -86,28 +86,28 @@ while test $# -gt 0; do
;;
--libs-posix)
if test @enable_pcre2_8@ = yes ; then
echo $libS$libR -lpcre2-posix -lpcre2-8
echo $libS$libR -lpcre2-posix@LIB_POSTFIX@ -lpcre2-8@LIB_POSTFIX@
else
echo "${usage}" 1>&2
fi
;;
--libs8)
if test @enable_pcre2_8@ = yes ; then
echo $libS$libR -lpcre2-8
echo $libS$libR -lpcre2-8@LIB_POSTFIX@
else
echo "${usage}" 1>&2
fi
;;
--libs16)
if test @enable_pcre2_16@ = yes ; then
echo $libS$libR -lpcre2-16
echo $libS$libR -lpcre2-16@LIB_POSTFIX@
else
echo "${usage}" 1>&2
fi
;;
--libs32)
if test @enable_pcre2_32@ = yes ; then
echo $libS$libR -lpcre2-32
echo $libS$libR -lpcre2-32@LIB_POSTFIX@
else
echo "${usage}" 1>&2
fi

381
pcre2/src/config.h.generic Normal file

@ -0,0 +1,381 @@
/* src/config.h. Generated from config.h.in by configure. */
/* src/config.h.in. Generated from configure.ac by autoheader. */
/* PCRE2 is written in Standard C, but there are a few non-standard things it
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
In environments that support the GNU autotools, config.h.in is converted into
config.h by the "configure" script. In environments that use CMake,
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
hand" without using "configure" or CMake, you should copy the distributed
config.h.generic to config.h, and edit the macro definitions to be the way you
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
so that config.h is included at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
such macros are listed as a commented #undef in config.h.generic. Macros such
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */
/* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined (to any
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
The build-time default can be overridden by the user of PCRE2 at runtime.
*/
/* #undef BSR_ANYCRLF */
/* Define to any value to disable the use of the z and t modifiers in
formatting settings such as %zu or %td (this is rarely needed). */
/* #undef DISABLE_PERCENT_ZT */
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. When EBCDIC is set, PCRE2
assumes that all input strings are in EBCDIC. If you do not define this
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
is not possible to build a version of PCRE2 that supports both EBCDIC and
UTF-8/16/32. */
/* #undef EBCDIC */
/* In an EBCDIC environment, define this macro to any value to arrange for the
NL character to be 0x25 instead of the default 0x15. NL plays the role that
LF does in an ASCII/Unicode environment. */
/* #undef EBCDIC_NL25 */
/* Define this if your compiler supports __attribute__((uninitialized)) */
/* #undef HAVE_ATTRIBUTE_UNINITIALIZED */
/* Define to 1 if you have the `bcopy' function. */
/* #undef HAVE_BCOPY */
/* Define to 1 if you have the <bzlib.h> header file. */
/* #undef HAVE_BZLIB_H */
/* Define to 1 if you have the <dirent.h> header file. */
/* #undef HAVE_DIRENT_H */
/* Define to 1 if you have the <dlfcn.h> header file. */
/* #undef HAVE_DLFCN_H */
/* Define to 1 if you have the <editline/readline.h> header file. */
/* #undef HAVE_EDITLINE_READLINE_H */
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
/* #undef HAVE_EDIT_READLINE_READLINE_H */
/* Define to 1 if you have the <inttypes.h> header file. */
/* #undef HAVE_INTTYPES_H */
/* Define to 1 if you have the <limits.h> header file. */
/* #undef HAVE_LIMITS_H */
/* Define to 1 if you have the `memfd_create' function. */
/* #undef HAVE_MEMFD_CREATE */
/* Define to 1 if you have the `memmove' function. */
/* #undef HAVE_MEMMOVE */
/* Define to 1 if you have the <memory.h> header file. */
/* #undef HAVE_MEMORY_H */
/* Define to 1 if you have the `mkostemp' function. */
/* #undef HAVE_MKOSTEMP */
/* Define if you have POSIX threads libraries and header files. */
/* #undef HAVE_PTHREAD */
/* Have PTHREAD_PRIO_INHERIT. */
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
/* Define to 1 if you have the <readline/history.h> header file. */
/* #undef HAVE_READLINE_HISTORY_H */
/* Define to 1 if you have the <readline/readline.h> header file. */
/* #undef HAVE_READLINE_READLINE_H */
/* Define to 1 if you have the `secure_getenv' function. */
/* #undef HAVE_SECURE_GETENV */
/* Define to 1 if you have the <stdint.h> header file. */
/* #undef HAVE_STDINT_H */
/* Define to 1 if you have the <stdlib.h> header file. */
/* #undef HAVE_STDLIB_H */
/* Define to 1 if you have the `strerror' function. */
/* #undef HAVE_STRERROR */
/* Define to 1 if you have the <strings.h> header file. */
/* #undef HAVE_STRINGS_H */
/* Define to 1 if you have the <string.h> header file. */
/* #undef HAVE_STRING_H */
/* Define to 1 if you have the <sys/stat.h> header file. */
/* #undef HAVE_SYS_STAT_H */
/* Define to 1 if you have the <sys/types.h> header file. */
/* #undef HAVE_SYS_TYPES_H */
/* Define to 1 if you have the <sys/wait.h> header file. */
/* #undef HAVE_SYS_WAIT_H */
/* Define to 1 if you have the <unistd.h> header file. */
/* #undef HAVE_UNISTD_H */
/* Define to 1 if the compiler supports simple visibility declarations. */
/* #undef HAVE_VISIBILITY */
/* Define to 1 if you have the <windows.h> header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */
/* This limits the amount of memory that may be used while matching a pattern.
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
#ifndef HEAP_LIMIT
#define HEAP_LIMIT 20000000
#endif
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 65535 code units long. This covers the vast
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
instead. This allows for longer patterns in extreme cases. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#endif
/* Define to the sub-directory where libtool stores uninstalled libraries. */
/* This is ignored unless you are using libtool. */
#ifndef LT_OBJDIR
#define LT_OBJDIR ".libs/"
#endif
/* The value of MATCH_LIMIT determines the default number of times the
pcre2_match() function can record a backtrack position during a single
matching attempt. The value is also used to limit a loop counter in
pcre2_dfa_match(). There is a runtime interface for setting a different
limit. The limit exists in order to catch runaway regular expressions that
take for ever to determine that they do not match. The default is set very
large so that it does not accidentally catch legitimate cases. */
#ifndef MATCH_LIMIT
#define MATCH_LIMIT 10000000
#endif
/* The above limit applies to all backtracks, whether or not they are nested.
In some environments it is desirable to limit the nesting of backtracking
(that is, the depth of tree that is searched) more strictly, in order to
restrict the maximum amount of heap memory that is used. The value of
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
must be less than the value of MATCH_LIMIT. The default is to use the same
value as MATCH_LIMIT. There is a runtime method for setting a different
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
the internal nested function calls that are used for pattern recursions,
lookarounds, and atomic groups. */
#ifndef MATCH_LIMIT_DEPTH
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_COUNT
#define MAX_NAME_COUNT 10000
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_SIZE
#define MAX_NAME_SIZE 32
#endif
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
/* #undef NEVER_BACKSLASH_C */
/* The value of NEWLINE_DEFAULT determines the default newline character
sequence. PCRE2 client programs can override this by selecting other values
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
(ANYCRLF), and 6 (NUL). */
#ifndef NEWLINE_DEFAULT
#define NEWLINE_DEFAULT 2
#endif
/* Name of package */
#define PACKAGE "pcre2"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE2 10.36"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "10.36"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern. */
#ifndef PARENS_NEST_LIMIT
#define PARENS_NEST_LIMIT 250
#endif
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
pcre2grep to hold parts of the file it is searching. The buffer will be
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
very long lines. The actual amount of memory used by pcre2grep is three
times this number, because it allows for the buffering of "before" and
"after" lines. */
#ifndef PCRE2GREP_BUFSIZE
#define PCRE2GREP_BUFSIZE 20480
#endif
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
used by pcre2grep to hold parts of the file it is searching. The actual
amount of memory used by pcre2grep is three times this number, because it
allows for the buffering of "before" and "after" lines. */
#ifndef PCRE2GREP_MAX_BUFSIZE
#define PCRE2GREP_MAX_BUFSIZE 1048576
#endif
/* Define to any value to include debugging code. */
/* #undef PCRE2_DEBUG */
/* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, a suitable
__declspec value is used for Windows systems; in other environments
"extern" is used for a C compiler and "extern C" for a C++ compiler.
This macro apears at the start of every exported function that is part
of the external API. It does not appear on functions that are "external"
in the C sense, but which are internal to the library. */
/* #undef PCRE2_EXP_DEFN */
/* Define to any value if linking statically (TODO: make nice with Libtool) */
/* #undef PCRE2_STATIC */
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
/* #undef PTHREAD_CREATE_JOINABLE */
/* Define to any non-zero number to enable support for SELinux compatible
executable memory allocator in JIT. Note that this will have no effect
unless SUPPORT_JIT is also defined. */
/* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */
/* Define to 1 if you have the ANSI C header files. */
/* #undef STDC_HEADERS */
/* Define to any value to enable support for Just-In-Time compiling. */
/* #undef SUPPORT_JIT */
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
is able to handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */
/* Define to any value to allow pcre2test to be linked with libedit. */
/* #undef SUPPORT_LIBEDIT */
/* Define to any value to allow pcre2test to be linked with libreadline. */
/* #undef SUPPORT_LIBREADLINE */
/* Define to any value to allow pcre2grep to be linked with libz, so that it
is able to handle .gz files. */
/* #undef SUPPORT_LIBZ */
/* Define to any value to enable callout script support in pcre2grep. */
/* #undef SUPPORT_PCRE2GREP_CALLOUT */
/* Define to any value to enable fork support in pcre2grep callout scripts.
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
*/
/* #undef SUPPORT_PCRE2GREP_CALLOUT_FORK */
/* Define to any value to enable JIT support in pcre2grep. Note that this will
have no effect unless SUPPORT_JIT is also defined. */
/* #undef SUPPORT_PCRE2GREP_JIT */
/* Define to any value to enable the 16 bit PCRE2 library. */
/* #undef SUPPORT_PCRE2_16 */
/* Define to any value to enable the 32 bit PCRE2 library. */
/* #undef SUPPORT_PCRE2_32 */
/* Define to any value to enable the 8 bit PCRE2 library. */
/* #undef SUPPORT_PCRE2_8 */
/* Define to any value to enable support for Unicode and UTF encoding. This
will work even in an EBCDIC environment, but it is incompatible with the
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
ASCII/Unicode, but not both at once. */
/* #undef SUPPORT_UNICODE */
/* Define to any value for valgrind support to find invalid memory reads. */
/* #undef SUPPORT_VALGRIND */
/* Enable extensions on AIX 3, Interix. */
#ifndef _ALL_SOURCE
# define _ALL_SOURCE 1
#endif
/* Enable GNU extensions on systems that have them. */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1
#endif
/* Enable threading extensions on Solaris. */
#ifndef _POSIX_PTHREAD_SEMANTICS
# define _POSIX_PTHREAD_SEMANTICS 1
#endif
/* Enable extensions on HP NonStop. */
#ifndef _TANDEM_SOURCE
# define _TANDEM_SOURCE 1
#endif
/* Enable general extensions on Solaris. */
#ifndef __EXTENSIONS__
# define __EXTENSIONS__ 1
#endif
/* Version number of package */
#define VERSION "10.36"
/* Define to 1 if on MINIX. */
/* #undef _MINIX */
/* Define to 2 if the system does not provide POSIX.1 features except with
this defined. */
/* #undef _POSIX_1_SOURCE */
/* Define to 1 if you need to in order for `stat' and other things to work. */
/* #undef _POSIX_SOURCE */
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
/* Define to the type of a signed integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
/* #undef int64_t */
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */

369
pcre2/src/config.h.in Normal file

@ -0,0 +1,369 @@
/* src/config.h.in. Generated from configure.ac by autoheader. */
/* PCRE2 is written in Standard C, but there are a few non-standard things it
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
In environments that support the GNU autotools, config.h.in is converted into
config.h by the "configure" script. In environments that use CMake,
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
hand" without using "configure" or CMake, you should copy the distributed
config.h.generic to config.h, and edit the macro definitions to be the way you
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
so that config.h is included at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
such macros are listed as a commented #undef in config.h.generic. Macros such
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */
/* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined (to any
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
The build-time default can be overridden by the user of PCRE2 at runtime.
*/
#undef BSR_ANYCRLF
/* Define to any value to disable the use of the z and t modifiers in
formatting settings such as %zu or %td (this is rarely needed). */
#undef DISABLE_PERCENT_ZT
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. When EBCDIC is set, PCRE2
assumes that all input strings are in EBCDIC. If you do not define this
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
is not possible to build a version of PCRE2 that supports both EBCDIC and
UTF-8/16/32. */
#undef EBCDIC
/* In an EBCDIC environment, define this macro to any value to arrange for the
NL character to be 0x25 instead of the default 0x15. NL plays the role that
LF does in an ASCII/Unicode environment. */
#undef EBCDIC_NL25
/* Define this if your compiler supports __attribute__((uninitialized)) */
#undef HAVE_ATTRIBUTE_UNINITIALIZED
/* Define to 1 if you have the `bcopy' function. */
#undef HAVE_BCOPY
/* Define to 1 if you have the <bzlib.h> header file. */
#undef HAVE_BZLIB_H
/* Define to 1 if you have the <dirent.h> header file. */
#undef HAVE_DIRENT_H
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the <editline/readline.h> header file. */
#undef HAVE_EDITLINE_READLINE_H
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
#undef HAVE_EDIT_READLINE_READLINE_H
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the <limits.h> header file. */
#undef HAVE_LIMITS_H
/* Define to 1 if you have the `memfd_create' function. */
#undef HAVE_MEMFD_CREATE
/* Define to 1 if you have the `memmove' function. */
#undef HAVE_MEMMOVE
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the `mkostemp' function. */
#undef HAVE_MKOSTEMP
/* Define if you have POSIX threads libraries and header files. */
#undef HAVE_PTHREAD
/* Have PTHREAD_PRIO_INHERIT. */
#undef HAVE_PTHREAD_PRIO_INHERIT
/* Define to 1 if you have the <readline/history.h> header file. */
#undef HAVE_READLINE_HISTORY_H
/* Define to 1 if you have the <readline/readline.h> header file. */
#undef HAVE_READLINE_READLINE_H
/* Define to 1 if you have the `secure_getenv' function. */
#undef HAVE_SECURE_GETENV
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the `strerror' function. */
#undef HAVE_STRERROR
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <sys/wait.h> header file. */
#undef HAVE_SYS_WAIT_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 if the compiler supports simple visibility declarations. */
#undef HAVE_VISIBILITY
/* Define to 1 if you have the <windows.h> header file. */
#undef HAVE_WINDOWS_H
/* Define to 1 if you have the <zlib.h> header file. */
#undef HAVE_ZLIB_H
/* This limits the amount of memory that may be used while matching a pattern.
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
#undef HEAP_LIMIT
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 65535 code units long. This covers the vast
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
instead. This allows for longer patterns in extreme cases. */
#undef LINK_SIZE
/* Define to the sub-directory where libtool stores uninstalled libraries. */
#undef LT_OBJDIR
/* The value of MATCH_LIMIT determines the default number of times the
pcre2_match() function can record a backtrack position during a single
matching attempt. The value is also used to limit a loop counter in
pcre2_dfa_match(). There is a runtime interface for setting a different
limit. The limit exists in order to catch runaway regular expressions that
take for ever to determine that they do not match. The default is set very
large so that it does not accidentally catch legitimate cases. */
#undef MATCH_LIMIT
/* The above limit applies to all backtracks, whether or not they are nested.
In some environments it is desirable to limit the nesting of backtracking
(that is, the depth of tree that is searched) more strictly, in order to
restrict the maximum amount of heap memory that is used. The value of
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
must be less than the value of MATCH_LIMIT. The default is to use the same
value as MATCH_LIMIT. There is a runtime method for setting a different
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
the internal nested function calls that are used for pattern recursions,
lookarounds, and atomic groups. */
#undef MATCH_LIMIT_DEPTH
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#undef MAX_NAME_COUNT
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#undef MAX_NAME_SIZE
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
#undef NEVER_BACKSLASH_C
/* The value of NEWLINE_DEFAULT determines the default newline character
sequence. PCRE2 client programs can override this by selecting other values
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
(ANYCRLF), and 6 (NUL). */
#undef NEWLINE_DEFAULT
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern. */
#undef PARENS_NEST_LIMIT
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
pcre2grep to hold parts of the file it is searching. The buffer will be
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
very long lines. The actual amount of memory used by pcre2grep is three
times this number, because it allows for the buffering of "before" and
"after" lines. */
#undef PCRE2GREP_BUFSIZE
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
used by pcre2grep to hold parts of the file it is searching. The actual
amount of memory used by pcre2grep is three times this number, because it
allows for the buffering of "before" and "after" lines. */
#undef PCRE2GREP_MAX_BUFSIZE
/* to make a symbol visible */
#undef PCRE2POSIX_EXP_DECL
/* to make a symbol visible */
#undef PCRE2POSIX_EXP_DEFN
/* Define to any value to include debugging code. */
#undef PCRE2_DEBUG
/* to make a symbol visible */
#undef PCRE2_EXP_DECL
/* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, a suitable
__declspec value is used for Windows systems; in other environments
"extern" is used for a C compiler and "extern C" for a C++ compiler.
This macro apears at the start of every exported function that is part
of the external API. It does not appear on functions that are "external"
in the C sense, but which are internal to the library. */
#undef PCRE2_EXP_DEFN
/* Define to any value if linking statically (TODO: make nice with Libtool) */
#undef PCRE2_STATIC
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
#undef PTHREAD_CREATE_JOINABLE
/* Define to any non-zero number to enable support for SELinux compatible
executable memory allocator in JIT. Note that this will have no effect
unless SUPPORT_JIT is also defined. */
#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define to any value to enable support for Just-In-Time compiling. */
#undef SUPPORT_JIT
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
is able to handle .bz2 files. */
#undef SUPPORT_LIBBZ2
/* Define to any value to allow pcre2test to be linked with libedit. */
#undef SUPPORT_LIBEDIT
/* Define to any value to allow pcre2test to be linked with libreadline. */
#undef SUPPORT_LIBREADLINE
/* Define to any value to allow pcre2grep to be linked with libz, so that it
is able to handle .gz files. */
#undef SUPPORT_LIBZ
/* Define to any value to enable callout script support in pcre2grep. */
#undef SUPPORT_PCRE2GREP_CALLOUT
/* Define to any value to enable fork support in pcre2grep callout scripts.
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
*/
#undef SUPPORT_PCRE2GREP_CALLOUT_FORK
/* Define to any value to enable JIT support in pcre2grep. Note that this will
have no effect unless SUPPORT_JIT is also defined. */
#undef SUPPORT_PCRE2GREP_JIT
/* Define to any value to enable the 16 bit PCRE2 library. */
#undef SUPPORT_PCRE2_16
/* Define to any value to enable the 32 bit PCRE2 library. */
#undef SUPPORT_PCRE2_32
/* Define to any value to enable the 8 bit PCRE2 library. */
#undef SUPPORT_PCRE2_8
/* Define to any value to enable support for Unicode and UTF encoding. This
will work even in an EBCDIC environment, but it is incompatible with the
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
ASCII/Unicode, but not both at once. */
#undef SUPPORT_UNICODE
/* Define to any value for valgrind support to find invalid memory reads. */
#undef SUPPORT_VALGRIND
/* Enable extensions on AIX 3, Interix. */
#ifndef _ALL_SOURCE
# undef _ALL_SOURCE
#endif
/* Enable GNU extensions on systems that have them. */
#ifndef _GNU_SOURCE
# undef _GNU_SOURCE
#endif
/* Enable threading extensions on Solaris. */
#ifndef _POSIX_PTHREAD_SEMANTICS
# undef _POSIX_PTHREAD_SEMANTICS
#endif
/* Enable extensions on HP NonStop. */
#ifndef _TANDEM_SOURCE
# undef _TANDEM_SOURCE
#endif
/* Enable general extensions on Solaris. */
#ifndef __EXTENSIONS__
# undef __EXTENSIONS__
#endif
/* Version number of package */
#undef VERSION
/* Define to 1 if on MINIX. */
#undef _MINIX
/* Define to 2 if the system does not provide POSIX.1 features except with
this defined. */
#undef _POSIX_1_SOURCE
/* Define to 1 if you need to in order for `stat' and other things to work. */
#undef _POSIX_SOURCE
/* Define to empty if `const' does not conform to ANSI C. */
#undef const
/* Define to the type of a signed integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef int64_t
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t

991
pcre2/src/pcre2.h.generic Normal file

@ -0,0 +1,991 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE2_H_IDEMPOTENT_GUARD
#define PCRE2_H_IDEMPOTENT_GUARD
/* The current PCRE version information. */
#define PCRE2_MAJOR 10
#define PCRE2_MINOR 36
#define PCRE2_PRERELEASE
#define PCRE2_DATE 2020-12-04
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
export setting is defined in pcre2_internal.h, which includes this file. So we
don't change existing definitions of PCRE2_EXP_DECL. */
#if defined(_WIN32) && !defined(PCRE2_STATIC)
# ifndef PCRE2_EXP_DECL
# define PCRE2_EXP_DECL extern __declspec(dllimport)
# endif
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE2_EXP_DECL
# ifdef __cplusplus
# define PCRE2_EXP_DECL extern "C"
# else
# define PCRE2_EXP_DECL extern
# endif
#endif
/* When compiling with the MSVC compiler, it is sometimes necessary to include
a "calling convention" before exported function names. (This is secondhand
information; I know nothing about MSVC myself). For example, something like
void __cdecl function(....)
might be needed. In order so make this easy, all the exported functions have
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
set, we ensure here that it has no effect. */
#ifndef PCRE2_CALL_CONVENTION
#define PCRE2_CALL_CONVENTION
#endif
/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
not have stdint.h, which is why we use inttypes.h, which according to the C
standard is a superset of stdint.h. If none of these headers are available,
the relevant values must be provided by some other means. */
#include <limits.h>
#include <stdlib.h>
#include <inttypes.h>
/* Allow for C++ users compiling this directly. */
#ifdef __cplusplus
extern "C" {
#endif
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
is passed. Put these bits at the most significant end of the options word so
others can be added next to them */
#define PCRE2_ANCHORED 0x80000000u
#define PCRE2_NO_UTF_CHECK 0x40000000u
#define PCRE2_ENDANCHORED 0x20000000u
/* The following option bits can be passed only to pcre2_compile(). However,
they may affect compilation, JIT compilation, and/or interpretive execution.
The following tags indicate which:
C alters what is compiled by pcre2_compile()
J alters what is compiled by pcre2_jit_compile()
M is inspected during pcre2_match() execution
D is inspected during pcre2_dfa_match() execution
*/
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
#define PCRE2_ALT_BSUX 0x00000002u /* C */
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
#define PCRE2_CASELESS 0x00000008u /* C */
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
#define PCRE2_DOTALL 0x00000020u /* C */
#define PCRE2_DUPNAMES 0x00000040u /* C */
#define PCRE2_EXTENDED 0x00000080u /* C */
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
#define PCRE2_MULTILINE 0x00000400u /* C */
#define PCRE2_NEVER_UCP 0x00000800u /* C */
#define PCRE2_NEVER_UTF 0x00001000u /* C */
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
#define PCRE2_UCP 0x00020000u /* C J M D */
#define PCRE2_UNGREEDY 0x00040000u /* C */
#define PCRE2_UTF 0x00080000u /* C J M D */
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
#define PCRE2_LITERAL 0x02000000u /* C */
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
/* An additional compile options word is available in the compile context. */
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
/* These are for pcre2_jit_compile(). */
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
#define PCRE2_JIT_INVALID_UTF 0x00000100u
/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
pcre2_substitute(). Some are allowed only for one of the functions, and in
these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
PCRE2_NO_UTF_CHECK can also be passed to these functions (though
pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
#define PCRE2_NOTBOL 0x00000001u
#define PCRE2_NOTEOL 0x00000002u
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
#define PCRE2_PARTIAL_SOFT 0x00000010u
#define PCRE2_PARTIAL_HARD 0x00000020u
#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
/* Options for pcre2_pattern_convert(). */
#define PCRE2_CONVERT_UTF 0x00000001u
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
#define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be
greater than zero. */
#define PCRE2_NEWLINE_CR 1
#define PCRE2_NEWLINE_LF 2
#define PCRE2_NEWLINE_CRLF 3
#define PCRE2_NEWLINE_ANY 4
#define PCRE2_NEWLINE_ANYCRLF 5
#define PCRE2_NEWLINE_NUL 6
#define PCRE2_BSR_UNICODE 1
#define PCRE2_BSR_ANYCRLF 2
/* Error codes for pcre2_compile(). Some of these are also used by
pcre2_pattern_convert(). */
#define PCRE2_ERROR_END_BACKSLASH 101
#define PCRE2_ERROR_END_BACKSLASH_C 102
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
#define PCRE2_ERROR_NULL_PATTERN 116
#define PCRE2_ERROR_BAD_OPTIONS 117
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
#define PCRE2_ERROR_HEAP_FAILED 121
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
/* Error 159 is obsolete and should now never occur */
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
#define PCRE2_ERROR_VERB_UNKNOWN 160
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
#define PCRE2_ERROR_INVALID_OCTAL 164
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
#define PCRE2_ERROR_UTF_IS_DISABLED 174
#define PCRE2_ERROR_UCP_IS_DISABLED 175
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198
/* "Expected" matching error codes: no match and partial match. */
#define PCRE2_ERROR_NOMATCH (-1)
#define PCRE2_ERROR_PARTIAL (-2)
/* Error codes for UTF-8 validity checks */
#define PCRE2_ERROR_UTF8_ERR1 (-3)
#define PCRE2_ERROR_UTF8_ERR2 (-4)
#define PCRE2_ERROR_UTF8_ERR3 (-5)
#define PCRE2_ERROR_UTF8_ERR4 (-6)
#define PCRE2_ERROR_UTF8_ERR5 (-7)
#define PCRE2_ERROR_UTF8_ERR6 (-8)
#define PCRE2_ERROR_UTF8_ERR7 (-9)
#define PCRE2_ERROR_UTF8_ERR8 (-10)
#define PCRE2_ERROR_UTF8_ERR9 (-11)
#define PCRE2_ERROR_UTF8_ERR10 (-12)
#define PCRE2_ERROR_UTF8_ERR11 (-13)
#define PCRE2_ERROR_UTF8_ERR12 (-14)
#define PCRE2_ERROR_UTF8_ERR13 (-15)
#define PCRE2_ERROR_UTF8_ERR14 (-16)
#define PCRE2_ERROR_UTF8_ERR15 (-17)
#define PCRE2_ERROR_UTF8_ERR16 (-18)
#define PCRE2_ERROR_UTF8_ERR17 (-19)
#define PCRE2_ERROR_UTF8_ERR18 (-20)
#define PCRE2_ERROR_UTF8_ERR19 (-21)
#define PCRE2_ERROR_UTF8_ERR20 (-22)
#define PCRE2_ERROR_UTF8_ERR21 (-23)
/* Error codes for UTF-16 validity checks */
#define PCRE2_ERROR_UTF16_ERR1 (-24)
#define PCRE2_ERROR_UTF16_ERR2 (-25)
#define PCRE2_ERROR_UTF16_ERR3 (-26)
/* Error codes for UTF-32 validity checks */
#define PCRE2_ERROR_UTF32_ERR1 (-27)
#define PCRE2_ERROR_UTF32_ERR2 (-28)
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
functions, context functions, and serializing functions. They are in numerical
order. Originally they were in alphabetical order too, but now that PCRE2 is
released, the numbers must not be changed. */
#define PCRE2_ERROR_BADDATA (-29)
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
#define PCRE2_ERROR_BADMAGIC (-31)
#define PCRE2_ERROR_BADMODE (-32)
#define PCRE2_ERROR_BADOFFSET (-33)
#define PCRE2_ERROR_BADOPTION (-34)
#define PCRE2_ERROR_BADREPLACEMENT (-35)
#define PCRE2_ERROR_BADUTFOFFSET (-36)
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
#define PCRE2_ERROR_DFA_BADRESTART (-38)
#define PCRE2_ERROR_DFA_RECURSE (-39)
#define PCRE2_ERROR_DFA_UCOND (-40)
#define PCRE2_ERROR_DFA_UFUNC (-41)
#define PCRE2_ERROR_DFA_UITEM (-42)
#define PCRE2_ERROR_DFA_WSSIZE (-43)
#define PCRE2_ERROR_INTERNAL (-44)
#define PCRE2_ERROR_JIT_BADOPTION (-45)
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
#define PCRE2_ERROR_MATCHLIMIT (-47)
#define PCRE2_ERROR_NOMEMORY (-48)
#define PCRE2_ERROR_NOSUBSTRING (-49)
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
#define PCRE2_ERROR_NULL (-51)
#define PCRE2_ERROR_RECURSELOOP (-52)
#define PCRE2_ERROR_DEPTHLIMIT (-53)
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
#define PCRE2_ERROR_UNAVAILABLE (-54)
#define PCRE2_ERROR_UNSET (-55)
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
#define PCRE2_ERROR_BADREPESCAPE (-57)
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
/* Request types for pcre2_pattern_info() */
#define PCRE2_INFO_ALLOPTIONS 0
#define PCRE2_INFO_ARGOPTIONS 1
#define PCRE2_INFO_BACKREFMAX 2
#define PCRE2_INFO_BSR 3
#define PCRE2_INFO_CAPTURECOUNT 4
#define PCRE2_INFO_FIRSTCODEUNIT 5
#define PCRE2_INFO_FIRSTCODETYPE 6
#define PCRE2_INFO_FIRSTBITMAP 7
#define PCRE2_INFO_HASCRORLF 8
#define PCRE2_INFO_JCHANGED 9
#define PCRE2_INFO_JITSIZE 10
#define PCRE2_INFO_LASTCODEUNIT 11
#define PCRE2_INFO_LASTCODETYPE 12
#define PCRE2_INFO_MATCHEMPTY 13
#define PCRE2_INFO_MATCHLIMIT 14
#define PCRE2_INFO_MAXLOOKBEHIND 15
#define PCRE2_INFO_MINLENGTH 16
#define PCRE2_INFO_NAMECOUNT 17
#define PCRE2_INFO_NAMEENTRYSIZE 18
#define PCRE2_INFO_NAMETABLE 19
#define PCRE2_INFO_NEWLINE 20
#define PCRE2_INFO_DEPTHLIMIT 21
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
#define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25
#define PCRE2_INFO_EXTRAOPTIONS 26
/* Request types for pcre2_config(). */
#define PCRE2_CONFIG_BSR 0
#define PCRE2_CONFIG_JIT 1
#define PCRE2_CONFIG_JITTARGET 2
#define PCRE2_CONFIG_LINKSIZE 3
#define PCRE2_CONFIG_MATCHLIMIT 4
#define PCRE2_CONFIG_NEWLINE 5
#define PCRE2_CONFIG_PARENSLIMIT 6
#define PCRE2_CONFIG_DEPTHLIMIT 7
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
#define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11
#define PCRE2_CONFIG_HEAPLIMIT 12
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
#define PCRE2_CONFIG_TABLES_LENGTH 15
/* Types for code units in patterns and subject strings. */
typedef uint8_t PCRE2_UCHAR8;
typedef uint16_t PCRE2_UCHAR16;
typedef uint32_t PCRE2_UCHAR32;
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
including pattern offsets for errors and subject offsets after a match. We
define special values to indicate zero-terminated strings and unset offsets in
the offset vector (ovector). */
#define PCRE2_SIZE size_t
#define PCRE2_SIZE_MAX SIZE_MAX
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
/* Generic types for opaque structures and JIT callback functions. These
declarations are defined in a macro that is expanded for each width later. */
#define PCRE2_TYPES_LIST \
struct pcre2_real_general_context; \
typedef struct pcre2_real_general_context pcre2_general_context; \
\
struct pcre2_real_compile_context; \
typedef struct pcre2_real_compile_context pcre2_compile_context; \
\
struct pcre2_real_match_context; \
typedef struct pcre2_real_match_context pcre2_match_context; \
\
struct pcre2_real_convert_context; \
typedef struct pcre2_real_convert_context pcre2_convert_context; \
\
struct pcre2_real_code; \
typedef struct pcre2_real_code pcre2_code; \
\
struct pcre2_real_match_data; \
typedef struct pcre2_real_match_data pcre2_match_data; \
\
struct pcre2_real_jit_stack; \
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
\
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
/* The structures for passing out data via callout functions. We use structures
so that new fields can be added on the end in future versions, without changing
the API of the function, thereby allowing old clients to work without
modification. Define the generic versions in a macro; the width-specific
versions are generated from this macro below. */
/* Flags for the callout_flags field. These are cleared after a callout. */
#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */
#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */
#define PCRE2_STRUCTURE_LIST \
typedef struct pcre2_callout_block { \
uint32_t version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
uint32_t callout_number; /* Number compiled into pattern */ \
uint32_t capture_top; /* Max current capture */ \
uint32_t capture_last; /* Most recently closed capture */ \
PCRE2_SIZE *offset_vector; /* The offset vector */ \
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
PCRE2_SPTR subject; /* The subject being matched */ \
PCRE2_SIZE subject_length; /* The length of the subject */ \
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 1 -------------------------- */ \
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
/* ------------------- Added for Version 2 -------------------------- */ \
uint32_t callout_flags; /* See above for list */ \
/* ------------------------------------------------------------------ */ \
} pcre2_callout_block; \
\
typedef struct pcre2_callout_enumerate_block { \
uint32_t version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
uint32_t callout_number; /* Number compiled into pattern */ \
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
/* ------------------------------------------------------------------ */ \
} pcre2_callout_enumerate_block; \
\
typedef struct pcre2_substitute_callout_block { \
uint32_t version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
PCRE2_SPTR input; /* Pointer to input subject string */ \
PCRE2_SPTR output; /* Pointer to output buffer */ \
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
uint32_t oveccount; /* Count of pairs set in ovector */ \
uint32_t subscount; /* Substitution number */ \
/* ------------------------------------------------------------------ */ \
} pcre2_substitute_callout_block;
/* List the generic forms of all other functions in macros, which will be
expanded for each width below. Start with functions that give general
information. */
#define PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
/* Functions for manipulating contexts. */
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
*pcre2_general_context_copy(pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
*pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
void (*)(void *, void *), void *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_general_context_free(pcre2_general_context *);
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
*pcre2_compile_context_copy(pcre2_compile_context *); \
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
*pcre2_compile_context_create(pcre2_general_context *);\
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_compile_context_free(pcre2_compile_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
int (*)(uint32_t, void *), void *);
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
*pcre2_match_context_copy(pcre2_match_context *); \
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
*pcre2_match_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_substitute_callout(pcre2_match_context *, \
int (*)(pcre2_substitute_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_recursion_memory_management(pcre2_match_context *, \
void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_copy(pcre2_convert_context *); \
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
/* Functions concerned with compiling a pattern to PCRE internal code. */
#define PCRE2_COMPILE_FUNCTIONS \
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
*pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
pcre2_compile_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_code_free(pcre2_code *); \
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
*pcre2_code_copy(const pcre2_code *); \
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
*pcre2_code_copy_with_tables(const pcre2_code *);
/* Functions that give information about a compiled pattern. */
#define PCRE2_PATTERN_INFO_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_callout_enumerate(const pcre2_code *, \
int (*)(pcre2_callout_enumerate_block *, void *), void *);
/* Functions for running a match and inspecting the result. */
#define PCRE2_MATCH_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_from_pattern(const pcre2_code *, \
pcre2_general_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_match_data_free(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
pcre2_get_mark(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
pcre2_get_match_data_size(pcre2_match_data *); \
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
pcre2_get_ovector_count(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
*pcre2_get_ovector_pointer(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
pcre2_get_startchar(pcre2_match_data *);
/* Convenience functions for handling matched substrings. */
#define PCRE2_SUBSTRING_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_substring_free(PCRE2_UCHAR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
PCRE2_SPTR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_substring_list_free(PCRE2_SPTR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
/* Functions for serializing / deserializing compiled patterns. */
#define PCRE2_SERIALIZE_FUNCTIONS \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
PCRE2_SIZE *, pcre2_general_context *); \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
pcre2_general_context *); \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
pcre2_serialize_get_number_of_codes(const uint8_t *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_serialize_free(uint8_t *);
/* Convenience function for match + substitute. */
#define PCRE2_SUBSTITUTE_FUNCTION \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
/* Functions for converting pattern source strings. */
#define PCRE2_CONVERT_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
PCRE2_SIZE *, pcre2_convert_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_converted_pattern_free(PCRE2_UCHAR *);
/* Functions for JIT processing */
#define PCRE2_JIT_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_jit_compile(pcre2_code *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_jit_free_unused_memory(pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \
*pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_jit_stack_free(pcre2_jit_stack *);
/* Other miscellaneous functions. */
#define PCRE2_OTHER_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
*pcre2_maketables(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
/* Define macros that generate width-specific names from generic versions. The
three-level macro scheme is necessary to get the macros expanded when we want
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
generating three versions of everything below. After that, PCRE2_SUFFIX will be
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
pcre2_compile are called by application code. */
#define PCRE2_JOIN(a,b) a ## b
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
/* Data types */
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
/* Data blocks */
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
/* Functions: the complete list in alphabetical order */
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
/* Keep this old function name for backwards compatibility */
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
/* Keep this obsolete function for backwards compatibility: it is now a noop. */
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
/* Now generate all three sets of width-specific structures and function
prototypes. */
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
PCRE2_TYPES_LIST \
PCRE2_STRUCTURE_LIST \
PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
PCRE2_CONVERT_FUNCTIONS \
PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_FUNCTIONS \
PCRE2_PATTERN_INFO_FUNCTIONS \
PCRE2_MATCH_FUNCTIONS \
PCRE2_SUBSTRING_FUNCTIONS \
PCRE2_SERIALIZE_FUNCTIONS \
PCRE2_SUBSTITUTE_FUNCTION \
PCRE2_JIT_FUNCTIONS \
PCRE2_OTHER_FUNCTIONS
#define PCRE2_LOCAL_WIDTH 8
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_LOCAL_WIDTH
#define PCRE2_LOCAL_WIDTH 16
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_LOCAL_WIDTH
#define PCRE2_LOCAL_WIDTH 32
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_LOCAL_WIDTH
/* Undefine the list macros; they are no longer needed. */
#undef PCRE2_TYPES_LIST
#undef PCRE2_STRUCTURE_LIST
#undef PCRE2_GENERAL_INFO_FUNCTIONS
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_FUNCTIONS
#undef PCRE2_PATTERN_INFO_FUNCTIONS
#undef PCRE2_MATCH_FUNCTIONS
#undef PCRE2_SUBSTRING_FUNCTIONS
#undef PCRE2_SERIALIZE_FUNCTIONS
#undef PCRE2_SUBSTITUTE_FUNCTION
#undef PCRE2_JIT_FUNCTIONS
#undef PCRE2_OTHER_FUNCTIONS
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
#undef PCRE2_SUFFIX
#ifndef PCRE2_CODE_UNIT_WIDTH
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
#error Use 8, 16, or 32; or 0 for a multi-width application.
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
PCRE2_CODE_UNIT_WIDTH == 16 || \
PCRE2_CODE_UNIT_WIDTH == 32
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
#elif PCRE2_CODE_UNIT_WIDTH == 0
#undef PCRE2_JOIN
#undef PCRE2_GLUE
#define PCRE2_SUFFIX(a) a
#else
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
/* End of pcre2.h */

@ -2344,7 +2344,7 @@ if (ptr > *nameptr + MAX_NAME_SIZE)
*errorcodeptr = ERR48;
goto FAILED;
}
*namelenptr = ptr - *nameptr;
*namelenptr = (uint32_t)(ptr - *nameptr);
/* Subpattern names must not be empty, and their terminator is checked here.
(What follows a verb or alpha assertion name is checked separately.) */
@ -4331,6 +4331,7 @@ while (ptr < ptrend)
{
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
minor = (*ptr++ - CHAR_0) * 10;
if (ptr >= ptrend) goto BAD_VERSION_CONDITION;
if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
goto BAD_VERSION_CONDITION;

303
pcre2/src/pcre2_dftables.c Normal file

@ -0,0 +1,303 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This is a freestanding support program to generate a file containing
character tables for PCRE2. The tables are built using the pcre2_maketables()
function, which is part of the PCRE2 API. By default, the system's "C" locale
is used rather than what the building user happens to have set, but the -L
option can be used to select the current locale from the LC_ALL environment
variable. By default, the tables are written in source form, but if -b is
given, they are written in binary. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <locale.h>
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
#include "pcre2_internal.h"
#define PCRE2_DFTABLES /* pcre2_maketables.c notices this */
#include "pcre2_maketables.c"
static const char *classlist[] =
{
"space", "xdigit", "digit", "upper", "lower",
"word", "graph", "print", "punct", "cntrl"
};
/*************************************************
* Usage *
*************************************************/
static void
usage(void)
{
(void)fprintf(stderr,
"Usage: pcre2_dftables [options] <output file>\n"
" -b Write output in binary (default is source code)\n"
" -L Use locale from LC_ALL (default is \"C\" locale)\n"
);
}
/*************************************************
* Entry point *
*************************************************/
int main(int argc, char **argv)
{
FILE *f;
int i;
int nclass = 0;
BOOL binary = FALSE;
char *env = (char *)"C";
const unsigned char *tables;
const unsigned char *base_of_tables;
/* Process options */
for (i = 1; i < argc; i++)
{
char *arg = argv[i];
if (*arg != '-') break;
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
{
usage();
return 0;
}
else if (strcmp(arg, "-L") == 0)
{
if (setlocale(LC_ALL, "") == NULL)
{
(void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
return 1;
}
env = getenv("LC_ALL");
}
else if (strcmp(arg, "-b") == 0)
binary = TRUE;
else
{
(void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
return 1;
}
}
if (i != argc - 1)
{
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
return 1;
}
/* Make the tables */
tables = maketables();
base_of_tables = tables;
f = fopen(argv[i], "wb");
if (f == NULL)
{
fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
return 1;
}
/* If -b was specified, we write the tables in binary. */
if (binary)
{
int yield = 0;
size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
if (len != TABLES_LENGTH)
{
(void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
"instead of %d\n", (int)len, TABLES_LENGTH);
yield = 1;
}
fclose(f);
free((void *)base_of_tables);
return yield;
}
/* Write the tables as source code for inclusion in the PCRE2 library. There
are several fprintf() calls here, because gcc in pedantic mode complains about
the very long string otherwise. */
(void)fprintf(f,
"/*************************************************\n"
"* Perl-Compatible Regular Expressions *\n"
"*************************************************/\n\n"
"/* This file was automatically written by the pcre2_dftables auxiliary\n"
"program. It contains character tables that are used when no external\n"
"tables are passed to PCRE2 by the application that calls it. The tables\n"
"are used only for characters whose code values are less than 256. */\n\n");
(void)fprintf(f,
"/* This set of tables was written in the %s locale. */\n\n", env);
(void)fprintf(f,
"/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
"to build alternative versions of this file. This is necessary if you are\n"
"running in an EBCDIC environment, or if you want to default to a different\n"
"encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
"these tables in the \"C\" locale by default. This happens automatically if\n"
"PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
"pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
"locale. */\n\n");
/* Force config.h in z/OS */
#if defined NATIVE_ZOS
(void)fprintf(f,
"/* For z/OS, config.h is forced */\n"
"#ifndef HAVE_CONFIG_H\n"
"#define HAVE_CONFIG_H 1\n"
"#endif\n\n");
#endif
(void)fprintf(f,
"/* The following #include is present because without it gcc 4.x may remove\n"
"the array definition from the final binary if PCRE2 is built into a static\n"
"library and dead code stripping is activated. This leads to link errors.\n"
"Pulling in the header ensures that the array gets flagged as \"someone\n"
"outside this compilation unit might reference this\" and so it will always\n"
"be supplied to the linker. */\n\n");
(void)fprintf(f,
"#ifdef HAVE_CONFIG_H\n"
"#include \"config.h\"\n"
"#endif\n\n"
"#include \"pcre2_internal.h\"\n\n");
(void)fprintf(f,
"const uint8_t PRIV(default_tables)[] = {\n\n"
"/* This table is a lower casing table. */\n\n");
(void)fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
fprintf(f, "%3d", *tables++);
if (i != 255) fprintf(f, ",");
}
(void)fprintf(f, ",\n\n");
(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
(void)fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
fprintf(f, "%3d", *tables++);
if (i != 255) fprintf(f, ",");
}
(void)fprintf(f, ",\n\n");
(void)fprintf(f,
"/* This table contains bit maps for various character classes. Each map is 32\n"
"bytes long and the bits run from the least significant end of each byte. The\n"
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
"graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
(void)fprintf(f, " ");
for (i = 0; i < cbit_length; i++)
{
if ((i & 7) == 0 && i != 0)
{
if ((i & 31) == 0) (void)fprintf(f, "\n");
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
(void)fprintf(f, "\n ");
}
(void)fprintf(f, "0x%02x", *tables++);
if (i != cbit_length - 1) (void)fprintf(f, ",");
}
(void)fprintf(f, ",\n\n");
(void)fprintf(f,
"/* This table identifies various classes of character by individual bits:\n"
" 0x%02x white space character\n"
" 0x%02x letter\n"
" 0x%02x lower case letter\n"
" 0x%02x decimal digit\n"
" 0x%02x alphanumeric or '_'\n*/\n\n",
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
(void)fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0)
{
(void)fprintf(f, " /* ");
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
else (void)fprintf(f, "%3d-", i-8);
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
else (void)fprintf(f, "%3d", i-1);
(void)fprintf(f, " */\n ");
}
(void)fprintf(f, "0x%02x", *tables++);
if (i != 255) (void)fprintf(f, ",");
}
(void)fprintf(f, "};/* ");
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
else (void)fprintf(f, "%3d-", i-8);
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
else (void)fprintf(f, "%3d", i-1);
(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
fclose(f);
free((void *)base_of_tables);
return 0;
}
/* End of pcre2_dftables.c */

@ -0,0 +1,365 @@
/***************************************************************************
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
tries to compile and match it, deriving options from the string itself. If
STANDALONE is defined, a main program that calls the driver with the contents
of specified files is compiled, and commentary on what is happening is output.
If an argument starts with '=' the rest of it it is taken as a literal string
rather than a file name. This allows easy testing of short strings.
Written by Philip Hazel, October 2016
***************************************************************************/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#define MAX_MATCH_SIZE 1000
#define DFA_WORKSPACE_COUNT 100
#define ALLOWED_COMPILE_OPTIONS \
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
PCRE2_NO_AUTO_CAPTURE| \
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
PCRE2_UTF)
#define ALLOWED_MATCH_OPTIONS \
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
/* This is the callout function. Its only purpose is to halt matching if there
are more than 100 callouts, as one way of stopping too much time being spent on
fruitless matches. The callout data is a pointer to the counter. */
static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
(void)cb; /* Avoid unused parameter warning */
*((uint32_t *)callout_data) += 1;
return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
}
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
"no previous prototype" warning when compiling at high warning level. */
int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
/* Here's the driving function. */
int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
{
uint32_t compile_options;
uint32_t match_options;
pcre2_match_data *match_data = NULL;
pcre2_match_context *match_context = NULL;
size_t match_size;
int dfa_workspace[DFA_WORKSPACE_COUNT];
int r1, r2;
int i;
if (size < 1) return 0;
/* Limiting the length of the subject for matching stops fruitless searches
in large trees taking too much time. */
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
/* Figure out some options to use. Initialize the random number to ensure
repeatability. Ensure that we get a 32-bit unsigned random number for testing
options. (RAND_MAX is required to be at least 32767, but is commonly
2147483647, which excludes the top bit.) */
srand((unsigned int)(data[size/2]));
r1 = rand();
r2 = rand();
/* Ensure that all undefined option bits are zero (waste of time trying them)
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
\C in random patterns is highly likely to cause a crash. */
compile_options =
((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
PCRE2_NEVER_BACKSLASH_C;
match_options =
((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
allowed together and just give an immediate error return. */
if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
/* Do the compile with and without the options, and after a successful compile,
likewise do the match with and without the options. */
for (i = 0; i < 2; i++)
{
uint32_t callout_count;
int errorcode;
PCRE2_SIZE erroroffset;
pcre2_code *code;
#ifdef STANDALONE
printf("Compile options %.8x never_backslash_c", compile_options);
printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
#endif
code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
&errorcode, &erroroffset, NULL);
/* Compilation succeeded */
if (code != NULL)
{
int j;
uint32_t save_match_options = match_options;
/* Create match data and context blocks only when we first need them. Set
low match and depth limits to avoid wasting too much searching large
pattern trees. Almost all matches are going to fail. */
if (match_data == NULL)
{
match_data = pcre2_match_data_create(32, NULL);
if (match_data == NULL)
{
#ifdef STANDALONE
printf("** Failed to create match data block\n");
#endif
return 0;
}
}
if (match_context == NULL)
{
match_context = pcre2_match_context_create(NULL);
if (match_context == NULL)
{
#ifdef STANDALONE
printf("** Failed to create match context block\n");
#endif
return 0;
}
(void)pcre2_set_match_limit(match_context, 100);
(void)pcre2_set_depth_limit(match_context, 100);
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
}
/* Match twice, with and without options. */
for (j = 0; j < 2; j++)
{
#ifdef STANDALONE
printf("Match options %.8x", match_options);
printf("%s%s%s%s%s%s%s%s%s%s\n",
((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
#endif
callout_count = 0;
errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
match_options, match_data, match_context);
#ifdef STANDALONE
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
{
unsigned char buffer[256];
pcre2_get_error_message(errorcode, buffer, 256);
printf("Match failed: error %d: %s\n", errorcode, buffer);
}
#endif
match_options = 0; /* For second time */
}
/* Match with DFA twice, with and without options. */
match_options = save_match_options & ~PCRE2_NO_JIT; /* Not valid for DFA */
for (j = 0; j < 2; j++)
{
#ifdef STANDALONE
printf("DFA match options %.8x", match_options);
printf("%s%s%s%s%s%s%s%s%s\n",
((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
#endif
callout_count = 0;
errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
(PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
dfa_workspace, DFA_WORKSPACE_COUNT);
#ifdef STANDALONE
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
{
unsigned char buffer[256];
pcre2_get_error_message(errorcode, buffer, 256);
printf("Match failed: error %d: %s\n", errorcode, buffer);
}
#endif
match_options = 0; /* For second time */
}
match_options = save_match_options; /* Reset for the second compile */
pcre2_code_free(code);
}
/* Compilation failed */
else
{
unsigned char buffer[256];
pcre2_get_error_message(errorcode, buffer, 256);
#ifdef STANDALONE
printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
#else
if (strstr((const char *)buffer, "internal error") != NULL) abort();
#endif
}
compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
}
if (match_data != NULL) pcre2_match_data_free(match_data);
if (match_context != NULL) pcre2_match_context_free(match_context);
return 0;
}
/* Optional main program. */
#ifdef STANDALONE
int main(int argc, char **argv)
{
int i;
if (argc < 2)
{
printf("** No arguments given\n");
return 0;
}
for (i = 1; i < argc; i++)
{
size_t filelen;
size_t readsize;
unsigned char *buffer;
FILE *f;
/* Handle a literal string. Copy to an exact size buffer so that checks for
overrunning work. */
if (argv[i][0] == '=')
{
readsize = strlen(argv[i]) - 1;
printf("------ <Literal> ------\n");
printf("Length = %lu\n", readsize);
printf("%.*s\n", (int)readsize, argv[i]+1);
buffer = (unsigned char *)malloc(readsize);
if (buffer == NULL)
printf("** Failed to allocate %lu bytes of memory\n", readsize);
else
{
memcpy(buffer, argv[i]+1, readsize);
LLVMFuzzerTestOneInput(buffer, readsize);
free(buffer);
}
continue;
}
/* Handle a string given in a file */
f = fopen(argv[i], "rb");
if (f == NULL)
{
printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
continue;
}
printf("------ %s ------\n", argv[i]);
fseek(f, 0, SEEK_END);
filelen = ftell(f);
fseek(f, 0, SEEK_SET);
buffer = (unsigned char *)malloc(filelen);
if (buffer == NULL)
{
printf("** Failed to allocate %lu bytes of memory\n", filelen);
fclose(f);
continue;
}
readsize = fread(buffer, 1, filelen, f);
fclose(f);
if (readsize != filelen)
printf("** File size is %lu but fread() returned %lu\n", filelen, readsize);
else
{
printf("Length = %lu\n", filelen);
LLVMFuzzerTestOneInput(buffer, filelen);
}
free(buffer);
}
return 0;
}
#endif /* STANDALONE */
/* End */

@ -1466,9 +1466,9 @@ do
default:
accelerated_start = NULL;
fast_forward_allowed = FALSE;
break;
continue;
}
continue;
break;
case OP_ONCE:
case OP_BRA:
@ -1834,57 +1834,57 @@ while (cc < ccend)
case OP_BRAZERO:
case OP_BRAMINZERO:
case OP_BRAPOSZERO:
repeat_check = FALSE;
size = 1;
repeat_check = FALSE;
break;
CASE_ITERATOR_PRIVATE_DATA_1
space = 1;
size = -2;
space = 1;
break;
CASE_ITERATOR_PRIVATE_DATA_2A
space = 2;
size = -2;
space = 2;
break;
CASE_ITERATOR_PRIVATE_DATA_2B
space = 2;
size = -(2 + IMM2_SIZE);
space = 2;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_1
space = 1;
size = 1;
space = 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
size = 1;
if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
space = 2;
size = 1;
break;
case OP_TYPEUPTO:
size = 1 + IMM2_SIZE;
if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
space = 2;
size = 1 + IMM2_SIZE;
break;
case OP_TYPEMINUPTO:
space = 2;
size = 1 + IMM2_SIZE;
space = 2;
break;
case OP_CLASS:
case OP_NCLASS:
space = get_class_iterator_size(cc + size);
size = 1 + 32 / sizeof(PCRE2_UCHAR);
space = get_class_iterator_size(cc + size);
break;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
case OP_XCLASS:
space = get_class_iterator_size(cc + size);
size = GET(cc, 1);
space = get_class_iterator_size(cc + size);
break;
#endif
@ -4578,7 +4578,14 @@ if (common->nltype != NLTYPE_ANY)
/* All newlines are ascii, just skip intermediate octets. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
loop = LABEL();
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
else
{
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
@ -6161,9 +6168,9 @@ static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar;
struct sljit_jump *lastchar = NULL;
struct sljit_jump *firstchar;
struct sljit_jump *quit;
struct sljit_jump *quit = NULL;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;
@ -6176,39 +6183,71 @@ if (common->match_end_ptr != 0)
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
{
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (HAS_VIRTUAL_REGISTERS)
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
{
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
if (HAS_VIRTUAL_REGISTERS)
{
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
}
else
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
}
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
}
else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
}
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (HAS_VIRTUAL_REGISTERS)
{
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
}
else
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
}
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
loop = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
loop = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
JUMPHERE(quit);
JUMPHERE(lastchar);
}
JUMPHERE(quit);
JUMPHERE(firstchar);
JUMPHERE(lastchar);
if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
@ -6225,22 +6264,59 @@ else
/* Example: match /^/ to \r\n from offset 1. */
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
move_back(common, NULL, FALSE);
if (common->nltype == NLTYPE_ANY)
move_back(common, NULL, FALSE);
else
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
loop = LABEL();
common->ff_newline_shortcut = loop;
read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
{
if (common->nltype == NLTYPE_ANYCRLF)
{
fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
if (common->mode != PCRE2_JIT_COMPLETE)
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
}
else
{
fast_forward_char_simd(common, common->newline, common->newline, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->mode != PCRE2_JIT_COMPLETE)
{
OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
}
}
}
else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
{
read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);
}
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
{
quit = JUMP(SLJIT_JUMP);
JUMPHERE(foundcr);
if (quit == NULL)
{
quit = JUMP(SLJIT_JUMP);
JUMPHERE(foundcr);
}
notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
@ -6252,7 +6328,9 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
JUMPHERE(notfoundnl);
JUMPHERE(quit);
}
JUMPHERE(lastchar);
if (lastchar)
JUMPHERE(lastchar);
JUMPHERE(firstchar);
if (common->match_end_ptr != 0)
@ -6493,9 +6571,11 @@ if (common->invalid_utf)
if (common->mode != PCRE2_JIT_COMPLETE)
{
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
move_back(common, NULL, TRUE);
check_start_used_ptr(common);
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
}
}
@ -7594,25 +7674,43 @@ if (needstype || needsscript)
}
cc = ccbegin;
if (needstype)
{
/* TMP2 has already been shifted by 2 */
if (!needschar)
{
OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
}
else
{
OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
typereg = RETURN_ADDR;
}
}
else if (needschar)
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
}
if (needschar)
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
if (needstype)
else if (needstype)
{
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
if (!needschar)
{
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
}
else
{
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
@ -7620,6 +7718,8 @@ if (needstype || needsscript)
typereg = RETURN_ADDR;
}
}
else if (needschar)
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
}
#endif /* SUPPORT_UNICODE */
@ -13581,7 +13681,7 @@ if (common->has_then)
set_then_offsets(common, common->start, NULL);
}
compiler = sljit_create_compiler(allocator_data);
compiler = sljit_create_compiler(allocator_data, NULL);
if (!compiler)
{
SLJIT_FREE(common->optimized_cbracket, allocator_data);
@ -13983,7 +14083,7 @@ else
{
/* This case is highly unlikely since we just recently
freed a lot of memory. Not impossible though. */
sljit_free_code(executable_func);
sljit_free_code(executable_func, NULL);
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
@ -14097,13 +14197,13 @@ if (executable_allocator_is_working == 0)
/* Checks whether the executable allocator is working. This check
might run multiple times in multi-threaded environments, but the
result should not be affected by it. */
void *ptr = SLJIT_MALLOC_EXEC(32);
void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
executable_allocator_is_working = -1;
if (ptr != NULL)
{
SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr));
SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
executable_allocator_is_working = 1;
}
}

@ -89,7 +89,7 @@ int i;
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
{
if (functions->executable_funcs[i] != NULL)
sljit_free_code(functions->executable_funcs[i]);
sljit_free_code(functions->executable_funcs[i], NULL);
PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data);
}

@ -0,0 +1,347 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
This module by Zoltan Herczeg and Sebastian Pop
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
# if defined(FFCS)
# if defined(FF_UTF)
# define FF_FUN ffcs_utf
# else
# define FF_FUN ffcs
# endif
# elif defined(FFCS_2)
# if defined(FF_UTF)
# define FF_FUN ffcs_2_utf
# else
# define FF_FUN ffcs_2
# endif
# elif defined(FFCS_MASK)
# if defined(FF_UTF)
# define FF_FUN ffcs_mask_utf
# else
# define FF_FUN ffcs_mask
# endif
# elif defined(FFCPS_0)
# if defined (FF_UTF)
# define FF_FUN ffcps_0_utf
# else
# define FF_FUN ffcps_0
# endif
# elif defined (FFCPS_1)
# if defined (FF_UTF)
# define FF_FUN ffcps_1_utf
# else
# define FF_FUN ffcps_1
# endif
# elif defined (FFCPS_DEFAULT)
# if defined (FF_UTF)
# define FF_FUN ffcps_default_utf
# else
# define FF_FUN ffcps_default
# endif
# endif
static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 *str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
#undef FF_FUN
{
quad_word qw;
int_char ic;
SLJIT_UNUSED_ARG(offs1);
SLJIT_UNUSED_ARG(offs2);
ic.x = chars;
#if defined(FFCS)
sljit_u8 c1 = ic.c.c1;
vect_t vc1 = VDUPQ(c1);
#elif defined(FFCS_2)
sljit_u8 c1 = ic.c.c1;
vect_t vc1 = VDUPQ(c1);
sljit_u8 c2 = ic.c.c2;
vect_t vc2 = VDUPQ(c2);
#elif defined(FFCS_MASK)
sljit_u8 c1 = ic.c.c1;
vect_t vc1 = VDUPQ(c1);
sljit_u8 mask = ic.c.c2;
vect_t vmask = VDUPQ(mask);
#endif
#if defined(FFCPS)
compare_type compare1_type = compare_match1;
compare_type compare2_type = compare_match1;
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
PCRE2_UCHAR char1a = ic.c.c1;
PCRE2_UCHAR char2a = ic.c.c3;
# ifdef FFCPS_CHAR1A2A
cmp1a = VDUPQ(char1a);
cmp2a = VDUPQ(char2a);
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
# else
PCRE2_UCHAR char1b = ic.c.c2;
PCRE2_UCHAR char2b = ic.c.c4;
if (char1a == char1b)
{
cmp1a = VDUPQ(char1a);
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
}
else
{
sljit_u32 bit1 = char1a ^ char1b;
if (is_powerof2(bit1))
{
compare1_type = compare_match1i;
cmp1a = VDUPQ(char1a | bit1);
cmp1b = VDUPQ(bit1);
}
else
{
compare1_type = compare_match2;
cmp1a = VDUPQ(char1a);
cmp1b = VDUPQ(char1b);
}
}
if (char2a == char2b)
{
cmp2a = VDUPQ(char2a);
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
}
else
{
sljit_u32 bit2 = char2a ^ char2b;
if (is_powerof2(bit2))
{
compare2_type = compare_match1i;
cmp2a = VDUPQ(char2a | bit2);
cmp2b = VDUPQ(bit2);
}
else
{
compare2_type = compare_match2;
cmp2a = VDUPQ(char2a);
cmp2b = VDUPQ(char2b);
}
}
# endif
str_ptr += IN_UCHARS(offs1);
#endif
#if PCRE2_CODE_UNIT_WIDTH != 8
vect_t char_mask = VDUPQ(0xff);
#endif
#if defined(FF_UTF)
restart:;
#endif
#if defined(FFCPS)
sljit_u8 *p1 = str_ptr - diff;
#endif
sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf);
vect_t data = VLD1Q(str_ptr);
#if PCRE2_CODE_UNIT_WIDTH != 8
data = VANDQ(data, char_mask);
#endif
#if defined(FFCS)
vect_t eq = VCEQQ(data, vc1);
#elif defined(FFCS_2)
vect_t eq1 = VCEQQ(data, vc1);
vect_t eq2 = VCEQQ(data, vc2);
vect_t eq = VORRQ(eq1, eq2);
#elif defined(FFCS_MASK)
vect_t eq = VORRQ(data, vmask);
eq = VCEQQ(eq, vc1);
#elif defined(FFCPS)
# if defined(FFCPS_DIFF1)
vect_t prev_data = data;
# endif
vect_t data2;
if (p1 < str_ptr)
{
data2 = VLD1Q(str_ptr - diff);
#if PCRE2_CODE_UNIT_WIDTH != 8
data2 = VANDQ(data2, char_mask);
#endif
}
else
data2 = shift_left_n_lanes(data, offs1 - offs2);
if (compare1_type == compare_match1)
data = VCEQQ(data, cmp1a);
else
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
if (compare2_type == compare_match1)
data2 = VCEQQ(data2, cmp2a);
else
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
vect_t eq = VANDQ(data, data2);
#endif
VST1Q(qw.mem, eq);
/* Ignore matches before the first STR_PTR. */
if (align_offset < 8)
{
qw.dw[0] >>= align_offset * 8;
if (qw.dw[0])
{
str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
goto match;
}
if (qw.dw[1])
{
str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
goto match;
}
}
else
{
qw.dw[1] >>= (align_offset - 8) * 8;
if (qw.dw[1])
{
str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
goto match;
}
}
str_ptr += 16;
while (str_ptr < str_end)
{
vect_t orig_data = VLD1Q(str_ptr);
#if PCRE2_CODE_UNIT_WIDTH != 8
orig_data = VANDQ(orig_data, char_mask);
#endif
data = orig_data;
#if defined(FFCS)
eq = VCEQQ(data, vc1);
#elif defined(FFCS_2)
eq1 = VCEQQ(data, vc1);
eq2 = VCEQQ(data, vc2);
eq = VORRQ(eq1, eq2);
#elif defined(FFCS_MASK)
eq = VORRQ(data, vmask);
eq = VCEQQ(eq, vc1);
#endif
#if defined(FFCPS)
# if defined (FFCPS_DIFF1)
data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
# else
data2 = VLD1Q(str_ptr - diff);
# if PCRE2_CODE_UNIT_WIDTH != 8
data2 = VANDQ(data2, char_mask);
# endif
# endif
# ifdef FFCPS_CHAR1A2A
data = VCEQQ(data, cmp1a);
data2 = VCEQQ(data2, cmp2a);
# else
if (compare1_type == compare_match1)
data = VCEQQ(data, cmp1a);
else
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
if (compare2_type == compare_match1)
data2 = VCEQQ(data2, cmp2a);
else
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
# endif
eq = VANDQ(data, data2);
#endif
VST1Q(qw.mem, eq);
if (qw.dw[0])
str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
else if (qw.dw[1])
str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
else {
str_ptr += 16;
#if defined (FFCPS_DIFF1)
prev_data = orig_data;
#endif
continue;
}
match:;
if (str_ptr >= str_end)
/* Failed match. */
return NULL;
#if defined(FF_UTF)
if (utf_continue(str_ptr + IN_UCHARS(-offs1)))
{
/* Not a match. */
str_ptr += IN_UCHARS(1);
goto restart;
}
#endif
/* Match. */
#if defined (FFCPS)
str_ptr -= IN_UCHARS(offs1);
#endif
return str_ptr;
}
/* Failed match. */
return NULL;
}

File diff suppressed because it is too large Load Diff

@ -6115,8 +6115,8 @@ BOOL has_req_cu = FALSE;
BOOL startline;
#if PCRE2_CODE_UNIT_WIDTH == 8
BOOL memchr_not_found_first_cu = FALSE;
BOOL memchr_not_found_first_cu2 = FALSE;
BOOL memchr_not_found_first_cu;
BOOL memchr_not_found_first_cu2;
#endif
PCRE2_UCHAR first_cu = 0;
@ -6709,6 +6709,11 @@ FRAGMENT_RESTART:
start_partial = match_partial = NULL;
mb->hitend = FALSE;
#if PCRE2_CODE_UNIT_WIDTH == 8
memchr_not_found_first_cu = FALSE;
memchr_not_found_first_cu2 = FALSE;
#endif
for(;;)
{
PCRE2_SPTR new_start_match;
@ -7187,6 +7192,7 @@ if (utf && end_subject != true_end_subject &&
starting code units in 8-bit and 16-bit modes. */
start_match = end_subject + 1;
#if PCRE2_CODE_UNIT_WIDTH != 32
while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
start_match++;

836
pcre2/src/pcre2_printint.c Normal file

@ -0,0 +1,836 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. It can also optionally be included in
pcre2_compile.c for detailed debugging in error situations. */
/* Tables of operator names. The same 8-bit table is used for all code unit
widths, so it must be defined only once. The list itself is defined in
pcre2_internal.h, which is #included by pcre2test before this file. */
#ifndef OP_LISTS_DEFINED
static const char *OP_names[] = { OP_NAME_LIST };
#define OP_LISTS_DEFINED
#endif
/* The functions and tables herein must all have mode-dependent names. */
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
#define print_char PCRE2_SUFFIX(print_char_)
#define print_custring PCRE2_SUFFIX(print_custring_)
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
#define print_prop PCRE2_SUFFIX(print_prop_)
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre2_internal.h.
The contents of the table are, however, mode-dependent. */
static const uint8_t OP_lengths[] = { OP_LENGTHS };
/*************************************************
* Print one character from a string *
*************************************************/
/* In UTF mode the character may occupy more than one code unit.
Arguments:
f file to write to
ptr pointer to first code unit of the character
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
Returns: number of additional code units used
*/
static unsigned int
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
{
uint32_t c = *ptr;
BOOL one_code_unit = !utf;
/* If UTF is supported and requested, check for a valid single code unit. */
#ifdef SUPPORT_UNICODE
if (utf)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
one_code_unit = c < 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
one_code_unit = (c & 0xfc00) != 0xd800;
#else
one_code_unit = (c & 0xfffff800u) != 0xd800u;
#endif /* CODE_UNIT_WIDTH */
}
#endif /* SUPPORT_UNICODE */
/* Handle a valid one-code-unit character at any width. */
if (one_code_unit)
{
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
else if (c < 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%02x}", c);
return 0;
}
/* Code for invalid UTF code units and multi-unit UTF characters is different
for each width. If UTF is not supported, control should never get here, but we
need a return statement to keep the compiler happy. */
#ifndef SUPPORT_UNICODE
return 0;
#else
/* Malformed UTF-8 should occur only if the sanity check has been turned off.
Rather than swallow random bytes, just stop if we hit a bad one. Print it with
\X instead of \x as an indication. */
#if PCRE2_CODE_UNIT_WIDTH == 8
if ((c & 0xc0) != 0xc0)
{
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
return 0;
}
else
{
int i;
int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
int s = 6*a;
c = (c & PRIV(utf8_table3)[a]) << s;
for (i = 1; i <= a; i++)
{
if ((ptr[i] & 0xc0) != 0x80)
{
fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
return i - 1;
}
s -= 6;
c |= (ptr[i] & 0x3f) << s;
}
fprintf(f, "\\x{%x}", c);
return a;
}
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
Print it with \X instead of \x as an indication. */
#if PCRE2_CODE_UNIT_WIDTH == 16
if ((ptr[1] & 0xfc00) != 0xdc00)
{
fprintf(f, "\\X{%x}", c);
return 0;
}
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
fprintf(f, "\\x{%x}", c);
return 1;
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
/* For UTF-32 we get here only for a malformed code unit, which should only
occur if the sanity check has been turned off. Print it with \X instead of \x
as an indication. */
#if PCRE2_CODE_UNIT_WIDTH == 32
fprintf(f, "\\X{%x}", c);
return 0;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
#endif /* SUPPORT_UNICODE */
}
/*************************************************
* Print string as a list of code units *
*************************************************/
/* These take no account of UTF as they always print each individual code unit.
The string is zero-terminated for print_custring(); the length is given for
print_custring_bylen().
Arguments:
f file to write to
ptr point to the string
len length for print_custring_bylen()
Returns: nothing
*/
static void
print_custring(FILE *f, PCRE2_SPTR ptr)
{
while (*ptr != '\0')
{
uint32_t c = *ptr++;
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
}
}
static void
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
{
for (; len > 0; len--)
{
uint32_t c = *ptr++;
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
}
}
/*************************************************
* Find Unicode property name *
*************************************************/
/* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, we just put one into this function. */
static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int i;
for (i = PRIV(utt_size) - 1; i >= 0; i--)
{
if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
}
return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
#else /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif /* SUPPORT_UNICODE */
}
/*************************************************
* Print Unicode property value *
*************************************************/
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
pseudo-property that contains a pointer to a list of case-equivalent
characters.
Arguments:
f file to write to
code pointer in the compiled code
before text to print before
after text to print after
Returns: nothing
*/
static void
print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
{
if (code[1] != PT_CLIST)
{
fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
code[2]), after);
}
else
{
const char *not = (*code == OP_PROP)? "" : "not ";
const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
fprintf (f, "%s%sclist", before, not);
while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
fprintf(f, "%s", after);
}
}
/*************************************************
* Print compiled pattern *
*************************************************/
/* The print_lengths flag controls whether offsets and lengths of items are
printed. Lenths can be turned off from pcre2test so that automatic tests on
bytecode can be written that do not depend on the value of LINK_SIZE.
Arguments:
re a compiled pattern
f the file to write to
print_lengths show various lengths
Returns: nothing
*/
static void
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
{
PCRE2_SPTR codestart, nametable, code;
uint32_t nesize = re->name_entry_size;
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
code = codestart = nametable + re->name_count * re->name_entry_size;
for(;;)
{
PCRE2_SPTR ccode;
uint32_t c;
int i;
const char *flag = " ";
unsigned int extra = 0;
if (print_lengths)
fprintf(f, "%3d ", (int)(code - codestart));
else
fprintf(f, " ");
switch(*code)
{
/* ========================================================================== */
/* These cases are never obeyed. This is a fudge that causes a compile-
time error if the vectors OP_names or OP_lengths, which are indexed
by opcode, are not the correct length. It seems to be the only way to do
such a check at compile time, as the sizeof() operator does not work in
the C preprocessor. */
case OP_TABLE_LENGTH:
case OP_TABLE_LENGTH +
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
(sizeof(OP_lengths) == OP_TABLE_LENGTH)):
return;
/* ========================================================================== */
case OP_END:
fprintf(f, " %s\n", OP_names[*code]);
fprintf(f, "------------------------------------------------------------------\n");
return;
case OP_CHAR:
fprintf(f, " ");
do
{
code++;
code += 1 + print_char(f, code, utf);
}
while (*code == OP_CHAR);
fprintf(f, "\n");
continue;
case OP_CHARI:
fprintf(f, " /i ");
do
{
code++;
code += 1 + print_char(f, code, utf);
}
while (*code == OP_CHARI);
fprintf(f, "\n");
continue;
case OP_CBRA:
case OP_CBRAPOS:
case OP_SCBRA:
case OP_SCBRAPOS:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
break;
case OP_BRA:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
case OP_KETRMAX:
case OP_KETRMIN:
case OP_KETRPOS:
case OP_ALT:
case OP_KET:
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ASSERT_NA:
case OP_ASSERTBACK_NA:
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_COND:
case OP_SCOND:
case OP_REVERSE:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", OP_names[*code]);
break;
case OP_CLOSE:
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
break;
case OP_CREF:
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
break;
case OP_DNCREF:
{
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
fprintf(f, " %s Cond ref <", flag);
print_custring(f, entry);
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
}
break;
case OP_RREF:
c = GET2(code, 1);
if (c == RREF_ANY)
fprintf(f, " Cond recurse any");
else
fprintf(f, " Cond recurse %d", c);
break;
case OP_DNRREF:
{
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
fprintf(f, " %s Cond recurse <", flag);
print_custring(f, entry);
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
}
break;
case OP_FALSE:
fprintf(f, " Cond false");
break;
case OP_TRUE:
fprintf(f, " Cond true");
break;
case OP_STARI:
case OP_MINSTARI:
case OP_POSSTARI:
case OP_PLUSI:
case OP_MINPLUSI:
case OP_POSPLUSI:
case OP_QUERYI:
case OP_MINQUERYI:
case OP_POSQUERYI:
flag = "/i";
/* Fall through */
case OP_STAR:
case OP_MINSTAR:
case OP_POSSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPOSSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
fprintf(f, " %s ", flag);
if (*code >= OP_TYPESTAR)
{
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
{
print_prop(f, code + 1, "", " ");
extra = 2;
}
else fprintf(f, "%s", OP_names[code[1]]);
}
else extra = print_char(f, code+1, utf);
fprintf(f, "%s", OP_names[*code]);
break;
case OP_EXACTI:
case OP_UPTOI:
case OP_MINUPTOI:
case OP_POSUPTOI:
flag = "/i";
/* Fall through */
case OP_EXACT:
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
fprintf(f, " %s ", flag);
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
fprintf(f, "{");
if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
break;
case OP_TYPEEXACT:
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
{
print_prop(f, code + IMM2_SIZE + 1, " ", " ");
extra = 2;
}
else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
fprintf(f, "{");
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
break;
case OP_NOTI:
flag = "/i";
/* Fall through */
case OP_NOT:
fprintf(f, " %s [^", flag);
extra = print_char(f, code + 1, utf);
fprintf(f, "]");
break;
case OP_NOTSTARI:
case OP_NOTMINSTARI:
case OP_NOTPOSSTARI:
case OP_NOTPLUSI:
case OP_NOTMINPLUSI:
case OP_NOTPOSPLUSI:
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
case OP_NOTPOSQUERYI:
flag = "/i";
/* Fall through */
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPOSSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTPOSPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
case OP_NOTPOSQUERY:
fprintf(f, " %s [^", flag);
extra = print_char(f, code + 1, utf);
fprintf(f, "]%s", OP_names[*code]);
break;
case OP_NOTEXACTI:
case OP_NOTUPTOI:
case OP_NOTMINUPTOI:
case OP_NOTPOSUPTOI:
flag = "/i";
/* Fall through */
case OP_NOTEXACT:
case OP_NOTUPTO:
case OP_NOTMINUPTO:
case OP_NOTPOSUPTO:
fprintf(f, " %s [^", flag);
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
fprintf(f, "]{");
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
else
if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
break;
case OP_RECURSE:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", OP_names[*code]);
break;
case OP_REFI:
flag = "/i";
/* Fall through */
case OP_REF:
fprintf(f, " %s \\%d", flag, GET2(code,1));
ccode = code + OP_lengths[*code];
goto CLASS_REF_REPEAT;
case OP_DNREFI:
flag = "/i";
/* Fall through */
case OP_DNREF:
{
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
fprintf(f, " %s \\k<", flag);
print_custring(f, entry);
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
}
ccode = code + OP_lengths[*code];
goto CLASS_REF_REPEAT;
case OP_CALLOUT:
fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
GET(code, 1), GET(code, 1 + LINK_SIZE));
break;
case OP_CALLOUT_STR:
c = code[1 + 4*LINK_SIZE];
fprintf(f, " %s %c", OP_names[*code], c);
extra = GET(code, 1 + 2*LINK_SIZE);
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
if (c == PRIV(callout_start_delims)[i])
{
c = PRIV(callout_end_delims)[i];
break;
}
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
GET(code, 1 + LINK_SIZE));
break;
case OP_PROP:
case OP_NOTPROP:
print_prop(f, code, " ", "");
break;
/* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
in having this code always here, and it makes it less messy without all
those #ifdefs. */
case OP_CLASS:
case OP_NCLASS:
case OP_XCLASS:
{
unsigned int min, max;
BOOL printmap;
BOOL invertmap = FALSE;
uint8_t *map;
uint8_t inverted_map[32];
fprintf(f, " [");
if (*code == OP_XCLASS)
{
extra = GET(code, 1);
ccode = code + LINK_SIZE + 1;
printmap = (*ccode & XCL_MAP) != 0;
if ((*ccode & XCL_NOT) != 0)
{
invertmap = (*ccode & XCL_HASPROP) == 0;
fprintf(f, "^");
}
ccode++;
}
else
{
printmap = TRUE;
ccode = code + 1;
}
/* Print a bit map */
if (printmap)
{
map = (uint8_t *)ccode;
if (invertmap)
{
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
map = inverted_map;
}
for (i = 0; i < 256; i++)
{
if ((map[i/8] & (1u << (i&7))) != 0)
{
int j;
for (j = i+1; j < 256; j++)
if ((map[j/8] & (1u << (j&7))) == 0) break;
if (i == '-' || i == ']') fprintf(f, "\\");
if (PRINTABLE(i)) fprintf(f, "%c", i);
else fprintf(f, "\\x%02x", i);
if (--j > i)
{
if (j != i + 1) fprintf(f, "-");
if (j == '-' || j == ']') fprintf(f, "\\");
if (PRINTABLE(j)) fprintf(f, "%c", j);
else fprintf(f, "\\x%02x", j);
}
i = j;
}
}
ccode += 32 / sizeof(PCRE2_UCHAR);
}
/* For an XCLASS there is always some additional data */
if (*code == OP_XCLASS)
{
PCRE2_UCHAR ch;
while ((ch = *ccode++) != XCL_END)
{
BOOL not = FALSE;
const char *notch = "";
switch(ch)
{
case XCL_NOTPROP:
not = TRUE;
notch = "^";
/* Fall through */
case XCL_PROP:
{
unsigned int ptype = *ccode++;
unsigned int pvalue = *ccode++;
switch(ptype)
{
case PT_PXGRAPH:
fprintf(f, "[:%sgraph:]", notch);
break;
case PT_PXPRINT:
fprintf(f, "[:%sprint:]", notch);
break;
case PT_PXPUNCT:
fprintf(f, "[:%spunct:]", notch);
break;
default:
fprintf(f, "\\%c{%s}", (not? 'P':'p'),
get_ucpname(ptype, pvalue));
break;
}
}
break;
default:
ccode += 1 + print_char(f, ccode, utf);
if (ch == XCL_RANGE)
{
fprintf(f, "-");
ccode += 1 + print_char(f, ccode, utf);
}
break;
}
}
}
/* Indicate a non-UTF class which was created by negation */
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
/* Handle repeats after a class or a back reference */
CLASS_REF_REPEAT:
switch(*ccode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSPLUS:
case OP_CRPOSQUERY:
fprintf(f, "%s", OP_names[*ccode]);
extra += OP_lengths[*ccode];
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
min = GET2(ccode,1);
max = GET2(ccode,1 + IMM2_SIZE);
if (max == 0) fprintf(f, "{%u,}", min);
else fprintf(f, "{%u,%u}", min, max);
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
extra += OP_lengths[*ccode];
break;
/* Do nothing if it's not a repeat; this code stops picky compilers
warning about the lack of a default code path. */
default:
break;
}
}
break;
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
fprintf(f, " %s ", OP_names[*code]);
print_custring_bylen(f, code + 2, code[1]);
extra += code[1];
break;
case OP_THEN:
fprintf(f, " %s", OP_names[*code]);
break;
case OP_CIRCM:
case OP_DOLLM:
flag = "/m";
/* Fall through */
/* Anything else is just an item with no data, but possibly a flag. */
default:
fprintf(f, " %s %s", flag, OP_names[*code]);
break;
}
code += OP_lengths[*code] + extra;
fprintf(f, "\n");
}
}
/* End of pcre2_printint.c */

494
pcre2/src/pcre2demo.c Normal file

@ -0,0 +1,494 @@
/*************************************************
* PCRE2 DEMONSTRATION PROGRAM *
*************************************************/
/* This is a demonstration program to illustrate a straightforward way of
using the PCRE2 regular expression library from a C program. See the
pcre2sample documentation for a short discussion ("man pcre2sample" if you have
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
incompatible with the original PCRE API.
There are actually three libraries, each supporting a different code unit
width. This demonstration program uses the 8-bit library. The default is to
process each code unit as a separate character, but if the pattern begins with
"(*UTF)", both it and the subject are treated as UTF-8 strings, where
characters may occupy multiple code units.
In Unix-like environments, if PCRE2 is installed in your standard system
libraries, you should be able to compile this program using this command:
cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
If PCRE2 is not installed in a standard place, it is likely to be installed
with support for the pkg-config mechanism. If you have pkg-config, you can
compile this program using this command:
cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
If you do not have pkg-config, you may have to use something like this:
cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
-R/usr/local/lib -lpcre2-8 -o pcre2demo
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
library files for PCRE2 are installed on your system. Only some operating
systems (Solaris is one) use the -R option.
Building under Windows:
If you want to statically link this program against a non-dll .a file, you must
define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
the following line. */
/* #define PCRE2_STATIC */
/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
For a program that uses only one code unit width, setting it to 8, 16, or 32
makes it possible to use generic function names such as pcre2_compile(). Note
that just changing 8 to 16 (for example) is not sufficient to convert this
program to process 16-bit characters. Even in a fully 16-bit environment, where
string-handling functions such as strcmp() and printf() work with 16-bit
characters, the code for handling the table of named substrings will still need
to be modified. */
#define PCRE2_CODE_UNIT_WIDTH 8
#include <stdio.h>
#include <string.h>
#include <pcre2.h>
/**************************************************************************
* Here is the program. The API includes the concept of "contexts" for *
* setting up unusual interface requirements for compiling and matching, *
* such as custom memory managers and non-standard newline definitions. *
* This program does not do any of this, so it makes no use of contexts, *
* always passing NULL where a context could be given. *
**************************************************************************/
int main(int argc, char **argv)
{
pcre2_code *re;
PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */
PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */
PCRE2_SPTR name_table;
int crlf_is_newline;
int errornumber;
int find_all;
int i;
int rc;
int utf8;
uint32_t option_bits;
uint32_t namecount;
uint32_t name_entry_size;
uint32_t newline;
PCRE2_SIZE erroroffset;
PCRE2_SIZE *ovector;
PCRE2_SIZE subject_length;
pcre2_match_data *match_data;
/**************************************************************************
* First, sort out the command line. There is only one possible option at *
* the moment, "-g" to request repeated matching to find all occurrences, *
* like Perl's /g option. We set the variable find_all to a non-zero value *
* if the -g option is present. *
**************************************************************************/
find_all = 0;
for (i = 1; i < argc; i++)
{
if (strcmp(argv[i], "-g") == 0) find_all = 1;
else if (argv[i][0] == '-')
{
printf("Unrecognised option %s\n", argv[i]);
return 1;
}
else break;
}
/* After the options, we require exactly two arguments, which are the pattern,
and the subject string. */
if (argc - i != 2)
{
printf("Exactly two arguments required: a regex and a subject string\n");
return 1;
}
/* Pattern and subject are char arguments, so they can be straightforwardly
cast to PCRE2_SPTR because we are working in 8-bit code units. The subject
length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact
defined to be size_t. */
pattern = (PCRE2_SPTR)argv[i];
subject = (PCRE2_SPTR)argv[i+1];
subject_length = (PCRE2_SIZE)strlen((char *)subject);
/*************************************************************************
* Now we are going to compile the regular expression pattern, and handle *
* any errors that are detected. *
*************************************************************************/
re = pcre2_compile(
pattern, /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0, /* default options */
&errornumber, /* for error number */
&erroroffset, /* for error offset */
NULL); /* use default compile context */
/* Compilation failed: print the error message and exit. */
if (re == NULL)
{
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
buffer);
return 1;
}
/*************************************************************************
* If the compilation succeeded, we call PCRE2 again, in order to do a *
* pattern match against the subject string. This does just ONE match. If *
* further matching is needed, it will be done below. Before running the *
* match we must set up a match_data block for holding the result. Using *
* pcre2_match_data_create_from_pattern() ensures that the block is *
* exactly the right size for the number of capturing parentheses in the *
* pattern. If you need to know the actual size of a match_data block as *
* a number of bytes, you can find it like this: *
* *
* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data); *
*************************************************************************/
match_data = pcre2_match_data_create_from_pattern(re, NULL);
/* Now run the match. */
rc = pcre2_match(
re, /* the compiled pattern */
subject, /* the subject string */
subject_length, /* the length of the subject */
0, /* start at offset 0 in the subject */
0, /* default options */
match_data, /* block for storing the result */
NULL); /* use default match context */
/* Matching failed: handle error cases */
if (rc < 0)
{
switch(rc)
{
case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
/*
Handle other special cases if you like
*/
default: printf("Matching error %d\n", rc); break;
}
pcre2_match_data_free(match_data); /* Release memory used for the match */
pcre2_code_free(re); /* data and the compiled pattern. */
return 1;
}
/* Match succeded. Get a pointer to the output vector, where string offsets are
stored. */
ovector = pcre2_get_ovector_pointer(match_data);
printf("Match succeeded at offset %d\n", (int)ovector[0]);
/*************************************************************************
* We have found the first match within the subject string. If the output *
* vector wasn't big enough, say so. Then output any substrings that were *
* captured. *
*************************************************************************/
/* The output vector wasn't big enough. This should not happen, because we used
pcre2_match_data_create_from_pattern() above. */
if (rc == 0)
printf("ovector was not big enough for all the captured substrings\n");
/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion
to set the start of a match later than its end. In this demonstration program,
we just detect this case and give up. */
if (ovector[0] > ovector[1])
{
printf("\\K was used in an assertion to set the match start after its end.\n"
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
(char *)(subject + ovector[1]));
printf("Run abandoned\n");
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return 1;
}
/* Show substrings stored in the output vector by number. Obviously, in a real
application you might want to do things other than print them. */
for (i = 0; i < rc; i++)
{
PCRE2_SPTR substring_start = subject + ovector[2*i];
PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
}
/**************************************************************************
* That concludes the basic part of this demonstration program. We have *
* compiled a pattern, and performed a single match. The code that follows *
* shows first how to access named substrings, and then how to code for *
* repeated matches on the same subject. *
**************************************************************************/
/* See if there are any named substrings, and if so, show them by name. First
we have to extract the count of named parentheses from the pattern. */
(void)pcre2_pattern_info(
re, /* the compiled pattern */
PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
&namecount); /* where to put the answer */
if (namecount == 0) printf("No named substrings\n"); else
{
PCRE2_SPTR tabptr;
printf("Named substrings\n");
/* Before we can access the substrings, we must extract the table for
translating names to numbers, and the size of each entry in the table. */
(void)pcre2_pattern_info(
re, /* the compiled pattern */
PCRE2_INFO_NAMETABLE, /* address of the table */
&name_table); /* where to put the answer */
(void)pcre2_pattern_info(
re, /* the compiled pattern */
PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
&name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. In the 8-bit library the number is held in two
bytes, most significant first. */
tabptr = name_table;
for (i = 0; i < namecount; i++)
{
int n = (tabptr[0] << 8) | tabptr[1];
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
tabptr += name_entry_size;
}
}
/*************************************************************************
* If the "-g" option was given on the command line, we want to continue *
* to search for additional matches in the subject string, in a similar *
* way to the /g option in Perl. This turns out to be trickier than you *
* might think because of the possibility of matching an empty string. *
* What happens is as follows: *
* *
* If the previous match was NOT for an empty string, we can just start *
* the next match at the end of the previous one. *
* *
* If the previous match WAS for an empty string, we can't do that, as it *
* would lead to an infinite loop. Instead, a call of pcre2_match() is *
* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
* first of these tells PCRE2 that an empty string at the start of the *
* subject is not a valid match; other possibilities must be tried. The *
* second flag restricts PCRE2 to one match attempt at the initial string *
* position. If this match succeeds, an alternative to the empty string *
* match has been found, and we can print it and proceed round the loop, *
* advancing by the length of whatever was found. If this match does not *
* succeed, we still stay in the loop, advancing by just one character. *
* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be *
* more than one byte. *
* *
* However, there is a complication concerned with newlines. When the *
* newline convention is such that CRLF is a valid newline, we must *
* advance by two characters rather than one. The newline convention can *
* be set in the regex by (*CR), etc.; if not, we must find the default. *
*************************************************************************/
if (!find_all) /* Check for -g */
{
pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_code_free(re); /* for the match data and the pattern. */
return 0; /* Exit the program. */
}
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
sequence. First, find the options with which the regex was compiled and extract
the UTF state. */
(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
utf8 = (option_bits & PCRE2_UTF) != 0;
/* Now find the newline convention and see whether CRLF is a valid newline
sequence. */
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
newline == PCRE2_NEWLINE_CRLF ||
newline == PCRE2_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */
for (;;)
{
uint32_t options = 0; /* Normally no options */
PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
/* If the previous match was for an empty string, we are finished if we are
at the end of the subject. Otherwise, arrange to run another match at the
same point to see if a non-empty match can be found. */
if (ovector[0] == ovector[1])
{
if (ovector[0] == subject_length) break;
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
}
/* If the previous match was not an empty string, there is one tricky case to
consider. If a pattern contains \K within a lookbehind assertion at the
start, the end of the matched string can be at the offset where the match
started. Without special action, this leads to a loop that keeps on matching
the same substring. We must detect this case and arrange to move the start on
by one character. The pcre2_get_startchar() function returns the starting
offset that was passed to pcre2_match(). */
else
{
PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
if (start_offset <= startchar)
{
if (startchar >= subject_length) break; /* Reached end of subject. */
start_offset = startchar + 1; /* Advance by one character. */
if (utf8) /* If UTF-8, it may be more */
{ /* than one code unit. */
for (; start_offset < subject_length; start_offset++)
if ((subject[start_offset] & 0xc0) != 0x80) break;
}
}
}
/* Run the next matching operation */
rc = pcre2_match(
re, /* the compiled pattern */
subject, /* the subject string */
subject_length, /* the length of the subject */
start_offset, /* starting offset in the subject */
options, /* options */
match_data, /* block for storing the result */
NULL); /* use default match context */
/* This time, a result of NOMATCH isn't an error. If the value in "options"
is zero, it just means we have found all possible matches, so the loop ends.
Otherwise, it means we have failed to find a non-empty-string match at a
point where there was a previous empty-string match. In this case, we do what
Perl does: advance the matching position by one character, and continue. We
do this by setting the "end of previous match" offset, because that is picked
up at the top of the loop as the point at which to start again.
There are two complications: (a) When CRLF is a valid newline sequence, and
the current position is just before it, advance by an extra byte. (b)
Otherwise we must ensure that we skip an entire UTF character if we are in
UTF mode. */
if (rc == PCRE2_ERROR_NOMATCH)
{
if (options == 0) break; /* All matches found */
ovector[1] = start_offset + 1; /* Advance one code unit */
if (crlf_is_newline && /* If CRLF is a newline & */
start_offset < subject_length - 1 && /* we are at CRLF, */
subject[start_offset] == '\r' &&
subject[start_offset + 1] == '\n')
ovector[1] += 1; /* Advance by one more. */
else if (utf8) /* Otherwise, ensure we */
{ /* advance a whole UTF-8 */
while (ovector[1] < subject_length) /* character. */
{
if ((subject[ovector[1]] & 0xc0) != 0x80) break;
ovector[1] += 1;
}
}
continue; /* Go round the loop again */
}
/* Other matching errors are not recoverable. */
if (rc < 0)
{
printf("Matching error %d\n", rc);
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return 1;
}
/* Match succeded */
printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
/* The match succeeded, but the output vector wasn't big enough. This
should not happen. */
if (rc == 0)
printf("ovector was not big enough for all the captured substrings\n");
/* We must guard against patterns such as /(?=.\K)/ that use \K in an
assertion to set the start of a match later than its end. In this
demonstration program, we just detect this case and give up. */
if (ovector[0] > ovector[1])
{
printf("\\K was used in an assertion to set the match start after its end.\n"
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
(char *)(subject + ovector[1]));
printf("Run abandoned\n");
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return 1;
}
/* As before, show substrings stored in the output vector by number, and then
also any named substrings. */
for (i = 0; i < rc; i++)
{
PCRE2_SPTR substring_start = subject + ovector[2*i];
size_t substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
}
if (namecount == 0) printf("No named substrings\n"); else
{
PCRE2_SPTR tabptr = name_table;
printf("Named substrings\n");
for (i = 0; i < namecount; i++)
{
int n = (tabptr[0] << 8) | tabptr[1];
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
tabptr += name_entry_size;
}
}
} /* End of loop to find second and subsequent matches */
printf("\n");
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return 0;
}
/* End of pcre2demo.c */

423
pcre2/src/pcre2posix.c Normal file

@ -0,0 +1,423 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module is a wrapper that provides a POSIX API to the underlying PCRE2
functions. The operative functions are called pcre2_regcomp(), etc., with
wrappers that use the plain POSIX names. In addition, pcre2posix.h defines the
POSIX names as macros for the pcre2_xxx functions, so any program that includes
it and uses the POSIX names will call the base functions directly. This makes
it easier for an application to be sure it gets the PCRE2 versions in the
presence of other POSIX regex libraries. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Ensure that the PCRE2POSIX_EXP_xxx macros are set appropriately for
compiling these functions. This must come before including pcre2posix.h, where
they are set for an application (using these functions) if they have not
previously been set. */
#if defined(_WIN32) && !defined(PCRE2_STATIC)
# define PCRE2POSIX_EXP_DECL extern __declspec(dllexport)
# define PCRE2POSIX_EXP_DEFN __declspec(dllexport)
#endif
/* Older versions of MSVC lack snprintf(). This define allows for
warning/error-free compilation and testing with MSVC compilers back to at least
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
#if defined(_MSC_VER) && (_MSC_VER < 1900)
#define snprintf _snprintf
#endif
/* Compile-time error numbers start at this value. It should probably never be
changed. This #define is a copy of the one in pcre2_internal.h. */
#define COMPILE_ERROR_BASE 100
/* Standard C headers */
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* PCRE2 headers */
#include "pcre2.h"
#include "pcre2posix.h"
/* When compiling with the MSVC compiler, it is sometimes necessary to include
a "calling convention" before exported function names. (This is secondhand
information; I know nothing about MSVC myself). For example, something like
void __cdecl function(....)
might be needed. In order to make this easy, all the exported functions have
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
set, we ensure here that it has no effect. */
#ifndef PCRE2_CALL_CONVENTION
#define PCRE2_CALL_CONVENTION
#endif
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
codes: most go to REG_BADPAT. The second table lists, in pairs, those that
don't. */
static const int eint1[] = {
0, /* No error */
REG_EESCAPE, /* \ at end of pattern */
REG_EESCAPE, /* \c at end of pattern */
REG_EESCAPE, /* unrecognized character follows \ */
REG_BADBR, /* numbers out of order in {} quantifier */
/* 5 */
REG_BADBR, /* number too big in {} quantifier */
REG_EBRACK, /* missing terminating ] for character class */
REG_ECTYPE, /* invalid escape sequence in character class */
REG_ERANGE, /* range out of order in character class */
REG_BADRPT, /* nothing to repeat */
/* 10 */
REG_ASSERT, /* internal error: unexpected repeat */
REG_BADPAT, /* unrecognized character after (? or (?- */
REG_BADPAT, /* POSIX named classes are supported only within a class */
REG_BADPAT, /* POSIX collating elements are not supported */
REG_EPAREN, /* missing ) */
/* 15 */
REG_ESUBREG, /* reference to non-existent subpattern */
REG_INVARG, /* pattern passed as NULL */
REG_INVARG, /* unknown compile-time option bit(s) */
REG_EPAREN, /* missing ) after (?# comment */
REG_ESIZE, /* parentheses nested too deeply */
/* 20 */
REG_ESIZE, /* regular expression too large */
REG_ESPACE, /* failed to get memory */
REG_EPAREN, /* unmatched closing parenthesis */
REG_ASSERT /* internal error: code overflow */
};
static const int eint2[] = {
30, REG_ECTYPE, /* unknown POSIX class name */
32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
56, REG_INVARG, /* internal error: unknown newline setting */
92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */
};
/* Table of texts corresponding to POSIX error codes */
static const char *const pstring[] = {
"", /* Dummy for value 0 */
"internal error", /* REG_ASSERT */
"invalid repeat counts in {}", /* BADBR */
"pattern error", /* BADPAT */
"? * + invalid", /* BADRPT */
"unbalanced {}", /* EBRACE */
"unbalanced []", /* EBRACK */
"collation error - not relevant", /* ECOLLATE */
"bad class", /* ECTYPE */
"bad escape sequence", /* EESCAPE */
"empty expression", /* EMPTY */
"unbalanced ()", /* EPAREN */
"bad range inside []", /* ERANGE */
"expression too big", /* ESIZE */
"failed to get memory", /* ESPACE */
"bad back reference", /* ESUBREG */
"bad argument", /* INVARG */
"match failed" /* NOMATCH */
};
/*************************************************
* Wrappers with traditional POSIX names *
*************************************************/
/* Keep defining them to preseve the ABI for applications linked to the pcre2
POSIX library before these names were changed into macros in pcre2posix.h.
This also ensures that the POSIX names are callable from languages that do not
include pcre2posix.h. It is vital to #undef the macro definitions from
pcre2posix.h! */
#undef regerror
PCRE2POSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
return pcre2_regerror(errcode, preg, errbuf, errbuf_size);
}
#undef regfree
PCRE2POSIX_EXP_DECL void regfree(regex_t *);
PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
regfree(regex_t *preg)
{
pcre2_regfree(preg);
}
#undef regcomp
PCRE2POSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
regcomp(regex_t *preg, const char *pattern, int cflags)
{
return pcre2_regcomp(preg, pattern, cflags);
}
#undef regexec
PCRE2POSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
regmatch_t *, int);
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
return pcre2_regexec(preg, string, nmatch, pmatch, eflags);
}
/*************************************************
* Translate error code to string *
*************************************************/
PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
pcre2_regerror(int errcode, const regex_t *preg, char *errbuf,
size_t errbuf_size)
{
int used;
const char *message;
message = (errcode <= 0 || errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
"unknown error code" : pstring[errcode];
if (preg != NULL && (int)preg->re_erroffset != -1)
{
used = snprintf(errbuf, errbuf_size, "%s at offset %-6d", message,
(int)preg->re_erroffset);
}
else
{
used = snprintf(errbuf, errbuf_size, "%s", message);
}
return used + 1;
}
/*************************************************
* Free store held by a regex *
*************************************************/
PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_regfree(regex_t *preg)
{
pcre2_match_data_free(preg->re_match_data);
pcre2_code_free(preg->re_pcre2_code);
}
/*************************************************
* Compile a regular expression *
*************************************************/
/*
Arguments:
preg points to a structure for recording the compiled expression
pattern the pattern to compile
cflags compilation flags
Returns: 0 on success
various non-zero codes on failure
*/
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_regcomp(regex_t *preg, const char *pattern, int cflags)
{
PCRE2_SIZE erroffset;
PCRE2_SIZE patlen;
int errorcode;
int options = 0;
int re_nsub = 0;
patlen = ((cflags & REG_PEND) != 0)? (PCRE2_SIZE)(preg->re_endp - pattern) :
PCRE2_ZERO_TERMINATED;
if ((cflags & REG_ICASE) != 0) options |= PCRE2_CASELESS;
if ((cflags & REG_NEWLINE) != 0) options |= PCRE2_MULTILINE;
if ((cflags & REG_DOTALL) != 0) options |= PCRE2_DOTALL;
if ((cflags & REG_NOSPEC) != 0) options |= PCRE2_LITERAL;
if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF;
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
preg->re_cflags = cflags;
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, patlen, options,
&errorcode, &erroffset, NULL);
preg->re_erroffset = erroffset;
if (preg->re_pcre2_code == NULL)
{
unsigned int i;
/* A negative value is a UTF error; otherwise all error codes are greater
than COMPILE_ERROR_BASE, but check, just in case. */
if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT;
errorcode -= COMPILE_ERROR_BASE;
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
return eint1[errorcode];
for (i = 0; i < sizeof(eint2)/sizeof(const int); i += 2)
if (errorcode == eint2[i]) return eint2[i+1];
return REG_BADPAT;
}
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
preg->re_nsub = (size_t)re_nsub;
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
preg->re_erroffset = (size_t)(-1); /* No meaning after successful compile */
if (preg->re_match_data == NULL)
{
pcre2_code_free(preg->re_pcre2_code);
return REG_ESPACE;
}
return 0;
}
/*************************************************
* Match a regular expression *
*************************************************/
/* A suitable match_data block, large enough to hold all possible captures, was
obtained when the pattern was compiled, to save having to allocate and free it
for each match. If REG_NOSUB was specified at compile time, the nmatch and
pmatch arguments are ignored, and the only result is yes/no/error. */
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
int rc, so, eo;
int options = 0;
pcre2_match_data *md = (pcre2_match_data *)preg->re_match_data;
if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL;
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY;
/* When REG_NOSUB was specified, or if no vector has been passed in which to
put captured strings, ensure that nmatch is zero. This will stop any attempt to
write to pmatch. */
if ((preg->re_cflags & REG_NOSUB) != 0 || pmatch == NULL) nmatch = 0;
/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings.
The man page from OS X says "REG_STARTEND affects only the location of the
string, not how it is matched". That is why the "so" value is used to bump the
start location rather than being passed as a PCRE2 "starting offset". */
if ((eflags & REG_STARTEND) != 0)
{
if (pmatch == NULL) return REG_INVARG;
so = pmatch[0].rm_so;
eo = pmatch[0].rm_eo;
}
else
{
so = 0;
eo = (int)strlen(string);
}
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
(PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
/* Successful match */
if (rc >= 0)
{
size_t i;
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
if ((size_t)rc > nmatch) rc = (int)nmatch;
for (i = 0; i < (size_t)rc; i++)
{
pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 :
(int)(ovector[i*2] + so);
pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 :
(int)(ovector[i*2+1] + so);
}
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
return 0;
}
/* Unsuccessful match */
if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
return REG_INVARG;
switch(rc)
{
default: return REG_ASSERT;
case PCRE2_ERROR_BADMODE: return REG_INVARG;
case PCRE2_ERROR_BADMAGIC: return REG_INVARG;
case PCRE2_ERROR_BADOPTION: return REG_INVARG;
case PCRE2_ERROR_BADUTFOFFSET: return REG_INVARG;
case PCRE2_ERROR_MATCHLIMIT: return REG_ESPACE;
case PCRE2_ERROR_NOMATCH: return REG_NOMATCH;
case PCRE2_ERROR_NOMEMORY: return REG_ESPACE;
case PCRE2_ERROR_NULL: return REG_INVARG;
}
}
/* End of pcre2posix.c */

170
pcre2/src/pcre2posix.h Normal file

@ -0,0 +1,170 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. This is
the public header file to be #included by applications that call PCRE2 via the
POSIX wrapper interface.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* Have to include stdlib.h in order to ensure that size_t is defined. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options, mostly defined by POSIX, but with some extras. */
#define REG_ICASE 0x0001 /* Maps to PCRE2_CASELESS */
#define REG_NEWLINE 0x0002 /* Maps to PCRE2_MULTILINE */
#define REG_NOTBOL 0x0004 /* Maps to PCRE2_NOTBOL */
#define REG_NOTEOL 0x0008 /* Maps to PCRE2_NOTEOL */
#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE2_DOTALL */
#define REG_NOSUB 0x0020 /* Do not report what was matched */
#define REG_UTF 0x0040 /* NOT defined by POSIX; maps to PCRE2_UTF */
#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */
#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */
#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE2_UCP */
#define REG_PEND 0x0800 /* GNU feature: pass end pattern by re_endp */
#define REG_NOSPEC 0x1000 /* Maps to PCRE2_LITERAL */
/* This is not used by PCRE2, but by defining it we make it easier
to slot PCRE2 into existing programs that make POSIX calls. */
#define REG_EXTENDED 0
/* Error values. Not all these are relevant or used by the wrapper. */
enum {
REG_ASSERT = 1, /* internal error ? */
REG_BADBR, /* invalid repeat counts in {} */
REG_BADPAT, /* pattern error */
REG_BADRPT, /* ? * + invalid */
REG_EBRACE, /* unbalanced {} */
REG_EBRACK, /* unbalanced [] */
REG_ECOLLATE, /* collation error - not relevant */
REG_ECTYPE, /* bad class */
REG_EESCAPE, /* bad escape sequence */
REG_EMPTY, /* empty expression */
REG_EPAREN, /* unbalanced () */
REG_ERANGE, /* bad range inside [] */
REG_ESIZE, /* expression too big */
REG_ESPACE, /* failed to get memory */
REG_ESUBREG, /* bad back reference */
REG_INVARG, /* bad argument */
REG_NOMATCH /* match failed */
};
/* The structure representing a compiled regular expression. It is also used
for passing the pattern end pointer when REG_PEND is set. */
typedef struct {
void *re_pcre2_code;
void *re_match_data;
const char *re_endp;
size_t re_nsub;
size_t re_erroffset;
int re_cflags;
} regex_t;
/* The structure in which a captured offset is returned. */
typedef int regoff_t;
typedef struct {
regoff_t rm_so;
regoff_t rm_eo;
} regmatch_t;
/* When an application links to a PCRE2 DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
export settings are needed, and are set in pcre2posix.c before including this
file. */
#if defined(_WIN32) && !defined(PCRE2_STATIC) && !defined(PCRE2POSIX_EXP_DECL)
# define PCRE2POSIX_EXP_DECL extern __declspec(dllimport)
# define PCRE2POSIX_EXP_DEFN __declspec(dllimport)
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE2POSIX_EXP_DECL
# ifdef __cplusplus
# define PCRE2POSIX_EXP_DECL extern "C"
# define PCRE2POSIX_EXP_DEFN extern "C"
# else
# define PCRE2POSIX_EXP_DECL extern
# define PCRE2POSIX_EXP_DEFN extern
# endif
#endif
/* The functions. The actual code is in functions with pcre2_xxx names for
uniqueness. POSIX names are provided as macros for API compatibility with POSIX
regex functions. It's done this way to ensure to they are always linked from
the PCRE2 library and not by accident from elsewhere (regex_t differs in size
elsewhere). */
PCRE2POSIX_EXP_DECL int pcre2_regcomp(regex_t *, const char *, int);
PCRE2POSIX_EXP_DECL int pcre2_regexec(const regex_t *, const char *, size_t,
regmatch_t *, int);
PCRE2POSIX_EXP_DECL size_t pcre2_regerror(int, const regex_t *, char *, size_t);
PCRE2POSIX_EXP_DECL void pcre2_regfree(regex_t *);
#define regcomp pcre2_regcomp
#define regexec pcre2_regexec
#define regerror pcre2_regerror
#define regfree pcre2_regfree
/* Debian had a patch that used different names. These are now here to save
them having to maintain their own patch, but are not documented by PCRE2. */
#define PCRE2regcomp pcre2_regcomp
#define PCRE2regexec pcre2_regexec
#define PCRE2regerror pcre2_regerror
#define PCRE2regfree pcre2_regfree
#ifdef __cplusplus
} /* extern "C" */
#endif
/* End of pcre2posix.h */

148
pcre2/test-driver vendored Executable file

@ -0,0 +1,148 @@
#! /bin/sh
# test-driver - basic testsuite driver script.
scriptversion=2018-03-07.03; # UTC
# Copyright (C) 2011-2020 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# Make unconditional expansion of undefined variables an error. This
# helps a lot in preventing typo-related bugs.
set -u
usage_error ()
{
echo "$0: $*" >&2
print_usage >&2
exit 2
}
print_usage ()
{
cat <<END
Usage:
test-driver --test-name=NAME --log-file=PATH --trs-file=PATH
[--expect-failure={yes|no}] [--color-tests={yes|no}]
[--enable-hard-errors={yes|no}] [--]
TEST-SCRIPT [TEST-SCRIPT-ARGUMENTS]
The '--test-name', '--log-file' and '--trs-file' options are mandatory.
END
}
test_name= # Used for reporting.
log_file= # Where to save the output of the test script.
trs_file= # Where to save the metadata of the test run.
expect_failure=no
color_tests=no
enable_hard_errors=yes
while test $# -gt 0; do
case $1 in
--help) print_usage; exit $?;;
--version) echo "test-driver $scriptversion"; exit $?;;
--test-name) test_name=$2; shift;;
--log-file) log_file=$2; shift;;
--trs-file) trs_file=$2; shift;;
--color-tests) color_tests=$2; shift;;
--expect-failure) expect_failure=$2; shift;;
--enable-hard-errors) enable_hard_errors=$2; shift;;
--) shift; break;;
-*) usage_error "invalid option: '$1'";;
*) break;;
esac
shift
done
missing_opts=
test x"$test_name" = x && missing_opts="$missing_opts --test-name"
test x"$log_file" = x && missing_opts="$missing_opts --log-file"
test x"$trs_file" = x && missing_opts="$missing_opts --trs-file"
if test x"$missing_opts" != x; then
usage_error "the following mandatory options are missing:$missing_opts"
fi
if test $# -eq 0; then
usage_error "missing argument"
fi
if test $color_tests = yes; then
# Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'.
red='' # Red.
grn='' # Green.
lgn='' # Light green.
blu='' # Blue.
mgn='' # Magenta.
std='' # No color.
else
red= grn= lgn= blu= mgn= std=
fi
do_exit='rm -f $log_file $trs_file; (exit $st); exit $st'
trap "st=129; $do_exit" 1
trap "st=130; $do_exit" 2
trap "st=141; $do_exit" 13
trap "st=143; $do_exit" 15
# Test script is run here.
"$@" >$log_file 2>&1
estatus=$?
if test $enable_hard_errors = no && test $estatus -eq 99; then
tweaked_estatus=1
else
tweaked_estatus=$estatus
fi
case $tweaked_estatus:$expect_failure in
0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;
0:*) col=$grn res=PASS recheck=no gcopy=no;;
77:*) col=$blu res=SKIP recheck=no gcopy=yes;;
99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;;
*:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;;
*:*) col=$red res=FAIL recheck=yes gcopy=yes;;
esac
# Report the test outcome and exit status in the logs, so that one can
# know whether the test passed or failed simply by looking at the '.log'
# file, without the need of also peaking into the corresponding '.trs'
# file (automake bug#11814).
echo "$res $test_name (exit status: $estatus)" >>$log_file
# Report outcome to console.
echo "${col}${res}${std}: $test_name"
# Register the test result, and other relevant metadata.
echo ":test-result: $res" > $trs_file
echo ":global-test-result: $res" >> $trs_file
echo ":recheck: $recheck" >> $trs_file
echo ":copy-in-global-log: $gcopy" >> $trs_file
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End: