mirror of
https://github.com/fish-shell/fish-shell.git
synced 2025-03-31 17:07:27 +08:00
Update pcre2 to 10.36
This performs *most* of the pcreectomy of b418e36f226b754b. It removes the tests and docs and all the large files, but it does *not* touch any of the files except for making Find_Package quiet (783a895b1163149d) or remove the AUTHORS and similar files as they are very small. This seems much easier, cleaner, nicer and has 90% of the effect of the old - the size now is 2.7MB instead of 2.1MB, down from 10MB. Fixes #7599
This commit is contained in:
parent
2d78c9a0d9
commit
97be837ff5
pcre2
132htmlAUTHORSCMakeLists.txtCOPYINGChangeLogCheckManCleanTxtDetrailHACKINGINSTALLLICENCENEWSNON-AUTOTOOLS-BUILDPrepareReleaseREADMEaclocal.m4ar-lib
cmake
compileconfig.guessconfig.subconfigure.acdepcompinstall-shlibpcre2-16.pc.inlibpcre2-32.pc.inlibpcre2-8.pc.inlibpcre2-posix.pc.inmissingpcre2-config.insrc
config.h.genericconfig.h.inpcre2.h.genericpcre2_compile.cpcre2_dftables.cpcre2_fuzzsupport.cpcre2_jit_compile.cpcre2_jit_misc.cpcre2_jit_neon_inc.hpcre2_jit_simd_inc.hpcre2_match.cpcre2_printint.cpcre2demo.cpcre2posix.cpcre2posix.h
test-driver
314
pcre2/132html
vendored
Executable file
314
pcre2/132html
vendored
Executable file
@ -0,0 +1,314 @@
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to turn PCRE2 man pages into HTML
|
||||
|
||||
|
||||
# Subroutine to handle font changes and other escapes
|
||||
|
||||
sub do_line {
|
||||
my($s) = $_[0];
|
||||
|
||||
$s =~ s/</</g; # Deal with < and >
|
||||
$s =~ s/>/>/g;
|
||||
$s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
|
||||
$s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
|
||||
$s =~ s"\\e"\\"g;
|
||||
$s =~ s/(?<=Copyright )\(c\)/©/g;
|
||||
$s;
|
||||
}
|
||||
|
||||
# Subroutine to ensure not in a paragraph
|
||||
|
||||
sub end_para {
|
||||
if ($inpara)
|
||||
{
|
||||
print TEMP "</PRE>\n" if ($inpre);
|
||||
print TEMP "</P>\n";
|
||||
}
|
||||
$inpara = $inpre = 0;
|
||||
$wrotetext = 0;
|
||||
}
|
||||
|
||||
# Subroutine to start a new paragraph
|
||||
|
||||
sub new_para {
|
||||
&end_para();
|
||||
print TEMP "<P>\n";
|
||||
$inpara = 1;
|
||||
}
|
||||
|
||||
|
||||
# Main program
|
||||
|
||||
$innf = 0;
|
||||
$inpara = 0;
|
||||
$inpre = 0;
|
||||
$wrotetext = 0;
|
||||
$toc = 0;
|
||||
$ref = 1;
|
||||
|
||||
while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
|
||||
{
|
||||
$toc = 1 if $ARGV[0] eq "-toc";
|
||||
shift;
|
||||
}
|
||||
|
||||
# Initial output to STDOUT
|
||||
|
||||
print <<End ;
|
||||
<html>
|
||||
<head>
|
||||
<title>$ARGV[0] specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>$ARGV[0] man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
End
|
||||
|
||||
print "<ul>\n" if ($toc);
|
||||
|
||||
open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
|
||||
|
||||
while (<STDIN>)
|
||||
{
|
||||
# Handle lines beginning with a dot
|
||||
|
||||
if (/^\./)
|
||||
{
|
||||
# Some of the PCRE2 man pages used to contain instances of .br. However,
|
||||
# they should have all been removed because they cause trouble in some
|
||||
# (other) automated systems that translate man pages to HTML. Complain if
|
||||
# we find .br or .in (another macro that is deprecated).
|
||||
|
||||
if (/^\.br/ || /^\.in/)
|
||||
{
|
||||
print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
|
||||
print STDERR "*** $_\n";
|
||||
die "*** Processing abandoned\n";
|
||||
}
|
||||
|
||||
# Instead of .br, relevent "literal" sections are enclosed in .nf/.fi.
|
||||
|
||||
elsif (/^\.nf/)
|
||||
{
|
||||
$innf = 1;
|
||||
}
|
||||
|
||||
elsif (/^\.fi/)
|
||||
{
|
||||
$innf = 0;
|
||||
}
|
||||
|
||||
# Handling .sp is subtle. If it is inside a literal section, do nothing if
|
||||
# the next line is a non literal text line; similarly, if not inside a
|
||||
# literal section, do nothing if a literal follows, unless we are inside
|
||||
# a .nf/.fi section or about to enter one. The point being that the <pre>
|
||||
# and </pre> that delimit literal sections will do the spacing. Always skip
|
||||
# if no previous output.
|
||||
|
||||
elsif (/^\.sp/)
|
||||
{
|
||||
if ($wrotetext)
|
||||
{
|
||||
$_ = <STDIN>;
|
||||
if ($inpre)
|
||||
{
|
||||
print TEMP "\n" if (/^[\s.]/);
|
||||
}
|
||||
else
|
||||
{
|
||||
print TEMP "<br>\n<br>\n" if ($innf || /^\.nf/ || !/^[\s.]/);
|
||||
}
|
||||
redo; # Now process the lookahead line we just read
|
||||
}
|
||||
}
|
||||
elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
|
||||
{
|
||||
&new_para();
|
||||
}
|
||||
elsif (/^\.SH\s*("?)(.*)\1/)
|
||||
{
|
||||
# Ignore the NAME section
|
||||
if ($2 =~ /^NAME\b/)
|
||||
{
|
||||
<STDIN>;
|
||||
next;
|
||||
}
|
||||
|
||||
&end_para();
|
||||
my($title) = &do_line($2);
|
||||
if ($toc)
|
||||
{
|
||||
printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
|
||||
$ref, $ref);
|
||||
printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
|
||||
$ref);
|
||||
$ref++;
|
||||
}
|
||||
else
|
||||
{
|
||||
print TEMP "<br><b>\n$title\n</b><br>\n";
|
||||
}
|
||||
}
|
||||
elsif (/^\.SS\s*("?)(.*)\1/)
|
||||
{
|
||||
&end_para();
|
||||
my($title) = &do_line($2);
|
||||
print TEMP "<br><b>\n$title\n</b><br>\n";
|
||||
}
|
||||
elsif (/^\.B\s*(.*)/)
|
||||
{
|
||||
&new_para() if (!$inpara);
|
||||
$_ = &do_line($1);
|
||||
s/"(.*?)"/$1/g;
|
||||
print TEMP "<b>$_</b>\n";
|
||||
$wrotetext = 1;
|
||||
}
|
||||
elsif (/^\.I\s*(.*)/)
|
||||
{
|
||||
&new_para() if (!$inpara);
|
||||
$_ = &do_line($1);
|
||||
s/"(.*?)"/$1/g;
|
||||
print TEMP "<i>$_</i>\n";
|
||||
$wrotetext = 1;
|
||||
}
|
||||
|
||||
# A comment that starts "HREF" takes the next line as a name that
|
||||
# is turned into a hyperlink, using the text given, which might be
|
||||
# in a special font. If it ends in () or (digits) or punctuation, they
|
||||
# aren't part of the link.
|
||||
|
||||
elsif (/^\.\\"\s*HREF/)
|
||||
{
|
||||
$_=<STDIN>;
|
||||
chomp;
|
||||
$_ = &do_line($_);
|
||||
$_ =~ s/\s+$//;
|
||||
$_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
|
||||
print TEMP "<a href=\"$1.html\">$_</a>\n";
|
||||
}
|
||||
|
||||
# A comment that starts "HTML" inserts literal HTML
|
||||
|
||||
elsif (/^\.\\"\s*HTML\s*(.*)/)
|
||||
{
|
||||
print TEMP $1;
|
||||
}
|
||||
|
||||
# A comment that starts < inserts that HTML at the end of the
|
||||
# *next* input line - so as not to get a newline between them.
|
||||
|
||||
elsif (/^\.\\"\s*(<.*>)/)
|
||||
{
|
||||
my($markup) = $1;
|
||||
$_=<STDIN>;
|
||||
chomp;
|
||||
$_ = &do_line($_);
|
||||
$_ =~ s/\s+$//;
|
||||
print TEMP "$_$markup\n";
|
||||
}
|
||||
|
||||
# A comment that starts JOIN joins the next two lines together, with one
|
||||
# space between them. Then that line is processed. This is used in some
|
||||
# displays where two lines are needed for the "man" version. JOINSH works
|
||||
# the same, except that it assumes this is a shell command, so removes
|
||||
# continuation backslashes.
|
||||
|
||||
elsif (/^\.\\"\s*JOIN(SH)?/)
|
||||
{
|
||||
my($one,$two);
|
||||
$one = <STDIN>;
|
||||
$two = <STDIN>;
|
||||
$one =~ s/\s*\\e\s*$// if (defined($1));
|
||||
chomp($one);
|
||||
$two =~ s/^\s+//;
|
||||
$_ = "$one $two";
|
||||
redo; # Process the joined lines
|
||||
}
|
||||
|
||||
# .EX/.EE are used in the pcre2demo page to bracket the entire program,
|
||||
# which is unmodified except for turning backslash into "\e".
|
||||
|
||||
elsif (/^\.EX\s*$/)
|
||||
{
|
||||
print TEMP "<PRE>\n";
|
||||
while (<STDIN>)
|
||||
{
|
||||
last if /^\.EE\s*$/;
|
||||
s/\\e/\\/g;
|
||||
s/&/&/g;
|
||||
s/</</g;
|
||||
s/>/>/g;
|
||||
print TEMP;
|
||||
}
|
||||
}
|
||||
|
||||
# Ignore anything not recognized
|
||||
|
||||
next;
|
||||
}
|
||||
|
||||
# Line does not begin with a dot. Replace blank lines with new paragraphs
|
||||
|
||||
if (/^\s*$/)
|
||||
{
|
||||
&end_para() if ($wrotetext);
|
||||
next;
|
||||
}
|
||||
|
||||
# Convert fonts changes and output an ordinary line. Ensure that indented
|
||||
# lines are marked as literal.
|
||||
|
||||
$_ = &do_line($_);
|
||||
&new_para() if (!$inpara);
|
||||
|
||||
if (/^\s/)
|
||||
{
|
||||
if (!$inpre)
|
||||
{
|
||||
print TEMP "<pre>\n";
|
||||
$inpre = 1;
|
||||
}
|
||||
}
|
||||
elsif ($inpre)
|
||||
{
|
||||
print TEMP "</pre>\n";
|
||||
$inpre = 0;
|
||||
}
|
||||
|
||||
# Add <br> to the end of a non-literal line if we are within .nf/.fi
|
||||
|
||||
$_ .= "<br>\n" if (!$inpre && $innf);
|
||||
|
||||
print TEMP;
|
||||
$wrotetext = 1;
|
||||
}
|
||||
|
||||
# The TOC, if present, will have been written - terminate it
|
||||
|
||||
print "</ul>\n" if ($toc);
|
||||
|
||||
# Copy the remainder to the standard output
|
||||
|
||||
close(TEMP);
|
||||
open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
|
||||
|
||||
print while (<TEMP>);
|
||||
|
||||
print <<End ;
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
End
|
||||
|
||||
close(TEMP);
|
||||
unlink("/tmp/$$");
|
||||
|
||||
# End
|
36
pcre2/AUTHORS
vendored
Normal file
36
pcre2/AUTHORS
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
THE MAIN PCRE2 LIBRARY CODE
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2020 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
|
||||
--------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2020 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2020 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
####
|
67
pcre2/CMakeLists.txt
vendored
67
pcre2/CMakeLists.txt
vendored
@ -92,11 +92,13 @@
|
||||
# library versioning.
|
||||
# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
|
||||
# 2020-04-28 PH added function check for memfd_create based on Carlo's patch
|
||||
# 2020-05-25 PH added a check for Intel CET
|
||||
# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel
|
||||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
# Increased minimum to 2.8.0 to support newer add_test features.
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
|
||||
# Increased minimum to 2.8.5 to support GNUInstallDirs.
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.5)
|
||||
|
||||
# Set policy CMP0026 to avoid warnings for the use of LOCATION in
|
||||
# GET_TARGET_PROPERTY. This should no longer be required.
|
||||
@ -108,7 +110,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
|
||||
|
||||
LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src")
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I\"${PROJECT_SOURCE_DIR}/src\"")
|
||||
|
||||
# external packages
|
||||
FIND_PACKAGE( BZip2 QUIET )
|
||||
@ -123,6 +125,7 @@ INCLUDE(CheckFunctionExists)
|
||||
INCLUDE(CheckSymbolExists)
|
||||
INCLUDE(CheckIncludeFile)
|
||||
INCLUDE(CheckTypeSize)
|
||||
INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR
|
||||
|
||||
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
||||
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
|
||||
@ -132,11 +135,11 @@ CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
|
||||
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
|
||||
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
|
||||
|
||||
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
|
||||
CHECK_FUNCTION_EXISTS(memfd_create HAVE_MEMFD_CREATE)
|
||||
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
|
||||
CHECK_FUNCTION_EXISTS(secure_getenv HAVE_SECURE_GETENV)
|
||||
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
|
||||
CHECK_SYMBOL_EXISTS(bcopy "strings.h" HAVE_BCOPY)
|
||||
CHECK_SYMBOL_EXISTS(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)
|
||||
CHECK_SYMBOL_EXISTS(memmove "string.h" HAVE_MEMMOVE)
|
||||
CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)
|
||||
CHECK_SYMBOL_EXISTS(strerror "string.h" HAVE_STRERROR)
|
||||
|
||||
set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
|
||||
@ -146,6 +149,24 @@ CHECK_C_SOURCE_COMPILES(
|
||||
)
|
||||
set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
|
||||
# code was written by PH, trying to imitate the logic from the autotools
|
||||
# configuration.
|
||||
|
||||
CHECK_C_SOURCE_COMPILES(
|
||||
"#ifndef __CET__
|
||||
#error CET is not enabled
|
||||
#endif
|
||||
int main() { return 0; }"
|
||||
INTEL_CET_ENABLED
|
||||
)
|
||||
|
||||
IF (INTEL_CET_ENABLED)
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
|
||||
ENDIF(INTEL_CET_ENABLED)
|
||||
|
||||
|
||||
|
||||
# User-configurable options
|
||||
#
|
||||
# Note: CMakeSetup displays these in alphabetical order, regardless of
|
||||
@ -445,7 +466,7 @@ foreach(configure_line ${configure_lines})
|
||||
foreach(_substitution_variable ${SEARCHED_VARIABLES})
|
||||
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
|
||||
if (NOT ${_substitution_variable_upper})
|
||||
string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MACTHED_STRING ${configure_line})
|
||||
string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line})
|
||||
if (CMAKE_MATCH_1)
|
||||
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
|
||||
endif()
|
||||
@ -475,14 +496,23 @@ CONFIGURE_FILE(src/pcre2.h.in
|
||||
${PROJECT_BINARY_DIR}/pcre2.h
|
||||
@ONLY)
|
||||
|
||||
# Make sure to not link debug libs
|
||||
# against release libs and vice versa
|
||||
IF(WIN32)
|
||||
SET(CMAKE_DEBUG_POSTFIX "d")
|
||||
ENDIF(WIN32)
|
||||
|
||||
# Generate pkg-config files
|
||||
|
||||
SET(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
|
||||
SET(prefix ${CMAKE_INSTALL_PREFIX})
|
||||
|
||||
SET(exec_prefix "\${prefix}")
|
||||
SET(libdir "\${exec_prefix}/lib")
|
||||
SET(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
|
||||
SET(includedir "\${prefix}/include")
|
||||
IF(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug))
|
||||
SET(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX})
|
||||
ENDIF()
|
||||
CONFIGURE_FILE(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
|
||||
SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
|
||||
|
||||
@ -622,11 +652,6 @@ IF(MSVC)
|
||||
ENDIF(MSVC)
|
||||
|
||||
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||
# needed to make sure to not link debug libs
|
||||
# against release libs and vice versa
|
||||
IF(WIN32)
|
||||
SET(CMAKE_DEBUG_POSTFIX "d")
|
||||
ENDIF(WIN32)
|
||||
|
||||
SET(targets)
|
||||
|
||||
@ -827,7 +852,9 @@ if test \"$?\" != \"0\"; then exit 1; fi
|
||||
\@echo off
|
||||
setlocal
|
||||
SET srcdir=\"${winsrc}\"
|
||||
SET pcre2test=\"${winexe}\"
|
||||
# The next line was replaced by the following one after a user comment.
|
||||
# SET pcre2test=\"${winexe}\"
|
||||
SET pcre2test=\"${winbin}\\pcre2test.exe\"
|
||||
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
|
||||
call %srcdir%\\RunTest.Bat
|
||||
if errorlevel 1 exit /b 1
|
||||
@ -863,9 +890,9 @@ SET(CMAKE_INSTALL_ALWAYS 1)
|
||||
|
||||
INSTALL(TARGETS ${targets}
|
||||
RUNTIME DESTINATION bin
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib)
|
||||
INSTALL(FILES ${pkg_config_files} DESTINATION lib/pkgconfig)
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
INSTALL(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||
INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
|
||||
DESTINATION bin
|
||||
# Set 0755 permissions
|
||||
@ -916,7 +943,7 @@ IF(PCRE2_SHOW_REPORT)
|
||||
ENDIF(CMAKE_C_FLAGS)
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS "PCRE2 configuration summary:")
|
||||
MESSAGE(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:")
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
|
||||
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
|
||||
|
5
pcre2/COPYING
vendored
Normal file
5
pcre2/COPYING
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
PCRE2 LICENCE
|
||||
|
||||
Please see the file LICENCE in the PCRE2 distribution for licensing details.
|
||||
|
||||
End
|
2434
pcre2/ChangeLog
vendored
Normal file
2434
pcre2/ChangeLog
vendored
Normal file
File diff suppressed because it is too large
Load Diff
78
pcre2/CheckMan
vendored
Executable file
78
pcre2/CheckMan
vendored
Executable file
@ -0,0 +1,78 @@
|
||||
#! /usr/bin/perl
|
||||
|
||||
# A script to scan PCRE2's man pages to check for typos in the control
|
||||
# sequences. I use only a small set of the available repertoire, so it is
|
||||
# straightforward to check that nothing else has slipped in by mistake. This
|
||||
# script should be called in the doc directory.
|
||||
|
||||
$yield = 0;
|
||||
|
||||
while (scalar(@ARGV) > 0)
|
||||
{
|
||||
$line = 0;
|
||||
$file = shift @ARGV;
|
||||
|
||||
open (IN, $file) || die "Failed to open $file\n";
|
||||
|
||||
while (<IN>)
|
||||
{
|
||||
$count = 0;
|
||||
$line++;
|
||||
if (/^\s*$/)
|
||||
{
|
||||
printf "Empty line $line of $file\n";
|
||||
$yield = 1;
|
||||
}
|
||||
elsif (/^\./)
|
||||
{
|
||||
if (!/^\.\s*$|
|
||||
^\.B\s+\S|
|
||||
^\.TH\s\S|
|
||||
^\.SH\s\S|
|
||||
^\.SS\s\S|
|
||||
^\.TP(?:\s?\d+)?\s*$|
|
||||
^\.SM\s*$|
|
||||
^\.br\s*$|
|
||||
^\.rs\s*$|
|
||||
^\.sp\s*$|
|
||||
^\.nf\s*$|
|
||||
^\.fi\s*$|
|
||||
^\.P\s*$|
|
||||
^\.PP\s*$|
|
||||
^\.\\"(?:\ HREF)?\s*$|
|
||||
^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
|
||||
^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
|
||||
^\.\\"\s<\/a>\s*$|
|
||||
^\.\\"\sJOINSH\s*$|
|
||||
^\.\\"\sJOIN\s*$/x
|
||||
)
|
||||
{
|
||||
printf "Bad control line $line of $file\n";
|
||||
$yield = 1;
|
||||
}
|
||||
}
|
||||
elsif (/\\[^ef]|\\f[^IBP]/)
|
||||
{
|
||||
printf "Bad backslash in line $line of $file\n";
|
||||
$yield = 1;
|
||||
}
|
||||
while (/\\f[BI]/g)
|
||||
{
|
||||
$count++;
|
||||
}
|
||||
while (/\\fP/g)
|
||||
{
|
||||
$count--;
|
||||
}
|
||||
if ($count != 0)
|
||||
{
|
||||
printf "Mismatching formatting in line $line of $file\n";
|
||||
$yield = 1;
|
||||
}
|
||||
}
|
||||
|
||||
close(IN);
|
||||
}
|
||||
|
||||
exit $yield;
|
||||
# End
|
113
pcre2/CleanTxt
vendored
Executable file
113
pcre2/CleanTxt
vendored
Executable file
@ -0,0 +1,113 @@
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to take the output of nroff -man and remove all the backspacing and
|
||||
# the page footers and the screen commands etc so that it is more usefully
|
||||
# readable online. In fact, in the latest nroff, intermediate footers don't
|
||||
# seem to be generated any more.
|
||||
|
||||
$blankcount = 0;
|
||||
$lastwascut = 0;
|
||||
$firstheader = 1;
|
||||
|
||||
# Input on STDIN; output to STDOUT.
|
||||
|
||||
while (<STDIN>)
|
||||
{
|
||||
s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
|
||||
s/.\x8//g; # Remove "char, backspace"
|
||||
|
||||
# Handle header lines. Retain only the first one we encounter, but remove
|
||||
# the blank line that follows. Any others (e.g. at end of document) and the
|
||||
# following blank line are dropped.
|
||||
|
||||
if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
|
||||
{
|
||||
if ($firstheader)
|
||||
{
|
||||
$firstheader = 0;
|
||||
print;
|
||||
$lastprinted = $_;
|
||||
$lastwascut = 0;
|
||||
}
|
||||
$_=<STDIN>; # Remove a blank that follows
|
||||
next;
|
||||
}
|
||||
|
||||
# Count runs of empty lines
|
||||
|
||||
if (/^\s*$/)
|
||||
{
|
||||
$blankcount++;
|
||||
$lastwascut = 0;
|
||||
next;
|
||||
}
|
||||
|
||||
# If a chunk of lines has been cut out (page footer) and the next line
|
||||
# has a different indentation, put back one blank line.
|
||||
|
||||
if ($lastwascut && $blankcount < 1 && defined($lastprinted))
|
||||
{
|
||||
($a) = $lastprinted =~ /^(\s*)/;
|
||||
($b) = $_ =~ /^(\s*)/;
|
||||
$blankcount++ if ($a ne $b);
|
||||
}
|
||||
|
||||
# We get here only when we have a non-blank line in hand. If it was preceded
|
||||
# by 3 or more blank lines, read the next 3 lines and see if they are blank.
|
||||
# If so, remove all 7 lines, and remember that we have just done a cut.
|
||||
|
||||
if ($blankcount >= 3)
|
||||
{
|
||||
for ($i = 0; $i < 3; $i++)
|
||||
{
|
||||
$next[$i] = <STDIN>;
|
||||
$next[$i] = "" if !defined $next[$i];
|
||||
$next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
|
||||
$next[$i] =~ s/.\x8//g; # Remove "char, backspace"
|
||||
}
|
||||
|
||||
# Cut out chunks of the form <3 blanks><non-blank><3 blanks>
|
||||
|
||||
if ($next[0] =~ /^\s*$/ &&
|
||||
$next[1] =~ /^\s*$/ &&
|
||||
$next[2] =~ /^\s*$/)
|
||||
{
|
||||
$blankcount -= 3;
|
||||
$lastwascut = 1;
|
||||
}
|
||||
|
||||
# Otherwise output the saved blanks, the current, and the next three
|
||||
# lines. Remember the last printed line.
|
||||
|
||||
else
|
||||
{
|
||||
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
|
||||
print;
|
||||
for ($i = 0; $i < 3; $i++)
|
||||
{
|
||||
$next[$i] =~ s/.\x8//g;
|
||||
print $next[$i];
|
||||
$lastprinted = $_;
|
||||
}
|
||||
$lastwascut = 0;
|
||||
$blankcount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
# This non-blank line is not preceded by 3 or more blank lines. Output
|
||||
# any blanks there are, and the line. Remember it. Force two blank lines
|
||||
# before headings.
|
||||
|
||||
else
|
||||
{
|
||||
$blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
|
||||
defined($lastprinted);
|
||||
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
|
||||
print;
|
||||
$lastprinted = $_;
|
||||
$lastwascut = 0;
|
||||
$blankcount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
# End
|
35
pcre2/Detrail
vendored
Executable file
35
pcre2/Detrail
vendored
Executable file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
# This is a script for removing trailing whitespace from lines in files that
|
||||
# are listed on the command line.
|
||||
|
||||
# This subroutine does the work for one file.
|
||||
|
||||
sub detrail {
|
||||
my($file) = $_[0];
|
||||
my($changed) = 0;
|
||||
open(IN, "$file") || die "Can't open $file for input";
|
||||
@lines = <IN>;
|
||||
close(IN);
|
||||
foreach (@lines)
|
||||
{
|
||||
if (/\s+\n$/)
|
||||
{
|
||||
s/\s+\n$/\n/;
|
||||
$changed = 1;
|
||||
}
|
||||
}
|
||||
if ($changed)
|
||||
{
|
||||
open(OUT, ">$file") || die "Can't open $file for output";
|
||||
print OUT @lines;
|
||||
close(OUT);
|
||||
}
|
||||
}
|
||||
|
||||
# This is the main program
|
||||
|
||||
$, = ""; # Output field separator
|
||||
for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); }
|
||||
|
||||
# End
|
830
pcre2/HACKING
vendored
Normal file
830
pcre2/HACKING
vendored
Normal file
@ -0,0 +1,830 @@
|
||||
Technical Notes about PCRE2
|
||||
---------------------------
|
||||
|
||||
These are very rough technical notes that record potentially useful information
|
||||
about PCRE2 internals. PCRE2 is a library based on the original PCRE library,
|
||||
but with a revised (and incompatible) API. To avoid confusion, the original
|
||||
library is referred to as PCRE1 below. For information about testing PCRE2, see
|
||||
the pcre2test documentation and the comment at the head of the RunTest file.
|
||||
|
||||
PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix
|
||||
releases remain in the 8.xx series. PCRE2 releases started at 10.00 to avoid
|
||||
confusion with PCRE1.
|
||||
|
||||
|
||||
Historical note 1
|
||||
-----------------
|
||||
|
||||
Many years ago I implemented some regular expression functions to an algorithm
|
||||
suggested by Martin Richards. The rather simple patterns were not Unix-like in
|
||||
form, and were quite restricted in what they could do by comparison with Perl.
|
||||
The interesting part about the algorithm was that the amount of space required
|
||||
to hold the compiled form of an expression was known in advance. The code to
|
||||
apply an expression did not operate by backtracking, as the original Henry
|
||||
Spencer code and current PCRE2 and Perl code does, but instead checked all
|
||||
possibilities simultaneously by keeping a list of current states and checking
|
||||
all of them as it advanced through the subject string. In the terminology of
|
||||
Jeffrey Friedl's book, it was a "DFA algorithm", though it was not a
|
||||
traditional Finite State Machine (FSM). When the pattern was all used up, all
|
||||
remaining states were possible matches, and the one matching the longest subset
|
||||
of the subject string was chosen. This did not necessarily maximize the
|
||||
individual wild portions of the pattern, as is expected in Unix and Perl-style
|
||||
regular expressions.
|
||||
|
||||
|
||||
Historical note 2
|
||||
-----------------
|
||||
|
||||
By contrast, the code originally written by Henry Spencer (which was
|
||||
subsequently heavily modified for Perl) compiles the expression twice: once in
|
||||
a dummy mode in order to find out how much store will be needed, and then for
|
||||
real. (The Perl version probably doesn't do this any more; I'm talking about
|
||||
the original library.) The execution function operates by backtracking and
|
||||
maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
|
||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||
Friedl's terminology.
|
||||
|
||||
|
||||
OK, here's the real stuff
|
||||
-------------------------
|
||||
|
||||
For the set of functions that formed the original PCRE1 library in 1997 (which
|
||||
are unrelated to those mentioned above), I tried at first to invent an
|
||||
algorithm that used an amount of store bounded by a multiple of the number of
|
||||
characters in the pattern, to save on compiling time. However, because of the
|
||||
greater complexity in Perl regular expressions, I couldn't do this, even though
|
||||
the then current Perl 5.004 patterns were much simpler than those supported
|
||||
nowadays. In any case, a first pass through the pattern is helpful for other
|
||||
reasons.
|
||||
|
||||
|
||||
Support for 16-bit and 32-bit data strings
|
||||
-------------------------------------------
|
||||
|
||||
The PCRE2 library can be compiled in any combination of 8-bit, 16-bit or 32-bit
|
||||
modes, creating up to three different libraries. In the description that
|
||||
follows, the word "short" is used for a 16-bit data quantity, and the phrase
|
||||
"code unit" is used for a quantity that is a byte in 8-bit mode, a short in
|
||||
16-bit mode and a 32-bit word in 32-bit mode. The names of PCRE2 functions are
|
||||
given in generic form, without the _8, _16, or _32 suffix.
|
||||
|
||||
|
||||
Computing the memory requirement: how it was
|
||||
--------------------------------------------
|
||||
|
||||
Up to and including release 6.7, PCRE1 worked by running a very degenerate
|
||||
first pass to calculate a maximum memory requirement, and then a second pass to
|
||||
do the real compile - which might use a bit less than the predicted amount of
|
||||
memory. The idea was that this would turn out faster than the Henry Spencer
|
||||
code because the first pass is degenerate and the second pass can just store
|
||||
stuff straight into memory, which it knows is big enough.
|
||||
|
||||
|
||||
Computing the memory requirement: how it is
|
||||
-------------------------------------------
|
||||
|
||||
By the time I was working on a potential 6.8 release, the degenerate first pass
|
||||
had become very complicated and hard to maintain. Indeed one of the early
|
||||
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
|
||||
I had a flash of inspiration as to how I could run the real compile function in
|
||||
a "fake" mode that enables it to compute how much memory it would need, while
|
||||
in most cases only ever using a small amount of working memory, and without too
|
||||
many tests of the mode that might slow it down. So I refactored the compiling
|
||||
functions to work this way. This got rid of about 600 lines of source and made
|
||||
further maintenance and development easier. As this was such a major change, I
|
||||
never released 6.8, instead upping the number to 7.0 (other quite major changes
|
||||
were also present in the 7.0 release).
|
||||
|
||||
A side effect of this work was that the previous limit of 200 on the nesting
|
||||
depth of parentheses was removed. However, there was a downside: compiling ran
|
||||
more slowly than before (30% or more, depending on the pattern) because it now
|
||||
did a full analysis of the pattern. My hope was that this would not be a big
|
||||
issue, and in the event, nobody has commented on it.
|
||||
|
||||
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||
(default 250, settable at build time) so as to put a limit on the amount of
|
||||
system stack used by the compile function, which uses recursive function calls
|
||||
for nested parenthesized groups. This is a safety feature for environments with
|
||||
small stacks where the patterns are provided by users.
|
||||
|
||||
|
||||
Yet another pattern scan
|
||||
------------------------
|
||||
|
||||
History repeated itself for PCRE2 release 10.20. A number of bugs relating to
|
||||
named subpatterns had been discovered by fuzzers. Most of these were related to
|
||||
the handling of forward references when it was not known if the named group was
|
||||
unique. (References to non-unique names use a different opcode and more
|
||||
memory.) The use of duplicate group numbers (the (?| facility) also caused
|
||||
issues.
|
||||
|
||||
To get around these problems I adopted a new approach by adding a third pass
|
||||
over the pattern (really a "pre-pass"), which did nothing other than identify
|
||||
all the named subpatterns and their corresponding group numbers. This means
|
||||
that the actual compile (both the memory-computing dummy run and the real
|
||||
compile) has full knowledge of group names and numbers throughout. Several
|
||||
dozen lines of messy code were eliminated, though the new pre-pass was not
|
||||
short. In particular, parsing and skipping over [] classes is complicated.
|
||||
|
||||
While working on 10.22 I realized that I could simplify yet again by moving
|
||||
more of the parsing into the pre-pass, thus avoiding doing it in two places, so
|
||||
after 10.22 was released, the code underwent yet another big refactoring. This
|
||||
is how it is from 10.23 onwards:
|
||||
|
||||
The function called parse_regex() scans the pattern characters, parsing them
|
||||
into literal data and meta characters. It converts escapes such as \x{123}
|
||||
into literals, handles \Q...\E, and skips over comments and non-significant
|
||||
white space. The result of the scanning is put into a vector of 32-bit unsigned
|
||||
integers. Values less than 0x80000000 are literal data. Higher values represent
|
||||
meta-characters. The top 16-bits of such values identify the meta-character,
|
||||
and these are given names such as META_CAPTURE. The lower 16-bits are available
|
||||
for data, for example, the capturing group number. The only situation in which
|
||||
literal data values greater than 0x7fffffff can appear is when the 32-bit
|
||||
library is running in non-UTF mode. This is handled by having a special
|
||||
meta-character that is followed by the 32-bit data value.
|
||||
|
||||
The size of the parsed pattern vector, when auto-callouts are not enabled, is
|
||||
bounded by the length of the pattern (with one exception). The code is written
|
||||
so that each item in the pattern uses no more vector elements than the number
|
||||
of code units in the item itself. The exception is the aforementioned large
|
||||
32-bit number handling. For this reason, 32-bit non-UTF patterns are scanned in
|
||||
advance to check for such values. When auto-callouts are enabled, the generous
|
||||
assumption is made that there will be a callout for each pattern code unit
|
||||
(which of course is only actually true if all code units are literals) plus one
|
||||
at the end. There is a default parsed pattern vector on the system stack, but
|
||||
if this is not big enough, heap memory is used.
|
||||
|
||||
As before, the actual compiling function is run twice, the first time to
|
||||
determine the amount of memory needed for the final compiled pattern. It
|
||||
now processes the parsed pattern vector, not the pattern itself, although some
|
||||
of the parsed items refer to strings in the pattern - for example, group
|
||||
names. As escapes and comments have already been processed, the code is a bit
|
||||
simpler than before.
|
||||
|
||||
Most errors can be diagnosed during the parsing scan. For those that cannot
|
||||
(for example, "lookbehind assertion is not fixed length"), the parsed code
|
||||
contains offsets into the pattern so that the actual compiling code can
|
||||
report where errors are.
|
||||
|
||||
|
||||
The elements of the parsed pattern vector
|
||||
-----------------------------------------
|
||||
|
||||
The word "offset" below means a code unit offset into the pattern. When
|
||||
PCRE2_SIZE (which is usually size_t) is no bigger than uint32_t, an offset is
|
||||
stored in a single parsed pattern element. Otherwise (typically on 64-bit
|
||||
systems) it occupies two elements. The following meta items occupy just one
|
||||
element, with no data:
|
||||
|
||||
META_ACCEPT (*ACCEPT)
|
||||
META_ASTERISK *
|
||||
META_ASTERISK_PLUS *+
|
||||
META_ASTERISK_QUERY *?
|
||||
META_ATOMIC (?> start of atomic group
|
||||
META_CIRCUMFLEX ^ metacharacter
|
||||
META_CLASS [ start of non-empty class
|
||||
META_CLASS_EMPTY [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS
|
||||
META_CLASS_EMPTY_NOT [^] negative empty class - ditto
|
||||
META_CLASS_END ] end of non-empty class
|
||||
META_CLASS_NOT [^ start non-empty negative class
|
||||
META_COMMIT (*COMMIT)
|
||||
META_COND_ASSERT (?(?assertion)
|
||||
META_DOLLAR $ metacharacter
|
||||
META_DOT . metacharacter
|
||||
META_END End of pattern (this value is 0x80000000)
|
||||
META_FAIL (*FAIL)
|
||||
META_KET ) closing parenthesis
|
||||
META_LOOKAHEAD (?= start of lookahead
|
||||
META_LOOKAHEAD_NA (*napla: start of non-atomic lookahead
|
||||
META_LOOKAHEADNOT (?! start of negative lookahead
|
||||
META_NOCAPTURE (?: no capture parens
|
||||
META_PLUS +
|
||||
META_PLUS_PLUS ++
|
||||
META_PLUS_QUERY +?
|
||||
META_PRUNE (*PRUNE) - no argument
|
||||
META_QUERY ?
|
||||
META_QUERY_PLUS ?+
|
||||
META_QUERY_QUERY ??
|
||||
META_RANGE_ESCAPED hyphen in class range with at least one escape
|
||||
META_RANGE_LITERAL hyphen in class range defined literally
|
||||
META_SKIP (*SKIP) - no argument
|
||||
META_THEN (*THEN) - no argument
|
||||
|
||||
The two RANGE values occur only in character classes. They are positioned
|
||||
between two literals that define the start and end of the range. In an EBCDIC
|
||||
evironment it is necessary to know whether either of the range values was
|
||||
specified as an escape. In an ASCII/Unicode environment the distinction is not
|
||||
relevant.
|
||||
|
||||
The following have data in the lower 16 bits, and may be followed by other data
|
||||
elements:
|
||||
|
||||
META_ALT | alternation
|
||||
META_BACKREF back reference
|
||||
META_CAPTURE start of capturing group
|
||||
META_ESCAPE non-literal escape sequence
|
||||
META_RECURSE recursion call
|
||||
|
||||
If the data for META_ALT is non-zero, it is inside a lookbehind, and the data
|
||||
is the length of its branch, for which OP_REVERSE must be generated.
|
||||
|
||||
META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as
|
||||
their data in the lower 16 bits of the element.
|
||||
|
||||
META_BACKREF is followed by an offset if the back reference group number is 10
|
||||
or more. The offsets of the first ocurrences of references to groups whose
|
||||
numbers are less than 10 are put in cb->small_ref_offset[] (only the first
|
||||
occurrence is useful). On 64-bit systems this avoids using more than two parsed
|
||||
pattern elements for items such as \3. The offset is used when an error occurs
|
||||
because the reference is to a non-existent group.
|
||||
|
||||
META_RECURSE is always followed by an offset, for use in error messages.
|
||||
|
||||
META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next
|
||||
element contains the 16-bit type and data property values, packed together.
|
||||
ESC_g and ESC_k are used only for named references - numerical ones are turned
|
||||
into META_RECURSE or META_BACKREF as appropriate. ESC_g and ESC_k are followed
|
||||
by a length and an offset into the pattern to specify the name.
|
||||
|
||||
The following have one data item that follows in the next vector element:
|
||||
|
||||
META_BIGVALUE Next is a literal >= META_END
|
||||
META_OPTIONS (?i) and friends (data is new option bits)
|
||||
META_POSIX POSIX class item (data identifies the class)
|
||||
META_POSIX_NEG negative POSIX class item (ditto)
|
||||
|
||||
The following are followed by a length element, then a number of character code
|
||||
values (which should match with the length):
|
||||
|
||||
META_MARK (*MARK:xxxx)
|
||||
META_COMMIT_ARG )*COMMIT:xxxx)
|
||||
META_PRUNE_ARG (*PRUNE:xxx)
|
||||
META_SKIP_ARG (*SKIP:xxxx)
|
||||
META_THEN_ARG (*THEN:xxxx)
|
||||
|
||||
The following are followed by a length element, then an offset in the pattern
|
||||
that identifies the name:
|
||||
|
||||
META_COND_NAME (?(<name>) or (?('name') or (?(name)
|
||||
META_COND_RNAME (?(R&name)
|
||||
META_COND_RNUMBER (?(Rdigits)
|
||||
META_RECURSE_BYNAME (?&name)
|
||||
META_BACKREF_BYNAME \k'name'
|
||||
|
||||
META_COND_RNUMBER is used for names that start with R and continue with digits,
|
||||
because this is an ambiguous case. It could be a back reference to a group with
|
||||
that name, or it could be a recursion test on a numbered group.
|
||||
|
||||
This one is followed by an offset, for use in error messages, then a number:
|
||||
|
||||
META_COND_NUMBER (?([+-]digits)
|
||||
|
||||
The following is followed just by an offset, for use in error messages:
|
||||
|
||||
META_COND_DEFINE (?(DEFINE)
|
||||
|
||||
The following are also followed just by an offset, but also the lower 16 bits
|
||||
of the main word contain the length of the first branch of the lookbehind
|
||||
group; this is used when generating OP_REVERSE for that branch.
|
||||
|
||||
META_LOOKBEHIND (?<= start of lookbehind
|
||||
META_LOOKBEHIND_NA (*naplb: start of non-atomic lookbehind
|
||||
META_LOOKBEHINDNOT (?<! start of negative lookbehind
|
||||
|
||||
The following are followed by two elements, the minimum and maximum. Repeat
|
||||
values are limited to 65535 (MAX_REPEAT). A maximum value of "unlimited" is
|
||||
represented by UNLIMITED_REPEAT, which is bigger than MAX_REPEAT:
|
||||
|
||||
META_MINMAX {n,m} repeat
|
||||
META_MINMAX_PLUS {n,m}+ repeat
|
||||
META_MINMAX_QUERY {n,m}? repeat
|
||||
|
||||
This one is followed by three elements. The first is 0 for '>' and 1 for '>=';
|
||||
the next two are the major and minor numbers:
|
||||
|
||||
META_COND_VERSION (?(VERSION<op>x.y)
|
||||
|
||||
Callouts are converted into one of two items:
|
||||
|
||||
META_CALLOUT_NUMBER (?C with numerical argument
|
||||
META_CALLOUT_STRING (?C with string argument
|
||||
|
||||
In both cases, the next two elements contain the offset and length of the next
|
||||
item in the pattern. Then there is either one callout number, or a length and
|
||||
an offset for the string argument. The length includes both delimiters.
|
||||
|
||||
|
||||
Traditional matching function
|
||||
-----------------------------
|
||||
|
||||
The "traditional", and original, matching function is called pcre2_match(), and
|
||||
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
|
||||
and the way that Perl works. This is not surprising, since it is intended to be
|
||||
as compatible with Perl as possible. This is the function most users of PCRE2
|
||||
will use most of the time. If PCRE2 is compiled with just-in-time (JIT)
|
||||
support, and studying a compiled pattern with JIT is successful, the JIT code
|
||||
is run instead of the normal pcre2_match() code, but the result is the same.
|
||||
|
||||
|
||||
Supplementary matching function
|
||||
-------------------------------
|
||||
|
||||
There is also a supplementary matching function called pcre2_dfa_match(). This
|
||||
implements a DFA matching algorithm that searches simultaneously for all
|
||||
possible matches that start at one point in the subject string. (Going back to
|
||||
my roots: see Historical Note 1 above.) This function intreprets the same
|
||||
compiled pattern data as pcre2_match(); however, not all the facilities are
|
||||
available, and those that are do not always work in quite the same way. See the
|
||||
user documentation for details.
|
||||
|
||||
The algorithm that is used for pcre2_dfa_match() is not a traditional FSM,
|
||||
because it may have a number of states active at one time. More work would be
|
||||
needed at compile time to produce a traditional FSM where only one state is
|
||||
ever active at once. I believe some other regex matchers work this way. JIT
|
||||
support is not available for this kind of matching.
|
||||
|
||||
|
||||
Changeable options
|
||||
------------------
|
||||
|
||||
The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
|
||||
others) may be changed in the middle of patterns by items such as (?i). Their
|
||||
processing is handled entirely at compile time by generating different opcodes
|
||||
for the different settings. The runtime functions do not need to keep track of
|
||||
an option's state.
|
||||
|
||||
PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
|
||||
are tracked and processed during the parsing pre-pass. The others are handled
|
||||
from META_OPTIONS items during the main compile phase.
|
||||
|
||||
|
||||
Format of compiled patterns
|
||||
---------------------------
|
||||
|
||||
The compiled form of a pattern is a vector of unsigned code units (bytes in
|
||||
8-bit mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing
|
||||
items of variable length. The first code unit in an item contains an opcode,
|
||||
and the length of the item is either implicit in the opcode or contained in the
|
||||
data that follows it.
|
||||
|
||||
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||
default value for LINK_SIZE is 2, except for the 32-bit library, where it can
|
||||
only be 4. The 8-bit library can be compiled to used 3-byte or 4-byte values,
|
||||
and the 16-bit library can be compiled to use 4-byte values, though this
|
||||
impairs performance. Specifing a LINK_SIZE larger than 2 for these libraries is
|
||||
necessary only when patterns whose compiled length is greater than 65535 code
|
||||
units are going to be processed. When a LINK_SIZE value uses more than one code
|
||||
unit, the most significant unit is first.
|
||||
|
||||
In this description, we assume the "normal" compilation options. Data values
|
||||
that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
|
||||
(most significant byte first), and one code unit in 16-bit and 32-bit modes.
|
||||
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
|
||||
These items are all just one unit long:
|
||||
|
||||
OP_END end of pattern
|
||||
OP_ANY match any one character other than newline
|
||||
OP_ALLANY match any one character, including newline
|
||||
OP_ANYBYTE match any single code unit, even in UTF-8/16 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM, start of match (subject + offset): \G
|
||||
OP_SET_SOM, set start of match (\K)
|
||||
OP_CIRC ^ (start of data)
|
||||
OP_CIRCM ^ multiline mode (start of data or after newline)
|
||||
OP_NOT_WORD_BOUNDARY \W
|
||||
OP_WORD_BOUNDARY \w
|
||||
OP_NOT_DIGIT \D
|
||||
OP_DIGIT \d
|
||||
OP_NOT_HSPACE \H
|
||||
OP_HSPACE \h
|
||||
OP_NOT_WHITESPACE \S
|
||||
OP_WHITESPACE \s
|
||||
OP_NOT_VSPACE \V
|
||||
OP_VSPACE \v
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or newline at end: \Z
|
||||
OP_EOD match end of data: \z
|
||||
OP_DOLL $ (end of data, or before final newline)
|
||||
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||
OP_EXTUNI match an extended Unicode grapheme cluster
|
||||
OP_ANYNL match any Unicode newline sequence
|
||||
|
||||
OP_ASSERT_ACCEPT )
|
||||
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||
OP_PRUNE ) OP_CLOSE, each followed by a number that
|
||||
OP_SKIP ) indicates which parentheses must be closed.
|
||||
OP_THEN )
|
||||
|
||||
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
|
||||
This ends the assertion, not the entire pattern match. The assertion (?!) is
|
||||
always optimized to OP_FAIL.
|
||||
|
||||
OP_ALLANY is used for '.' when PCRE2_DOTALL is set. It is also used for \C in
|
||||
non-UTF modes and in UTF-32 mode (since one code unit still equals one
|
||||
character). Another use is for [^] when empty classes are permitted
|
||||
(PCRE2_ALLOW_EMPTY_CLASS is set).
|
||||
|
||||
|
||||
Backtracking control verbs
|
||||
--------------------------
|
||||
|
||||
Verbs with no arguments generate opcodes with no following data (as listed
|
||||
in the section above).
|
||||
|
||||
(*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a
|
||||
length in one code unit, and followed by a binary zero. The name length is
|
||||
limited by the size of the code unit.
|
||||
|
||||
(*ACCEPT:NAME) and (*FAIL:NAME) are compiled as (*MARK:NAME)(*ACCEPT) and
|
||||
(*MARK:NAME)(*FAIL) respectively.
|
||||
|
||||
For (*COMMIT:NAME), (*PRUNE:NAME), (*SKIP:NAME), and (*THEN:NAME), the opcodes
|
||||
OP_COMMIT_ARG, OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the
|
||||
name following in the same format as for OP_MARK.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
---------------------------
|
||||
|
||||
The OP_CHAR opcode is followed by a single character that is to be matched
|
||||
casefully. For caseless matching of characters that have at most two
|
||||
case-equivalent code points, OP_CHARI is used. In UTF-8 or UTF-16 modes, the
|
||||
character may be more than one code unit long. In UTF-32 mode, characters are
|
||||
always exactly one code unit long.
|
||||
|
||||
If there is only one character in a character class, OP_CHAR or OP_CHARI is
|
||||
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
|
||||
for something like [^a]).
|
||||
|
||||
Caseless matching (positive or negative) of characters that have more than two
|
||||
case-equivalent code points (which is possible only in UTF mode) is handled by
|
||||
compiling a Unicode property item (see below), with the pseudo-property
|
||||
PT_CLIST. The value of this property is an offset in a vector called
|
||||
"ucd_caseless_sets" which identifies the start of a short list of equivalent
|
||||
characters, terminated by the value NOTACHAR (0xffffffff).
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
|
||||
The common repeats (*, +, ?), when applied to a single character, use the
|
||||
following opcodes, which come in caseful and caseless versions:
|
||||
|
||||
Caseful Caseless
|
||||
OP_STAR OP_STARI
|
||||
OP_MINSTAR OP_MINSTARI
|
||||
OP_POSSTAR OP_POSSTARI
|
||||
OP_PLUS OP_PLUSI
|
||||
OP_MINPLUS OP_MINPLUSI
|
||||
OP_POSPLUS OP_POSPLUSI
|
||||
OP_QUERY OP_QUERYI
|
||||
OP_MINQUERY OP_MINQUERYI
|
||||
OP_POSQUERY OP_POSQUERYI
|
||||
|
||||
Each opcode is followed by the character that is to be repeated. In ASCII or
|
||||
UTF-32 modes, these are two-code-unit items; in UTF-8 or UTF-16 modes, the
|
||||
length is variable. Those with "MIN" in their names are the minimizing
|
||||
versions. Those with "POS" in their names are possessive versions. Other kinds
|
||||
of repeat make use of these opcodes:
|
||||
|
||||
Caseful Caseless
|
||||
OP_UPTO OP_UPTOI
|
||||
OP_MINUPTO OP_MINUPTOI
|
||||
OP_POSUPTO OP_POSUPTOI
|
||||
OP_EXACT OP_EXACTI
|
||||
|
||||
Each of these is followed by a count and then the repeated character. The count
|
||||
is two bytes long in 8-bit mode (most significant byte first), or one code unit
|
||||
in 16-bit and 32-bit modes.
|
||||
|
||||
OP_UPTO matches from 0 to the given number. A repeat with a non-zero minimum
|
||||
and a fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or
|
||||
OP_MINUPTO or OPT_POSUPTO).
|
||||
|
||||
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
|
||||
etc.) are used for repeated, negated, single-character classes such as [^a]*.
|
||||
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
|
||||
positive single-character classes.
|
||||
|
||||
|
||||
Repeating character types
|
||||
-------------------------
|
||||
|
||||
Repeats of things like \d are done exactly as for single characters, except
|
||||
that instead of a character, the opcode for the type (e.g. OP_DIGIT) is stored
|
||||
in the next code unit. The opcodes are:
|
||||
|
||||
OP_TYPESTAR
|
||||
OP_TYPEMINSTAR
|
||||
OP_TYPEPOSSTAR
|
||||
OP_TYPEPLUS
|
||||
OP_TYPEMINPLUS
|
||||
OP_TYPEPOSPLUS
|
||||
OP_TYPEQUERY
|
||||
OP_TYPEMINQUERY
|
||||
OP_TYPEPOSQUERY
|
||||
OP_TYPEUPTO
|
||||
OP_TYPEMINUPTO
|
||||
OP_TYPEPOSUPTO
|
||||
OP_TYPEEXACT
|
||||
|
||||
|
||||
Match by Unicode property
|
||||
-------------------------
|
||||
|
||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by two code units that encode the desired property as a type
|
||||
and a value. The types are a set of #defines of the form PT_xxx, and the values
|
||||
are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
|
||||
The value is relevant only for PT_GC (General Category), PT_PC (Particular
|
||||
Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to
|
||||
identify a list of case-equivalent characters when there are three or more.
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||
value.
|
||||
|
||||
|
||||
Character classes
|
||||
-----------------
|
||||
|
||||
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
|
||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||
something like [^a]), except when caselessly matching a character that has more
|
||||
than two case-equivalent code points (which can happen only in UTF mode). In
|
||||
this case a Unicode property item is used, as described above in "Matching
|
||||
literal characters".
|
||||
|
||||
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
|
||||
negated, single-character classes. The normal single-character opcodes
|
||||
(OP_STAR, etc.) are used for repeated positive single-character classes.
|
||||
|
||||
When there is more than one character in a class, and all the code points are
|
||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||
negative one. In either case, the opcode is followed by a 32-byte (16-short,
|
||||
8-word) bit map containing a 1 bit for every character that is acceptable. The
|
||||
bits are counted from the least significant end of each unit. In caseless mode,
|
||||
bits for both cases are set.
|
||||
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 and
|
||||
16-bit and 32-bit modes, subject characters with values greater than 255 can be
|
||||
handled correctly. For OP_CLASS they do not match, whereas for OP_NCLASS they
|
||||
do.
|
||||
|
||||
For classes containing characters with values greater than 255 or that contain
|
||||
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable
|
||||
code points are less than 256, followed by a list of pairs (for a range) and/or
|
||||
single characters and/or properties. In caseless mode, all equivalent
|
||||
characters are explicitly listed.
|
||||
|
||||
OP_XCLASS is followed by a LINK_SIZE value containing the total length of the
|
||||
opcode and its data. This is followed by a code unit containing flag bits:
|
||||
XCL_NOT indicates that this is a negative class, and XCL_MAP indicates that a
|
||||
bit map is present. There follows the bit map, if XCL_MAP is set, and then a
|
||||
sequence of items coded as follows:
|
||||
|
||||
XCL_END marks the end of the list
|
||||
XCL_SINGLE one character follows
|
||||
XCL_RANGE two characters follow
|
||||
XCL_PROP a Unicode property (type, value) follows
|
||||
XCL_NOTPROP a Unicode property (type, value) follows
|
||||
|
||||
If a range starts with a code point less than 256 and ends with one greater
|
||||
than 255, it is split into two ranges, with characters less than 256 being
|
||||
indicated in the bit map, and the rest with XCL_RANGE.
|
||||
|
||||
When XCL_NOT is set, the bit map, if present, contains bits for characters that
|
||||
are allowed (exactly as for OP_NCLASS), but the list of items that follow it
|
||||
specifies characters and properties that are not allowed.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
|
||||
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
|
||||
reference number when the reference is to a unique capturing group (either by
|
||||
number or by name). When named groups are used, there may be more than one
|
||||
group with the same name. In this case, a reference to such a group by name
|
||||
generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index
|
||||
(not the byte offset) in the group name table of the first entry for the
|
||||
required name, followed by the number of groups with the same name. The
|
||||
matching code can then search for the first one that is set.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
|
||||
Single-character classes are handled specially (see above). This section
|
||||
applies to other classes and also to back references. In both cases, the repeat
|
||||
information follows the base item. The matching code looks at the following
|
||||
opcode to see if it is one of these:
|
||||
|
||||
OP_CRSTAR
|
||||
OP_CRMINSTAR
|
||||
OP_CRPOSSTAR
|
||||
OP_CRPLUS
|
||||
OP_CRMINPLUS
|
||||
OP_CRPOSPLUS
|
||||
OP_CRQUERY
|
||||
OP_CRMINQUERY
|
||||
OP_CRPOSQUERY
|
||||
OP_CRRANGE
|
||||
OP_CRMINRANGE
|
||||
OP_CRPOSRANGE
|
||||
|
||||
All but the last three are single-code-unit items, with no data. The range
|
||||
opcodes are followed by the minimum and maximum repeat counts.
|
||||
|
||||
|
||||
Brackets and alternation
|
||||
------------------------
|
||||
|
||||
A pair of non-capturing round brackets is wrapped round each expression at
|
||||
compile time, so alternation always happens in the context of brackets.
|
||||
|
||||
[Note for North Americans: "bracket" to some English speakers, including
|
||||
myself, can be round, square, curly, or pointy. Hence this usage rather than
|
||||
"parentheses".]
|
||||
|
||||
Non-capturing brackets use the opcode OP_BRA, capturing brackets use OP_CBRA. A
|
||||
bracket opcode is followed by a LINK_SIZE value which gives the offset to the
|
||||
next alternative OP_ALT or, if there aren't any branches, to the terminating
|
||||
opcode. Each OP_ALT is followed by a LINK_SIZE value giving the offset to the
|
||||
next one, or to the final opcode. For capturing brackets, the bracket number is
|
||||
a count that immediately follows the offset.
|
||||
|
||||
There are several opcodes that mark the end of a subpattern group. OP_KET is
|
||||
used for subpatterns that do not repeat indefinitely, OP_KETRMIN and
|
||||
OP_KETRMAX are used for indefinite repetitions, minimally or maximally
|
||||
respectively, and OP_KETRPOS for possessive repetitions (see below for more
|
||||
details). All four are followed by a LINK_SIZE value giving (as a positive
|
||||
number) the offset back to the matching bracket opcode.
|
||||
|
||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||
single-unit opcodes that tell the matcher that skipping the following
|
||||
subpattern entirely is a valid match. In the case of the first two, not
|
||||
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
||||
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
||||
because it may be called as a subroutine from elsewhere in the pattern.
|
||||
|
||||
A subpattern with an indefinite maximum repetition is replicated in the
|
||||
compiled data its minimum number of times (or once with OP_BRAZERO if the
|
||||
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
|
||||
as appropriate.
|
||||
|
||||
A subpattern with a bounded maximum repetition is replicated in a nested
|
||||
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
|
||||
before each replication after the minimum, so that, for example, (abc){2,5} is
|
||||
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
|
||||
has the same number.
|
||||
|
||||
When a repeated subpattern has an unbounded upper limit, it is checked to see
|
||||
whether it could match an empty string. If this is the case, the opcode in the
|
||||
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||
OP_KETRMAX, and if so, to break the loop.
|
||||
|
||||
|
||||
Possessive brackets
|
||||
-------------------
|
||||
|
||||
When a repeated group (capturing or non-capturing) is marked as possessive by
|
||||
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
|
||||
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead
|
||||
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
||||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||
|
||||
|
||||
Once-only (atomic) groups
|
||||
-------------------------
|
||||
|
||||
These are just like other subpatterns, but they start with the opcode OP_ONCE.
|
||||
The check for matching an empty string in an unbounded repeat is handled
|
||||
entirely at runtime, so there is just this one opcode for atomic groups.
|
||||
|
||||
|
||||
Assertions
|
||||
----------
|
||||
|
||||
Forward assertions are also just like other subpatterns, but starting with one
|
||||
of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or
|
||||
OP_ASSERT_NOT. Backward assertions use the opcodes OP_ASSERTBACK,
|
||||
OP_ASSERTBACK_NA, and OP_ASSERTBACK_NOT, and the first opcode inside the
|
||||
assertion is OP_REVERSE, followed by a count of the number of characters to
|
||||
move back the pointer in the subject string. In ASCII or UTF-32 mode, the count
|
||||
is also the number of code units, but in UTF-8/16 mode each character may
|
||||
occupy more than one code unit. A separate count is present in each alternative
|
||||
of a lookbehind assertion, allowing each branch to have a different (but fixed)
|
||||
length.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
-----------------------
|
||||
|
||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||
OP_SCOND for one that might match an empty string in an unbounded repeat.
|
||||
|
||||
If the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by a count containing the
|
||||
reference number, provided that the reference is to a unique capturing group.
|
||||
If the reference was by name and there is more than one group with that name,
|
||||
OP_DNCREF is used instead. It is followed by two counts: the index in the group
|
||||
names table, and the number of groups with the same name. The allows the
|
||||
matcher to check if any group with the given name is set.
|
||||
|
||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||
subpattern using the opcode OP_RREF (with a value of RREF_ANY (0xffff) for "the
|
||||
whole pattern") or OP_DNRREF (with data as for OP_DNCREF).
|
||||
|
||||
For a DEFINE condition, OP_FALSE is used (with no associated data). During
|
||||
compilation, however, a DEFINE condition is coded as OP_DEFINE so that, when
|
||||
the conditional group is complete, there can be a check to ensure that it
|
||||
contains only one top-level branch. Once this has happened, the opcode is
|
||||
changed to OP_FALSE, so the matcher never sees OP_DEFINE.
|
||||
|
||||
There is a special PCRE2-specific condition of the form (VERSION[>]=x.y), which
|
||||
tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE
|
||||
or OP_FALSE.
|
||||
|
||||
If a condition is not a back reference, recursion test, DEFINE, or VERSION, it
|
||||
must start with a parenthesized atomic assertion, whose opcode normally
|
||||
immediately follows OP_COND or OP_SCOND. However, if automatic callouts are
|
||||
enabled, a callout is inserted immediately before the assertion. It is also
|
||||
possible to insert a manual callout at this point. Only assertion conditions
|
||||
may have callouts preceding the condition.
|
||||
|
||||
A condition that is the negative assertion (?!) is optimized to OP_FAIL in all
|
||||
parts of the pattern, so this is another opcode that may appear as a condition.
|
||||
It is treated the same as OP_FALSE.
|
||||
|
||||
|
||||
Recursion
|
||||
---------
|
||||
|
||||
Recursion either matches the current pattern, or some subexpression. The opcode
|
||||
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
|
||||
bracket from the start of the whole pattern. OP_RECURSE is also used for
|
||||
"subroutine" calls, even though they are not strictly a recursion. Up till
|
||||
release 10.30 recursions were treated as atomic groups, making them
|
||||
incompatible with Perl (but PCRE had them well before Perl did). From 10.30,
|
||||
backtracking into recursions is supported.
|
||||
|
||||
Repeated recursions used to be wrapped inside OP_ONCE brackets, which not only
|
||||
forced no backtracking, but also allowed repetition to be handled as for other
|
||||
bracketed groups. From 10.30 onwards, repeated recursions are duplicated for
|
||||
their minimum repetitions, and then wrapped in non-capturing brackets for the
|
||||
remainder. For example, (?1){3} is treated as (?1)(?1)(?1), and (?1){2,4} is
|
||||
treated as (?1)(?1)(?:(?1)){0,2}.
|
||||
|
||||
|
||||
Callouts
|
||||
--------
|
||||
|
||||
A callout may have either a numerical argument or a string argument. These use
|
||||
OP_CALLOUT or OP_CALLOUT_STR, respectively. In each case these are followed by
|
||||
two LINK_SIZE values giving the offset in the pattern string to the start of
|
||||
the following item, and another count giving the length of this item. These
|
||||
values make it possible for pcre2test to output useful tracing information
|
||||
using callouts.
|
||||
|
||||
In the case of a numeric callout, after these two values there is a single code
|
||||
unit containing the callout number, in the range 0-255, with 255 being used for
|
||||
callouts that are automatically inserted as a result of the PCRE2_AUTO_CALLOUT
|
||||
option. Thus, this opcode item is of fixed length:
|
||||
|
||||
[OP_CALLOUT] [PATTERN_OFFSET] [PATTERN_LENGTH] [NUMBER]
|
||||
|
||||
For callouts with string arguments, OP_CALLOUT_STR has three more data items:
|
||||
a LINK_SIZE value giving the complete length of the entire opcode item, a
|
||||
LINK_SIZE item containing the offset within the pattern string to the start of
|
||||
the string argument, and the string itself, preceded by its starting delimiter
|
||||
and followed by a binary zero. When a callout function is called, a pointer to
|
||||
the actual string is passed, but the delimiter can be accessed as string[-1] if
|
||||
the application needs it. In the 8-bit library, the callout in /X(?C'abc')Y/ is
|
||||
compiled as the following bytes (decimal numbers represent binary values):
|
||||
|
||||
[OP_CALLOUT_STR] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0]
|
||||
-------- ------- -------- -------
|
||||
| | | |
|
||||
------- LINK_SIZE items ------
|
||||
|
||||
Opcode table checking
|
||||
---------------------
|
||||
|
||||
The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is
|
||||
not a real opcode, but is used to check at compile time that tables indexed by
|
||||
opcode are the correct length, in order to catch updating errors.
|
||||
|
||||
Philip Hazel
|
||||
12 July 2019
|
368
pcre2/INSTALL
vendored
Normal file
368
pcre2/INSTALL
vendored
Normal file
@ -0,0 +1,368 @@
|
||||
Installation Instructions
|
||||
*************************
|
||||
|
||||
Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software
|
||||
Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. This file is offered as-is,
|
||||
without warranty of any kind.
|
||||
|
||||
Basic Installation
|
||||
==================
|
||||
|
||||
Briefly, the shell command './configure && make && make install'
|
||||
should configure, build, and install this package. The following
|
||||
more-detailed instructions are generic; see the 'README' file for
|
||||
instructions specific to this package. Some packages provide this
|
||||
'INSTALL' file but do not implement all of the features documented
|
||||
below. The lack of an optional feature in a given package is not
|
||||
necessarily a bug. More recommendations for GNU packages can be found
|
||||
in *note Makefile Conventions: (standards)Makefile Conventions.
|
||||
|
||||
The 'configure' shell script attempts to guess correct values for
|
||||
various system-dependent variables used during compilation. It uses
|
||||
those values to create a 'Makefile' in each directory of the package.
|
||||
It may also create one or more '.h' files containing system-dependent
|
||||
definitions. Finally, it creates a shell script 'config.status' that
|
||||
you can run in the future to recreate the current configuration, and a
|
||||
file 'config.log' containing compiler output (useful mainly for
|
||||
debugging 'configure').
|
||||
|
||||
It can also use an optional file (typically called 'config.cache' and
|
||||
enabled with '--cache-file=config.cache' or simply '-C') that saves the
|
||||
results of its tests to speed up reconfiguring. Caching is disabled by
|
||||
default to prevent problems with accidental use of stale cache files.
|
||||
|
||||
If you need to do unusual things to compile the package, please try
|
||||
to figure out how 'configure' could check whether to do them, and mail
|
||||
diffs or instructions to the address given in the 'README' so they can
|
||||
be considered for the next release. If you are using the cache, and at
|
||||
some point 'config.cache' contains results you don't want to keep, you
|
||||
may remove or edit it.
|
||||
|
||||
The file 'configure.ac' (or 'configure.in') is used to create
|
||||
'configure' by a program called 'autoconf'. You need 'configure.ac' if
|
||||
you want to change it or regenerate 'configure' using a newer version of
|
||||
'autoconf'.
|
||||
|
||||
The simplest way to compile this package is:
|
||||
|
||||
1. 'cd' to the directory containing the package's source code and type
|
||||
'./configure' to configure the package for your system.
|
||||
|
||||
Running 'configure' might take a while. While running, it prints
|
||||
some messages telling which features it is checking for.
|
||||
|
||||
2. Type 'make' to compile the package.
|
||||
|
||||
3. Optionally, type 'make check' to run any self-tests that come with
|
||||
the package, generally using the just-built uninstalled binaries.
|
||||
|
||||
4. Type 'make install' to install the programs and any data files and
|
||||
documentation. When installing into a prefix owned by root, it is
|
||||
recommended that the package be configured and built as a regular
|
||||
user, and only the 'make install' phase executed with root
|
||||
privileges.
|
||||
|
||||
5. Optionally, type 'make installcheck' to repeat any self-tests, but
|
||||
this time using the binaries in their final installed location.
|
||||
This target does not install anything. Running this target as a
|
||||
regular user, particularly if the prior 'make install' required
|
||||
root privileges, verifies that the installation completed
|
||||
correctly.
|
||||
|
||||
6. You can remove the program binaries and object files from the
|
||||
source code directory by typing 'make clean'. To also remove the
|
||||
files that 'configure' created (so you can compile the package for
|
||||
a different kind of computer), type 'make distclean'. There is
|
||||
also a 'make maintainer-clean' target, but that is intended mainly
|
||||
for the package's developers. If you use it, you may have to get
|
||||
all sorts of other programs in order to regenerate files that came
|
||||
with the distribution.
|
||||
|
||||
7. Often, you can also type 'make uninstall' to remove the installed
|
||||
files again. In practice, not all packages have tested that
|
||||
uninstallation works correctly, even though it is required by the
|
||||
GNU Coding Standards.
|
||||
|
||||
8. Some packages, particularly those that use Automake, provide 'make
|
||||
distcheck', which can by used by developers to test that all other
|
||||
targets like 'make install' and 'make uninstall' work correctly.
|
||||
This target is generally not run by end users.
|
||||
|
||||
Compilers and Options
|
||||
=====================
|
||||
|
||||
Some systems require unusual options for compilation or linking that
|
||||
the 'configure' script does not know about. Run './configure --help'
|
||||
for details on some of the pertinent environment variables.
|
||||
|
||||
You can give 'configure' initial values for configuration parameters
|
||||
by setting variables in the command line or in the environment. Here is
|
||||
an example:
|
||||
|
||||
./configure CC=c99 CFLAGS=-g LIBS=-lposix
|
||||
|
||||
*Note Defining Variables::, for more details.
|
||||
|
||||
Compiling For Multiple Architectures
|
||||
====================================
|
||||
|
||||
You can compile the package for more than one kind of computer at the
|
||||
same time, by placing the object files for each architecture in their
|
||||
own directory. To do this, you can use GNU 'make'. 'cd' to the
|
||||
directory where you want the object files and executables to go and run
|
||||
the 'configure' script. 'configure' automatically checks for the source
|
||||
code in the directory that 'configure' is in and in '..'. This is known
|
||||
as a "VPATH" build.
|
||||
|
||||
With a non-GNU 'make', it is safer to compile the package for one
|
||||
architecture at a time in the source code directory. After you have
|
||||
installed the package for one architecture, use 'make distclean' before
|
||||
reconfiguring for another architecture.
|
||||
|
||||
On MacOS X 10.5 and later systems, you can create libraries and
|
||||
executables that work on multiple system types--known as "fat" or
|
||||
"universal" binaries--by specifying multiple '-arch' options to the
|
||||
compiler but only a single '-arch' option to the preprocessor. Like
|
||||
this:
|
||||
|
||||
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CPP="gcc -E" CXXCPP="g++ -E"
|
||||
|
||||
This is not guaranteed to produce working output in all cases, you
|
||||
may have to build one architecture at a time and combine the results
|
||||
using the 'lipo' tool if you have problems.
|
||||
|
||||
Installation Names
|
||||
==================
|
||||
|
||||
By default, 'make install' installs the package's commands under
|
||||
'/usr/local/bin', include files under '/usr/local/include', etc. You
|
||||
can specify an installation prefix other than '/usr/local' by giving
|
||||
'configure' the option '--prefix=PREFIX', where PREFIX must be an
|
||||
absolute file name.
|
||||
|
||||
You can specify separate installation prefixes for
|
||||
architecture-specific files and architecture-independent files. If you
|
||||
pass the option '--exec-prefix=PREFIX' to 'configure', the package uses
|
||||
PREFIX as the prefix for installing programs and libraries.
|
||||
Documentation and other data files still use the regular prefix.
|
||||
|
||||
In addition, if you use an unusual directory layout you can give
|
||||
options like '--bindir=DIR' to specify different values for particular
|
||||
kinds of files. Run 'configure --help' for a list of the directories
|
||||
you can set and what kinds of files go in them. In general, the default
|
||||
for these options is expressed in terms of '${prefix}', so that
|
||||
specifying just '--prefix' will affect all of the other directory
|
||||
specifications that were not explicitly provided.
|
||||
|
||||
The most portable way to affect installation locations is to pass the
|
||||
correct locations to 'configure'; however, many packages provide one or
|
||||
both of the following shortcuts of passing variable assignments to the
|
||||
'make install' command line to change installation locations without
|
||||
having to reconfigure or recompile.
|
||||
|
||||
The first method involves providing an override variable for each
|
||||
affected directory. For example, 'make install
|
||||
prefix=/alternate/directory' will choose an alternate location for all
|
||||
directory configuration variables that were expressed in terms of
|
||||
'${prefix}'. Any directories that were specified during 'configure',
|
||||
but not in terms of '${prefix}', must each be overridden at install time
|
||||
for the entire installation to be relocated. The approach of makefile
|
||||
variable overrides for each directory variable is required by the GNU
|
||||
Coding Standards, and ideally causes no recompilation. However, some
|
||||
platforms have known limitations with the semantics of shared libraries
|
||||
that end up requiring recompilation when using this method, particularly
|
||||
noticeable in packages that use GNU Libtool.
|
||||
|
||||
The second method involves providing the 'DESTDIR' variable. For
|
||||
example, 'make install DESTDIR=/alternate/directory' will prepend
|
||||
'/alternate/directory' before all installation names. The approach of
|
||||
'DESTDIR' overrides is not required by the GNU Coding Standards, and
|
||||
does not work on platforms that have drive letters. On the other hand,
|
||||
it does better at avoiding recompilation issues, and works well even
|
||||
when some directory options were not specified in terms of '${prefix}'
|
||||
at 'configure' time.
|
||||
|
||||
Optional Features
|
||||
=================
|
||||
|
||||
If the package supports it, you can cause programs to be installed
|
||||
with an extra prefix or suffix on their names by giving 'configure' the
|
||||
option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'.
|
||||
|
||||
Some packages pay attention to '--enable-FEATURE' options to
|
||||
'configure', where FEATURE indicates an optional part of the package.
|
||||
They may also pay attention to '--with-PACKAGE' options, where PACKAGE
|
||||
is something like 'gnu-as' or 'x' (for the X Window System). The
|
||||
'README' should mention any '--enable-' and '--with-' options that the
|
||||
package recognizes.
|
||||
|
||||
For packages that use the X Window System, 'configure' can usually
|
||||
find the X include and library files automatically, but if it doesn't,
|
||||
you can use the 'configure' options '--x-includes=DIR' and
|
||||
'--x-libraries=DIR' to specify their locations.
|
||||
|
||||
Some packages offer the ability to configure how verbose the
|
||||
execution of 'make' will be. For these packages, running './configure
|
||||
--enable-silent-rules' sets the default to minimal output, which can be
|
||||
overridden with 'make V=1'; while running './configure
|
||||
--disable-silent-rules' sets the default to verbose, which can be
|
||||
overridden with 'make V=0'.
|
||||
|
||||
Particular systems
|
||||
==================
|
||||
|
||||
On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC
|
||||
is not installed, it is recommended to use the following options in
|
||||
order to use an ANSI C compiler:
|
||||
|
||||
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
|
||||
|
||||
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
|
||||
|
||||
HP-UX 'make' updates targets which have the same time stamps as their
|
||||
prerequisites, which makes it generally unusable when shipped generated
|
||||
files such as 'configure' are involved. Use GNU 'make' instead.
|
||||
|
||||
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
|
||||
parse its '<wchar.h>' header file. The option '-nodtk' can be used as a
|
||||
workaround. If GNU CC is not installed, it is therefore recommended to
|
||||
try
|
||||
|
||||
./configure CC="cc"
|
||||
|
||||
and if that doesn't work, try
|
||||
|
||||
./configure CC="cc -nodtk"
|
||||
|
||||
On Solaris, don't put '/usr/ucb' early in your 'PATH'. This
|
||||
directory contains several dysfunctional programs; working variants of
|
||||
these programs are available in '/usr/bin'. So, if you need '/usr/ucb'
|
||||
in your 'PATH', put it _after_ '/usr/bin'.
|
||||
|
||||
On Haiku, software installed for all users goes in '/boot/common',
|
||||
not '/usr/local'. It is recommended to use the following options:
|
||||
|
||||
./configure --prefix=/boot/common
|
||||
|
||||
Specifying the System Type
|
||||
==========================
|
||||
|
||||
There may be some features 'configure' cannot figure out
|
||||
automatically, but needs to determine by the type of machine the package
|
||||
will run on. Usually, assuming the package is built to be run on the
|
||||
_same_ architectures, 'configure' can figure that out, but if it prints
|
||||
a message saying it cannot guess the machine type, give it the
|
||||
'--build=TYPE' option. TYPE can either be a short name for the system
|
||||
type, such as 'sun4', or a canonical name which has the form:
|
||||
|
||||
CPU-COMPANY-SYSTEM
|
||||
|
||||
where SYSTEM can have one of these forms:
|
||||
|
||||
OS
|
||||
KERNEL-OS
|
||||
|
||||
See the file 'config.sub' for the possible values of each field. If
|
||||
'config.sub' isn't included in this package, then this package doesn't
|
||||
need to know the machine type.
|
||||
|
||||
If you are _building_ compiler tools for cross-compiling, you should
|
||||
use the option '--target=TYPE' to select the type of system they will
|
||||
produce code for.
|
||||
|
||||
If you want to _use_ a cross compiler, that generates code for a
|
||||
platform different from the build platform, you should specify the
|
||||
"host" platform (i.e., that on which the generated programs will
|
||||
eventually be run) with '--host=TYPE'.
|
||||
|
||||
Sharing Defaults
|
||||
================
|
||||
|
||||
If you want to set default values for 'configure' scripts to share,
|
||||
you can create a site shell script called 'config.site' that gives
|
||||
default values for variables like 'CC', 'cache_file', and 'prefix'.
|
||||
'configure' looks for 'PREFIX/share/config.site' if it exists, then
|
||||
'PREFIX/etc/config.site' if it exists. Or, you can set the
|
||||
'CONFIG_SITE' environment variable to the location of the site script.
|
||||
A warning: not all 'configure' scripts look for a site script.
|
||||
|
||||
Defining Variables
|
||||
==================
|
||||
|
||||
Variables not defined in a site shell script can be set in the
|
||||
environment passed to 'configure'. However, some packages may run
|
||||
configure again during the build, and the customized values of these
|
||||
variables may be lost. In order to avoid this problem, you should set
|
||||
them in the 'configure' command line, using 'VAR=value'. For example:
|
||||
|
||||
./configure CC=/usr/local2/bin/gcc
|
||||
|
||||
causes the specified 'gcc' to be used as the C compiler (unless it is
|
||||
overridden in the site shell script).
|
||||
|
||||
Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an
|
||||
Autoconf limitation. Until the limitation is lifted, you can use this
|
||||
workaround:
|
||||
|
||||
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||
|
||||
'configure' Invocation
|
||||
======================
|
||||
|
||||
'configure' recognizes the following options to control how it
|
||||
operates.
|
||||
|
||||
'--help'
|
||||
'-h'
|
||||
Print a summary of all of the options to 'configure', and exit.
|
||||
|
||||
'--help=short'
|
||||
'--help=recursive'
|
||||
Print a summary of the options unique to this package's
|
||||
'configure', and exit. The 'short' variant lists options used only
|
||||
in the top level, while the 'recursive' variant lists options also
|
||||
present in any nested packages.
|
||||
|
||||
'--version'
|
||||
'-V'
|
||||
Print the version of Autoconf used to generate the 'configure'
|
||||
script, and exit.
|
||||
|
||||
'--cache-file=FILE'
|
||||
Enable the cache: use and save the results of the tests in FILE,
|
||||
traditionally 'config.cache'. FILE defaults to '/dev/null' to
|
||||
disable caching.
|
||||
|
||||
'--config-cache'
|
||||
'-C'
|
||||
Alias for '--cache-file=config.cache'.
|
||||
|
||||
'--quiet'
|
||||
'--silent'
|
||||
'-q'
|
||||
Do not print messages saying which checks are being made. To
|
||||
suppress all normal output, redirect it to '/dev/null' (any error
|
||||
messages will still be shown).
|
||||
|
||||
'--srcdir=DIR'
|
||||
Look for the package's source code in directory DIR. Usually
|
||||
'configure' can determine that directory automatically.
|
||||
|
||||
'--prefix=DIR'
|
||||
Use DIR as the installation prefix. *note Installation Names:: for
|
||||
more details, including other options available for fine-tuning the
|
||||
installation locations.
|
||||
|
||||
'--no-create'
|
||||
'-n'
|
||||
Run the configure checks, but stop before creating any output
|
||||
files.
|
||||
|
||||
'configure' also accepts some other, not widely useful, options. Run
|
||||
'configure --help' for more details.
|
4
pcre2/LICENCE
vendored
4
pcre2/LICENCE
vendored
@ -20,8 +20,8 @@ THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
347
pcre2/NEWS
vendored
Normal file
347
pcre2/NEWS
vendored
Normal file
@ -0,0 +1,347 @@
|
||||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
|
||||
Version 10.36 04-December-2020
|
||||
------------------------------
|
||||
|
||||
Again, mainly bug fixes and tidies. The only enhancements are the addition of
|
||||
GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the
|
||||
handling of substitution strings for both -O and callouts in pcre2grep, with
|
||||
the addition of $x{...} and $o{...} to allow for characters whose code points
|
||||
are greater than 255 in Unicode mode.
|
||||
|
||||
NOTE: there is an outstanding issue with JIT support for MacOS on arm64
|
||||
hardware. For details, please see Bugzilla issue #2618.
|
||||
|
||||
|
||||
Version 10.35 15-April-2020
|
||||
---------------------------
|
||||
|
||||
Bugfixes, tidies, and a few new enhancements.
|
||||
|
||||
1. Capturing groups that contain recursive backreferences to themselves are no
|
||||
longer automatically atomic, because the restriction is no longer necessary
|
||||
as a result of the 10.30 restructuring.
|
||||
|
||||
2. Several new options for pcre2_substitute().
|
||||
|
||||
3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode
|
||||
character properties are used for upper/lower case computations on characters
|
||||
whose code points are greater than 127.
|
||||
|
||||
4. The character tables (for low-valued characters) can now more easily be
|
||||
saved and restored in binary.
|
||||
|
||||
5. Updated to Unicode 13.0.0.
|
||||
|
||||
|
||||
Version 10.34 21-November-2019
|
||||
------------------------------
|
||||
|
||||
Another release with a few enhancements as well as bugfixes and tidies. The
|
||||
main new features are:
|
||||
|
||||
1. There is now some support for matching in invalid UTF strings.
|
||||
|
||||
2. Non-atomic positive lookarounds are implemented in the pcre2_match()
|
||||
interpreter, but not in JIT.
|
||||
|
||||
3. Added two new functions: pcre2_get_match_data_size() and
|
||||
pcre2_maketables_free().
|
||||
|
||||
4. Upgraded to Unicode 12.1.0.
|
||||
|
||||
|
||||
Version 10.33 16-April-2019
|
||||
---------------------------
|
||||
|
||||
Yet more bugfixes, tidies, and a few enhancements, summarized here (see
|
||||
ChangeLog for the full list):
|
||||
|
||||
1. Callouts from pcre2_substitute() are now available.
|
||||
|
||||
2. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
|
||||
functions that use the standard POSIX names. However, in pcre2posix.h the POSIX
|
||||
names are defined as macros. This should help avoid linking with the wrong
|
||||
library in some environments, while still exporting the POSIX names for
|
||||
pre-existing programs that use them.
|
||||
|
||||
3. Some new options:
|
||||
|
||||
(a) PCRE2_EXTRA_ESCAPED_CR_IS_LF makes \r behave as \n.
|
||||
|
||||
(b) PCRE2_EXTRA_ALT_BSUX enables support for ECMAScript 6's \u{hh...}
|
||||
construct.
|
||||
|
||||
(c) PCRE2_COPY_MATCHED_SUBJECT causes a copy of a matched subject to be
|
||||
made, instead of just remembering a pointer.
|
||||
|
||||
4. Some new Perl features:
|
||||
|
||||
(a) Perl 5.28's experimental alphabetic names for atomic groups and
|
||||
lookaround assertions, for example, (*pla:...) and (*atomic:...).
|
||||
|
||||
(b) The new Perl "script run" features (*script_run:...) and
|
||||
(*atomic_script_run:...) aka (*sr:...) and (*asr:...).
|
||||
|
||||
(c) When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in
|
||||
capture group names.
|
||||
|
||||
5. --disable-percent-zt disables the use of %zu and %td in formatting strings
|
||||
in pcre2test. They were already automatically disabled for VC and older C
|
||||
compilers.
|
||||
|
||||
6. Some changes related to callouts in pcre2grep:
|
||||
|
||||
(a) Support for running an external program under VMS has been added, in
|
||||
addition to Windows and fork() support.
|
||||
|
||||
(b) --disable-pcre2grep-callout-fork restricts the callout support in
|
||||
to the inbuilt echo facility.
|
||||
|
||||
|
||||
Version 10.32 10-September-2018
|
||||
-------------------------------
|
||||
|
||||
This is another mainly bugfix and tidying release with a few minor
|
||||
enhancements. These are the main ones:
|
||||
|
||||
1. pcre2grep now supports the inclusion of binary zeros in patterns that are
|
||||
read from files via the -f option.
|
||||
|
||||
2. ./configure now supports --enable-jit=auto, which automatically enables JIT
|
||||
if the hardware supports it.
|
||||
|
||||
3. In pcre2_dfa_match(), internal recursive calls no longer use the stack for
|
||||
local workspace and local ovectors. Instead, an initial block of stack is
|
||||
reserved, but if this is insufficient, heap memory is used. The heap limit
|
||||
parameter now applies to pcre2_dfa_match().
|
||||
|
||||
4. Updated to Unicode version 11.0.0.
|
||||
|
||||
5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
|
||||
|
||||
6. Added support for \N{U+dddd}, but only in Unicode mode.
|
||||
|
||||
7. Added support for (?^) to unset all imnsx options.
|
||||
|
||||
|
||||
Version 10.31 12-February-2018
|
||||
------------------------------
|
||||
|
||||
This is mainly a bugfix and tidying release (see ChangeLog for full details).
|
||||
However, there are some minor enhancements.
|
||||
|
||||
1. New pcre2_config() options: PCRE2_CONFIG_NEVER_BACKSLASH_C and
|
||||
PCRE2_CONFIG_COMPILED_WIDTHS.
|
||||
|
||||
2. New pcre2_pattern_info() option PCRE2_INFO_EXTRAOPTIONS to retrieve the
|
||||
extra compile time options.
|
||||
|
||||
3. There are now public names for all the pcre2_compile() error numbers.
|
||||
|
||||
4. Added PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK bits to a new
|
||||
field callout_flags in callout blocks.
|
||||
|
||||
|
||||
Version 10.30 14-August-2017
|
||||
----------------------------
|
||||
|
||||
The full list of changes that includes bugfixes and tidies is, as always, in
|
||||
ChangeLog. These are the most important new features:
|
||||
|
||||
1. The main interpreter, pcre2_match(), has been refactored into a new version
|
||||
that does not use recursive function calls (and therefore the system stack) for
|
||||
remembering backtracking positions. This makes --disable-stack-for-recursion a
|
||||
NOOP. The new implementation allows backtracking into recursive group calls in
|
||||
patterns, making it more compatible with Perl, and also fixes some other
|
||||
previously hard-to-do issues. For patterns that have a lot of backtracking, the
|
||||
heap is now used, and there is an explicit limit on the amount, settable by
|
||||
pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). The "recursion limit" is retained,
|
||||
but is renamed as "depth limit" (though the old names remain for
|
||||
compatibility).
|
||||
|
||||
There is also a change in the way callouts from pcre2_match() are handled. The
|
||||
offset_vector field in the callout block is no longer a pointer to the
|
||||
actual ovector that was passed to the matching function in the match data
|
||||
block. Instead it points to an internal ovector of a size large enough to hold
|
||||
all possible captured substrings in the pattern.
|
||||
|
||||
2. The new option PCRE2_ENDANCHORED insists that a pattern match must end at
|
||||
the end of the subject.
|
||||
|
||||
3. The new option PCRE2_EXTENDED_MORE implements Perl's /xx feature, and
|
||||
pcre2test is upgraded to support it. Setting within the pattern by (?xx) is
|
||||
also supported.
|
||||
|
||||
4. (?n) can be used to set PCRE2_NO_AUTO_CAPTURE, because Perl now has this.
|
||||
|
||||
5. Additional compile options in the compile context are now available, and the
|
||||
first two are: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES and
|
||||
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
|
||||
|
||||
6. The newline type PCRE2_NEWLINE_NUL is now available.
|
||||
|
||||
7. The match limit value now also applies to pcre2_dfa_match() as there are
|
||||
patterns that can use up a lot of resources without necessarily recursing very
|
||||
deeply.
|
||||
|
||||
8. The option REG_PEND (a GNU extension) is now available for the POSIX
|
||||
wrapper. Also there is a new option PCRE2_LITERAL which is used to support
|
||||
REG_NOSPEC.
|
||||
|
||||
9. PCRE2_EXTRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD are implemented for the
|
||||
benefit of pcre2grep, and pcre2grep's -F, -w, and -x options are re-implemented
|
||||
using PCRE2_LITERAL, PCRE2_EXTRA_MATCH_WORD, and PCRE2_EXTRA_MATCH_LINE. This
|
||||
is tidier and also fixes some bugs.
|
||||
|
||||
10. The Unicode tables are upgraded from Unicode 8.0.0 to Unicode 10.0.0.
|
||||
|
||||
11. There are some experimental functions for converting foreign patterns
|
||||
(globs and POSIX patterns) into PCRE2 patterns.
|
||||
|
||||
|
||||
Version 10.23 14-February-2017
|
||||
------------------------------
|
||||
|
||||
1. ChangeLog has the details of a lot of bug fixes and tidies.
|
||||
|
||||
2. There has been a major re-factoring of the pcre2_compile.c file. Most syntax
|
||||
checking is now done in the pre-pass that identifies capturing groups. This has
|
||||
reduced the amount of duplication and made the code tidier. While doing this,
|
||||
some minor bugs and Perl incompatibilities were fixed (see ChangeLog for
|
||||
details.)
|
||||
|
||||
3. Back references are now permitted in lookbehind assertions when there are
|
||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||
reference is by name, there is only one group of that name. The referenced
|
||||
group must, of course be of fixed length.
|
||||
|
||||
4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back
|
||||
reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does
|
||||
not recognize this syntax.
|
||||
|
||||
5. pcre2grep now automatically expands its buffer up to a maximum set by
|
||||
--max-buffer-size.
|
||||
|
||||
6. The -t option (grand total) has been added to pcre2grep.
|
||||
|
||||
7. A new function called pcre2_code_copy_with_tables() exists to copy a
|
||||
compiled pattern along with a private copy of the character tables that is
|
||||
uses.
|
||||
|
||||
8. A user supplied a number of patches to upgrade pcre2grep under Windows and
|
||||
tidy the code.
|
||||
|
||||
9. Several updates have been made to pcre2test and test scripts (see
|
||||
ChangeLog).
|
||||
|
||||
|
||||
Version 10.22 29-July-2016
|
||||
--------------------------
|
||||
|
||||
1. ChangeLog has the details of a number of bug fixes.
|
||||
|
||||
2. The POSIX wrapper function regcomp() did not used to support back references
|
||||
and subroutine calls if called with the REG_NOSUB option. It now does.
|
||||
|
||||
3. A new function, pcre2_code_copy(), is added, to make a copy of a compiled
|
||||
pattern.
|
||||
|
||||
4. Support for string callouts is added to pcre2grep.
|
||||
|
||||
5. Added the PCRE2_NO_JIT option to pcre2_match().
|
||||
|
||||
6. The pcre2_get_error_message() function now returns with a negative error
|
||||
code if the error number it is given is unknown.
|
||||
|
||||
7. Several updates have been made to pcre2test and test scripts (see
|
||||
ChangeLog).
|
||||
|
||||
|
||||
Version 10.21 12-January-2016
|
||||
-----------------------------
|
||||
|
||||
1. Many bugs have been fixed. A large number of them were provoked only by very
|
||||
strange pattern input, and were discovered by fuzzers. Some others were
|
||||
discovered by code auditing. See ChangeLog for details.
|
||||
|
||||
2. The Unicode tables have been updated to Unicode version 8.0.0.
|
||||
|
||||
3. For Perl compatibility in EBCDIC environments, ranges such as a-z in a
|
||||
class, where both values are literal letters in the same case, omit the
|
||||
non-letter EBCDIC code points within the range.
|
||||
|
||||
4. There have been a number of enhancements to the pcre2_substitute() function,
|
||||
giving more flexibility to replacement facilities. It is now also possible to
|
||||
cause the function to return the needed buffer size if the one given is too
|
||||
small.
|
||||
|
||||
5. The PCRE2_ALT_VERBNAMES option causes the "name" parts of special verbs such
|
||||
as (*THEN:name) to be processed for backslashes and to take note of
|
||||
PCRE2_EXTENDED.
|
||||
|
||||
6. PCRE2_INFO_HASBACKSLASHC makes it possible for a client to find out if a
|
||||
pattern uses \C, and --never-backslash-C makes it possible to compile a version
|
||||
PCRE2 in which the use of \C is always forbidden.
|
||||
|
||||
7. A limit to the length of pattern that can be handled can now be set by
|
||||
calling pcre2_set_max_pattern_length().
|
||||
|
||||
8. When matching an unanchored pattern, a match can be required to begin within
|
||||
a given number of code units after the start of the subject by calling
|
||||
pcre2_set_offset_limit().
|
||||
|
||||
9. The pcre2test program has been extended to test new facilities, and it can
|
||||
now run the tests when LF on its own is not a valid newline sequence.
|
||||
|
||||
10. The RunTest script has also been updated to enable more tests to be run.
|
||||
|
||||
11. There have been some minor performance enhancements.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
||||
1. Callouts with string arguments and the pcre2_callout_enumerate() function
|
||||
have been implemented.
|
||||
|
||||
2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added.
|
||||
|
||||
3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a
|
||||
subject in multiline mode.
|
||||
|
||||
4. The way named subpatterns are handled has been refactored. The previous
|
||||
approach had several bugs.
|
||||
|
||||
5. The handling of \c in EBCDIC environments has been changed to conform to the
|
||||
perlebcdic document. This is an incompatible change.
|
||||
|
||||
6. Bugs have been mended, many of them discovered by fuzzers.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
||||
1. Serialization and de-serialization functions have been added to the API,
|
||||
making it possible to save and restore sets of compiled patterns, though
|
||||
restoration must be done in the same environment that was used for compilation.
|
||||
|
||||
2. The (*NO_JIT) feature has been added; this makes it possible for a pattern
|
||||
creator to specify that JIT is not to be used.
|
||||
|
||||
3. A number of bugs have been fixed. In particular, bugs that caused building
|
||||
on Windows using CMake to fail have been mended.
|
||||
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
-----------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
|
||||
API, up to item 20 for release 8.36. New programs are recommended to use the
|
||||
new library. Programs that use the original (PCRE1) API will need changing
|
||||
before linking with the new library.
|
||||
|
||||
****
|
406
pcre2/NON-AUTOTOOLS-BUILD
vendored
Normal file
406
pcre2/NON-AUTOTOOLS-BUILD
vendored
Normal file
@ -0,0 +1,406 @@
|
||||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
This document contains the following sections:
|
||||
|
||||
General
|
||||
Generic instructions for the PCRE2 C library
|
||||
Stack size in Windows environments
|
||||
Linking programs in Windows environments
|
||||
Calling conventions in Windows environments
|
||||
Comments about Win32 builds
|
||||
Building PCRE2 on Windows with CMake
|
||||
Building PCRE2 on Windows with Visual Studio
|
||||
Testing with RunTest.bat
|
||||
Building PCRE2 on native z/OS and z/VM
|
||||
|
||||
|
||||
GENERAL
|
||||
|
||||
The basic PCRE2 library consists entirely of code written in Standard C, and so
|
||||
should compile successfully on any system that has a Standard C compiler and
|
||||
library.
|
||||
|
||||
The PCRE2 distribution includes a "configure" file for use by the
|
||||
configure/make (autotools) build system, as found in many Unix-like
|
||||
environments. The README file contains information about the options for
|
||||
"configure".
|
||||
|
||||
There is also support for CMake, which some users prefer, especially in Windows
|
||||
environments, though it can also be run in Unix-like environments. See the
|
||||
section entitled "Building PCRE2 on Windows with CMake" below.
|
||||
|
||||
Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
|
||||
under the names src/config.h.generic and src/pcre2.h.generic. These are
|
||||
provided for those who build PCRE2 without using "configure" or CMake. If you
|
||||
use "configure" or CMake, the .generic versions are not used.
|
||||
|
||||
|
||||
GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
|
||||
|
||||
The following are generic instructions for building the PCRE2 C library "by
|
||||
hand". If you are going to use CMake, this section does not apply to you; you
|
||||
can skip ahead to the CMake section.
|
||||
|
||||
(1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
|
||||
macro settings that it contains to whatever is appropriate for your
|
||||
environment. In particular, you can alter the definition of the NEWLINE
|
||||
macro to specify what character(s) you want to be interpreted as line
|
||||
terminators by default.
|
||||
|
||||
When you subsequently compile any of the PCRE2 modules, you must specify
|
||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||
sources.
|
||||
|
||||
An alternative approach is not to edit src/config.h, but to use -D on the
|
||||
compiler command line to make any changes that you need to the
|
||||
configuration options. In this case -DHAVE_CONFIG_H must not be set.
|
||||
|
||||
NOTE: There have been occasions when the way in which certain parameters
|
||||
in src/config.h are used has changed between releases. (In the
|
||||
configure/make world, this is handled automatically.) When upgrading to a
|
||||
new release, you are strongly advised to review src/config.h.generic
|
||||
before re-using what you had previously.
|
||||
|
||||
Note also that the src/config.h.generic file is created from a config.h
|
||||
that was generated by Autotools, which automatically includes settings of
|
||||
a number of macros that are not actually used by PCRE2 (for example,
|
||||
HAVE_MEMORY_H).
|
||||
|
||||
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
|
||||
|
||||
(3) EITHER:
|
||||
Copy or rename file src/pcre2_chartables.c.dist as
|
||||
src/pcre2_chartables.c.
|
||||
|
||||
OR:
|
||||
Compile src/pcre2_dftables.c as a stand-alone program (using
|
||||
-DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
|
||||
the single argument "src/pcre2_chartables.c". This generates a set of
|
||||
standard character tables and writes them to that file. The tables are
|
||||
generated using the default C locale for your system. If you want to use
|
||||
a locale that is specified by LC_xxx environment variables, add the -L
|
||||
option to the pcre2_dftables command. You must use this method if you
|
||||
are building on a system that uses EBCDIC code.
|
||||
|
||||
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
||||
specify alternative tables at run time.
|
||||
|
||||
(4) For an 8-bit library, compile the following source files from the src
|
||||
directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
|
||||
set -DHAVE_CONFIG_H if you have set up src/config.h with your
|
||||
configuration, or else use other -D settings to change the configuration
|
||||
as required.
|
||||
|
||||
pcre2_auto_possess.c
|
||||
pcre2_chartables.c
|
||||
pcre2_compile.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_convert.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_extuni.c
|
||||
pcre2_find_bracket.c
|
||||
pcre2_jit_compile.c
|
||||
pcre2_maketables.c
|
||||
pcre2_match.c
|
||||
pcre2_match_data.c
|
||||
pcre2_newline.c
|
||||
pcre2_ord2utf.c
|
||||
pcre2_pattern_info.c
|
||||
pcre2_script_run.c
|
||||
pcre2_serialize.c
|
||||
pcre2_string_utils.c
|
||||
pcre2_study.c
|
||||
pcre2_substitute.c
|
||||
pcre2_substring.c
|
||||
pcre2_tables.c
|
||||
pcre2_ucd.c
|
||||
pcre2_valid_utf.c
|
||||
pcre2_xclass.c
|
||||
|
||||
Make sure that you include -I. in the compiler command (or equivalent for
|
||||
an unusual compiler) so that all included PCRE2 header files are first
|
||||
sought in the src directory under the current directory. Otherwise you run
|
||||
the risk of picking up a previously-installed file from somewhere else.
|
||||
|
||||
Note that you must compile pcre2_jit_compile.c, even if you have not
|
||||
defined SUPPORT_JIT in src/config.h, because when JIT support is not
|
||||
configured, dummy functions are compiled. When JIT support IS configured,
|
||||
pcre2_jit_compile.c #includes other files from the sljit subdirectory,
|
||||
all of whose names begin with "sljit". It also #includes
|
||||
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
|
||||
these yourself.
|
||||
|
||||
Note also that the pcre2_fuzzsupport.c file contains special code that is
|
||||
useful to those who want to run fuzzing tests on the PCRE2 library. Unless
|
||||
you are doing that, you can ignore it.
|
||||
|
||||
(5) Now link all the compiled code into an object library in whichever form
|
||||
your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
|
||||
If your system has static and shared libraries, you may have to do this
|
||||
once for each type.
|
||||
|
||||
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
||||
instead of the 8-bit library) just supply 16 or 32 as the value of
|
||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||
|
||||
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
||||
8-bit library), ensure that you have the src/pcre2posix.h file and then
|
||||
compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
|
||||
library.
|
||||
|
||||
(8) The pcre2test program can be linked with any combination of the 8-bit,
|
||||
16-bit and 32-bit libraries (depending on what you selected in
|
||||
src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
|
||||
necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
|
||||
appropriate library/ies. If you compiled an 8-bit library, pcre2test also
|
||||
needs the pcre2posix wrapper library.
|
||||
|
||||
(9) Run pcre2test on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. There are
|
||||
comments about what each test does in the section entitled "Testing PCRE2"
|
||||
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
||||
32-bit libraries, you need to run pcre2test with the -16 option to do
|
||||
16-bit tests and with the -32 option to do 32-bit tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options are selected.
|
||||
For example, test 4 is for Unicode support, and will not run if you have
|
||||
built PCRE2 without it. See the comments at the start of each testinput
|
||||
file. If you have a suitable Unix-like shell, the RunTest script will run
|
||||
the appropriate tests for you. The command "RunTest list" will output a
|
||||
list of all the tests.
|
||||
|
||||
Note that the supplied files are in Unix format, with just LF characters
|
||||
as line terminators. You may need to edit them to change this if your
|
||||
system uses a different convention.
|
||||
|
||||
(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
|
||||
by running pcre2test with the -jit option. This is done automatically by
|
||||
the RunTest script. You might also like to build and run the freestanding
|
||||
JIT test program, src/pcre2_jit_test.c.
|
||||
|
||||
(11) If you want to use the pcre2grep command, compile and link
|
||||
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
|
||||
need the pcre2posix library). If you have built the PCRE2 library with JIT
|
||||
support by defining SUPPORT_JIT in src/config.h, you can also define
|
||||
SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless
|
||||
it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without
|
||||
defining SUPPORT_JIT, pcre2grep does not try to make use of JIT.
|
||||
|
||||
|
||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||
|
||||
Prior to release 10.30 the default system stack size of 1MiB in some Windows
|
||||
environments caused issues with some tests. This should no longer be the case
|
||||
for 10.30 and later releases.
|
||||
|
||||
|
||||
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
If you want to statically link a program against a PCRE2 library in the form of
|
||||
a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
|
||||
|
||||
|
||||
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
It is possible to compile programs to use different calling conventions using
|
||||
MSVC. Search the web for "calling conventions" for more information. To make it
|
||||
easier to change the calling convention for the exported functions in the
|
||||
PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
|
||||
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
|
||||
not set, it defaults to empty; the default calling convention is then used
|
||||
(which is what is wanted most of the time).
|
||||
|
||||
|
||||
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
|
||||
|
||||
There are two ways of building PCRE2 using the "configure, make, make install"
|
||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
||||
the same thing; they are completely different from each other. There is also
|
||||
support for building using CMake, which some users find a more straightforward
|
||||
way of building PCRE2 under Windows.
|
||||
|
||||
The MinGW home page (http://www.mingw.org/) says this:
|
||||
|
||||
MinGW: A collection of freely available and freely distributable Windows
|
||||
specific header files and import libraries combined with GNU toolsets that
|
||||
allow one to produce native Windows programs that do not rely on any
|
||||
3rd-party C runtime DLLs.
|
||||
|
||||
The Cygwin home page (http://www.cygwin.com/) says this:
|
||||
|
||||
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
||||
|
||||
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
||||
substantial Linux API functionality
|
||||
|
||||
. A collection of tools which provide Linux look and feel.
|
||||
|
||||
On both MinGW and Cygwin, PCRE2 should build correctly using:
|
||||
|
||||
./configure && make && make install
|
||||
|
||||
This should create two libraries called libpcre2-8 and libpcre2-posix. These
|
||||
are independent libraries: when you link with libpcre2-posix you must also link
|
||||
with libpcre2-8, which contains the basic functions.
|
||||
|
||||
Using Cygwin's compiler generates libraries and executables that depend on
|
||||
cygwin1.dll. If a library that is generated this way is distributed,
|
||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
||||
licence, this forces not only PCRE2 to be under the GPL, but also the entire
|
||||
application. A distributor who wants to keep their own code proprietary must
|
||||
purchase an appropriate Cygwin licence.
|
||||
|
||||
MinGW has no such restrictions. The MinGW compiler generates a library or
|
||||
executable that can run standalone on Windows without any third party dll or
|
||||
licensing issues.
|
||||
|
||||
But there is more complication:
|
||||
|
||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
||||
gcc and MinGW's gcc). So, a user can:
|
||||
|
||||
. Build native binaries by using MinGW or by getting Cygwin and using
|
||||
-mno-cygwin.
|
||||
|
||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
||||
compiler flags.
|
||||
|
||||
The test files that are supplied with PCRE2 are in UNIX format, with LF
|
||||
characters as line terminators. Unless your PCRE2 library uses a default
|
||||
newline option that includes LF as a valid newline, it may be necessary to
|
||||
change the line terminators in the test files to get some of the tests to work.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON WINDOWS WITH CMAKE
|
||||
|
||||
CMake is an alternative configuration facility that can be used instead of
|
||||
"configure". CMake creates project files (make files, solution files, etc.)
|
||||
tailored to numerous development environments, including Visual Studio,
|
||||
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||
directories.
|
||||
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||
event that errors do occur, it is recommended that you delete the CMake cache
|
||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||
cache can be deleted by selecting "File > Delete Cache".
|
||||
|
||||
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||
ensure that cmake\bin is on your path.
|
||||
|
||||
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
|
||||
directory such as C:\pcre2. You should ensure your local date and time
|
||||
is not earlier than the file dates in your source dir if the release is
|
||||
very new.
|
||||
|
||||
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||
source dir. For example, C:\pcre2\pcre2-xx\build.
|
||||
|
||||
4. Run cmake-gui from the Shell envirornment of your build tool, for example,
|
||||
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
|
||||
to start Cmake from the Windows Start menu, as this can lead to errors.
|
||||
|
||||
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
|
||||
build directories, respectively.
|
||||
|
||||
6. Hit the "Configure" button.
|
||||
|
||||
7. Select the particular IDE / build tool that you are using (Visual
|
||||
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||
|
||||
8. The GUI will then list several configuration options. This is where
|
||||
you can disable Unicode support or select other PCRE2 optional features.
|
||||
|
||||
9. Hit "Configure" again. The adjacent "Generate" button should now be
|
||||
active.
|
||||
|
||||
10. Hit "Generate".
|
||||
|
||||
11. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||
cmake-gui and use the generated build system with your compiler or IDE.
|
||||
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
|
||||
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||
build the ALL_BUILD project.
|
||||
|
||||
12. If during configuration with cmake-gui you've elected to build the test
|
||||
programs, you can execute them by building the test project. E.g., for
|
||||
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
|
||||
|
||||
The code currently cannot be compiled without a stdint.h header, which is
|
||||
available only in relatively recent versions of Visual Studio. However, this
|
||||
portable and permissively-licensed implementation of the header worked without
|
||||
issue:
|
||||
|
||||
http://www.azillionmonkeys.com/qed/pstdint.h
|
||||
|
||||
Just rename it and drop it into the top level of the build tree.
|
||||
|
||||
|
||||
TESTING WITH RUNTEST.BAT
|
||||
|
||||
If configured with CMake, building the test project ("make test" or building
|
||||
ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
|
||||
on your configuration options, possibly other test programs) in the build
|
||||
directory. The pcre2_test.bat script runs RunTest.bat with correct source and
|
||||
exe paths.
|
||||
|
||||
For manual testing with RunTest.bat, provided the build dir is a subdirectory
|
||||
of the source directory: Open command shell window. Chdir to the location
|
||||
of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
|
||||
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
|
||||
|
||||
To run only a particular test with RunTest.Bat provide a test number argument.
|
||||
|
||||
Otherwise:
|
||||
|
||||
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
|
||||
have been created.
|
||||
|
||||
2. Edit RunTest.bat to indentify the full or relative location of
|
||||
the pcre2 source (wherein which the testdata folder resides), e.g.:
|
||||
|
||||
set srcdir=C:\pcre2\pcre2-10.00
|
||||
|
||||
3. In a Windows command environment, chdir to the location of your bat and
|
||||
exe programs.
|
||||
|
||||
4. Run RunTest.bat. Test outputs will automatically be compared to expected
|
||||
results, and discrepancies will be identified in the console output.
|
||||
|
||||
To independently test the just-in-time compiler, run pcre2_jit_test.exe.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
|
||||
|
||||
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
|
||||
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
|
||||
applications can be supported through UNIX System Services, and in such an
|
||||
environment it should be possible to build PCRE2 in the same way as in other
|
||||
systems, with the EBCDIC related configuration settings, but it is not known if
|
||||
anybody has tried this.
|
||||
|
||||
In native z/OS (without UNIX System Services) and in z/VM, special ports are
|
||||
required. For details, please see file 939 on this web site:
|
||||
|
||||
http://www.cbttape.org
|
||||
|
||||
Everything in that location, source and executable, is in EBCDIC and native
|
||||
z/OS file formats. The port provides an API for LE languages such as COBOL and
|
||||
for the z/OS and z/VM versions of the Rexx languages.
|
||||
|
||||
==============================
|
||||
Last Updated: 14 November 2018
|
||||
==============================
|
236
pcre2/PrepareRelease
vendored
Executable file
236
pcre2/PrepareRelease
vendored
Executable file
@ -0,0 +1,236 @@
|
||||
#/bin/sh
|
||||
|
||||
# Script to prepare the files for building a PCRE2 release. It does some
|
||||
# processing of the documentation, detrails files, and creates pcre2.h.generic
|
||||
# and config.h.generic (for use by builders who can't run ./configure).
|
||||
|
||||
# You must run this script before runnning "make dist". If its first argument
|
||||
# is "doc", it stops after preparing the documentation. There are no other
|
||||
# arguments. The script makes use of the following files:
|
||||
|
||||
# 132html A Perl script that converts a .1 or .3 man page into HTML. It
|
||||
# "knows" the relevant troff constructs that are used in the PCRE2
|
||||
# man pages.
|
||||
|
||||
# CheckMan A Perl script that checks man pages for typos in the mark up.
|
||||
|
||||
# CleanTxt A Perl script that cleans up the output of "nroff -man" by
|
||||
# removing backspaces and other redundant text so as to produce
|
||||
# a readable .txt file.
|
||||
|
||||
# Detrail A Perl script that removes trailing spaces from files.
|
||||
|
||||
# doc/index.html.src
|
||||
# A file that is copied as index.html into the doc/html directory
|
||||
# when the HTML documentation is built. It works like this so that
|
||||
# doc/html can be deleted and re-created from scratch.
|
||||
|
||||
# README & NON-AUTOTOOLS-BUILD
|
||||
# These files are copied into the doc/html directory, with .txt
|
||||
# extensions so that they can by hyperlinked from the HTML
|
||||
# documentation, because some people just go to the HTML without
|
||||
# looking for text files.
|
||||
|
||||
|
||||
# First, sort out the documentation. Remove pcre2demo.3 first because it won't
|
||||
# pass the markup check (it is created below, using markup that none of the
|
||||
# other pages use).
|
||||
|
||||
cd doc
|
||||
echo Processing documentation
|
||||
|
||||
/bin/rm -f pcre2demo.3
|
||||
|
||||
# Check the remaining man pages
|
||||
|
||||
perl ../CheckMan *.1 *.3
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
# Make Text form of the documentation. It needs some mangling to make it
|
||||
# tidy for online reading. Concatenate all the .3 stuff, but omit the
|
||||
# individual function pages.
|
||||
|
||||
cat <<End >pcre2.txt
|
||||
-----------------------------------------------------------------------------
|
||||
This file contains a concatenation of the PCRE2 man pages, converted to plain
|
||||
text format for ease of searching with a text editor, or for use on systems
|
||||
that do not have a man page processor. The small individual files that give
|
||||
synopses of each function in the library have not been included. Neither has
|
||||
the pcre2demo program. There are separate text files for the pcre2grep and
|
||||
pcre2test commands.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
End
|
||||
|
||||
echo "Making pcre2.txt"
|
||||
for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
|
||||
pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform \
|
||||
pcre2posix pcre2sample pcre2serialize pcre2syntax \
|
||||
pcre2unicode ; do
|
||||
echo " Processing $file.3"
|
||||
nroff -c -man $file.3 >$file.rawtxt
|
||||
perl ../CleanTxt <$file.rawtxt >>pcre2.txt
|
||||
/bin/rm $file.rawtxt
|
||||
echo "------------------------------------------------------------------------------" >>pcre2.txt
|
||||
if [ "$file" != "pcre2sample" ] ; then
|
||||
echo " " >>pcre2.txt
|
||||
echo " " >>pcre2.txt
|
||||
fi
|
||||
done
|
||||
|
||||
# The three commands
|
||||
for file in pcre2test pcre2grep pcre2-config ; do
|
||||
echo Making $file.txt
|
||||
nroff -c -man $file.1 >$file.rawtxt
|
||||
perl ../CleanTxt <$file.rawtxt >$file.txt
|
||||
/bin/rm $file.rawtxt
|
||||
done
|
||||
|
||||
|
||||
# Make pcre2demo.3 from the pcre2demo.c source file
|
||||
|
||||
echo "Making pcre2demo.3"
|
||||
perl <<"END" >pcre2demo.3
|
||||
open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n";
|
||||
open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n";
|
||||
print OUT ".\\\" Start example.\n" .
|
||||
".de EX\n" .
|
||||
". nr mE \\\\n(.f\n" .
|
||||
". nf\n" .
|
||||
". nh\n" .
|
||||
". ft CW\n" .
|
||||
"..\n" .
|
||||
".\n" .
|
||||
".\n" .
|
||||
".\\\" End example.\n" .
|
||||
".de EE\n" .
|
||||
". ft \\\\n(mE\n" .
|
||||
". fi\n" .
|
||||
". hy \\\\n(HY\n" .
|
||||
"..\n" .
|
||||
".\n" .
|
||||
".EX\n" ;
|
||||
while (<IN>)
|
||||
{
|
||||
s/\\/\\e/g;
|
||||
print OUT;
|
||||
}
|
||||
print OUT ".EE\n";
|
||||
close(IN);
|
||||
close(OUT);
|
||||
END
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
# Make HTML form of the documentation.
|
||||
|
||||
echo "Making HTML documentation"
|
||||
/bin/rm html/*
|
||||
cp index.html.src html/index.html
|
||||
cp ../README html/README.txt
|
||||
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
|
||||
|
||||
for file in *.1 ; do
|
||||
base=`basename $file .1`
|
||||
echo " Making $base.html"
|
||||
perl ../132html -toc $base <$file >html/$base.html
|
||||
done
|
||||
|
||||
# Exclude table of contents for function summaries. It seems that expr
|
||||
# forces an anchored regex. Also exclude them for small pages that have
|
||||
# only one section.
|
||||
|
||||
for file in *.3 ; do
|
||||
base=`basename $file .3`
|
||||
toc=-toc
|
||||
if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
|
||||
if [ "$base" = "pcre2sample" ] || \
|
||||
[ "$base" = "pcre2compat" ] || \
|
||||
[ "$base" = "pcre2limits" ] || \
|
||||
[ "$base" = "pcre2unicode" ] ; then
|
||||
toc=""
|
||||
fi
|
||||
echo " Making $base.html"
|
||||
perl ../132html $toc $base <$file >html/$base.html
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
done
|
||||
|
||||
# End of documentation processing; stop if only documentation required.
|
||||
|
||||
cd ..
|
||||
echo Documentation done
|
||||
if [ "$1" = "doc" ] ; then exit; fi
|
||||
|
||||
# These files are detrailed; do not detrail the test data because there may be
|
||||
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
|
||||
# line endings and the detrail script removes all trailing white space. The
|
||||
# configure files are also omitted from the detrailing.
|
||||
|
||||
files="\
|
||||
Makefile.am \
|
||||
configure.ac \
|
||||
README \
|
||||
LICENCE \
|
||||
COPYING \
|
||||
AUTHORS \
|
||||
NEWS \
|
||||
NON-AUTOTOOLS-BUILD \
|
||||
INSTALL \
|
||||
132html \
|
||||
CleanTxt \
|
||||
Detrail \
|
||||
ChangeLog \
|
||||
CMakeLists.txt \
|
||||
RunGrepTest \
|
||||
RunTest \
|
||||
pcre2-config.in \
|
||||
perltest.sh \
|
||||
libpcre2-8.pc.in \
|
||||
libpcre2-16.pc.in \
|
||||
libpcre2-32.pc.in \
|
||||
libpcre2-posix.pc.in \
|
||||
src/pcre2_dftables.c \
|
||||
src/pcre2.h.in \
|
||||
src/pcre2_auto_possess.c \
|
||||
src/pcre2_compile.c \
|
||||
src/pcre2_config.c \
|
||||
src/pcre2_context.c \
|
||||
src/pcre2_convert.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_extuni.c \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
src/pcre2_jit_match.c \
|
||||
src/pcre2_jit_misc.c \
|
||||
src/pcre2_jit_test.c \
|
||||
src/pcre2_maketables.c \
|
||||
src/pcre2_match.c \
|
||||
src/pcre2_match_data.c \
|
||||
src/pcre2_newline.c \
|
||||
src/pcre2_ord2utf.c \
|
||||
src/pcre2_pattern_info.c \
|
||||
src/pcre2_printint.c \
|
||||
src/pcre2_string_utils.c \
|
||||
src/pcre2_study.c \
|
||||
src/pcre2_substring.c \
|
||||
src/pcre2_tables.c \
|
||||
src/pcre2_ucd.c \
|
||||
src/pcre2_ucp.h \
|
||||
src/pcre2_valid_utf.c \
|
||||
src/pcre2_xclass.c \
|
||||
src/pcre2demo.c \
|
||||
src/pcre2grep.c \
|
||||
src/pcre2posix.c \
|
||||
src/pcre2posix.h \
|
||||
src/pcre2test.c"
|
||||
|
||||
echo Detrailing
|
||||
perl ./Detrail $files doc/p* doc/html/*
|
||||
|
||||
echo Done
|
||||
|
||||
#End
|
906
pcre2/README
vendored
Normal file
906
pcre2/README
vendored
Normal file
@ -0,0 +1,906 @@
|
||||
README file for PCRE2 (Perl-compatible regular expression library)
|
||||
------------------------------------------------------------------
|
||||
|
||||
PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
|
||||
API. Since its initial release in 2015, there has been further development of
|
||||
the code and it now differs from PCRE1 in more than just the API. There are new
|
||||
features and the internals have been improved. The latest release of PCRE2 is
|
||||
available in three alternative formats from:
|
||||
|
||||
https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.gz
|
||||
https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.bz2
|
||||
https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||
subscribe or manage your subscription here:
|
||||
|
||||
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release. The
|
||||
contents of this README file are:
|
||||
|
||||
The PCRE2 APIs
|
||||
Documentation for PCRE2
|
||||
Contributions by users of PCRE2
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
Building PCRE2 without using autotools
|
||||
Building PCRE2 using autotools
|
||||
Retrieving configuration information
|
||||
Shared libraries
|
||||
Cross-compiling using autotools
|
||||
Making new tarballs
|
||||
Testing PCRE2
|
||||
Character tables
|
||||
File manifest
|
||||
|
||||
|
||||
The PCRE2 APIs
|
||||
--------------
|
||||
|
||||
PCRE2 is written in C, and it has its own API. There are three sets of
|
||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||
32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there
|
||||
are no C++ wrappers.
|
||||
|
||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||
library that are based on the POSIX regular expression API (see the pcre2posix
|
||||
man page). These are built into a library called libpcre2-posix. Note that this
|
||||
just provides a POSIX calling interface to PCRE2; the regular expressions
|
||||
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
|
||||
and does not give full access to all of PCRE2's facilities.
|
||||
|
||||
The header file for the POSIX-style functions is called pcre2posix.h. The
|
||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||
with existing files of that name by distributing it that way. To use PCRE2 with
|
||||
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
||||
renamed or pointed at by a link (or the program modified, of course). See the
|
||||
pcre2posix documentation for more details.
|
||||
|
||||
|
||||
Documentation for PCRE2
|
||||
-----------------------
|
||||
|
||||
If you install PCRE2 in the normal way on a Unix-like system, you will end up
|
||||
with a set of man pages whose names all start with "pcre2". The one that is
|
||||
just called "pcre2" lists all the others. In addition to these man pages, the
|
||||
PCRE2 documentation is supplied in two other forms:
|
||||
|
||||
1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
|
||||
doc/pcre2test.txt in the source distribution. The first of these is a
|
||||
concatenation of the text forms of all the section 3 man pages except the
|
||||
listing of pcre2demo.c and those that summarize individual functions. The
|
||||
other two are the text forms of the section 1 man pages for the pcre2grep
|
||||
and pcre2test commands. These text forms are provided for ease of scanning
|
||||
with text editors or similar tools. They are installed in
|
||||
<prefix>/share/doc/pcre2, where <prefix> is the installation prefix
|
||||
(defaulting to /usr/local).
|
||||
|
||||
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||
in various ways, and rooted in a file called index.html, is distributed in
|
||||
doc/html and installed in <prefix>/share/doc/pcre2/html.
|
||||
|
||||
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
---------------------------------------
|
||||
|
||||
For a non-Unix-like system, please read the file NON-AUTOTOOLS-BUILD, though if
|
||||
your system supports the use of "configure" and "make" you may be able to build
|
||||
PCRE2 using autotools in the same way as for many Unix-like systems.
|
||||
|
||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
||||
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||
|
||||
PCRE2 has been compiled on many different operating systems. It should be
|
||||
straightforward to build PCRE2 on any system that has a Standard C compiler and
|
||||
library, because it uses only Standard C functions.
|
||||
|
||||
|
||||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
The use of autotools (in particular, libtool) is problematic in some
|
||||
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
|
||||
file for ways of building PCRE2 without using autotools.
|
||||
|
||||
|
||||
Building PCRE2 using autotools
|
||||
------------------------------
|
||||
|
||||
The following instructions assume the use of the widely used "configure; make;
|
||||
make install" (autotools) process.
|
||||
|
||||
To build PCRE2 on system that supports autotools, first run the "configure"
|
||||
command from the PCRE2 distribution directory, with your current directory set
|
||||
to the directory where you want the files to be created. This command is a
|
||||
standard GNU "autoconf" configuration script, for which generic instructions
|
||||
are supplied in the file INSTALL.
|
||||
|
||||
Most commonly, people build PCRE2 within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient. However,
|
||||
the usual methods of changing standard defaults are available. For example:
|
||||
|
||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||
|
||||
This command specifies that the C compiler should be run with the flags '-O2
|
||||
-Wall' instead of the default, and that "make install" should install PCRE2
|
||||
under /opt/local instead of the default /usr/local.
|
||||
|
||||
If you want to build in a different directory, just run "configure" with that
|
||||
directory as current. For example, suppose you have unpacked the PCRE2 source
|
||||
into /source/pcre2/pcre2-xxx, but you want to build it in
|
||||
/build/pcre2/pcre2-xxx:
|
||||
|
||||
cd /build/pcre2/pcre2-xxx
|
||||
/source/pcre2/pcre2-xxx/configure
|
||||
|
||||
PCRE2 is written in C and is normally compiled as a C library. However, it is
|
||||
possible to build it as a C++ library, though the provided building apparatus
|
||||
does not have any features to support this.
|
||||
|
||||
There are some optional features that can be included or omitted from the PCRE2
|
||||
library. They are also documented in the pcre2build man page.
|
||||
|
||||
. By default, both shared and static libraries are built. You can change this
|
||||
by adding one of these options to the "configure" command:
|
||||
|
||||
--disable-shared
|
||||
--disable-static
|
||||
|
||||
(See also "Shared libraries on Unix-like systems" below.)
|
||||
|
||||
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
|
||||
the "configure" command, the 16-bit library is also built. If you add
|
||||
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
|
||||
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
|
||||
to disable building the 8-bit library.
|
||||
|
||||
. If you want to include support for just-in-time (JIT) compiling, which can
|
||||
give large performance improvements on certain platforms, add --enable-jit to
|
||||
the "configure" command. This support is available only for certain hardware
|
||||
architectures. If you try to enable it on an unsupported architecture, there
|
||||
will be a compile time error. If in doubt, use --enable-jit=auto, which
|
||||
enables JIT only if the current hardware is supported.
|
||||
|
||||
. If you are enabling JIT under SELinux environment you may also want to add
|
||||
--enable-jit-sealloc, which enables the use of an executable memory allocator
|
||||
that is compatible with SELinux. Warning: this allocator is experimental!
|
||||
It does not support fork() operation and may crash when no disk space is
|
||||
available. This option has no effect if JIT is disabled.
|
||||
|
||||
. If you do not want to make use of the default support for UTF-8 Unicode
|
||||
character strings in the 8-bit library, UTF-16 Unicode character strings in
|
||||
the 16-bit library, or UTF-32 Unicode character strings in the 32-bit
|
||||
library, you can add --disable-unicode to the "configure" command. This
|
||||
reduces the size of the libraries. It is not possible to configure one
|
||||
library with Unicode support, and another without, in the same configuration.
|
||||
It is also not possible to use --enable-ebcdic (see below) with Unicode
|
||||
support, so if this option is set, you must also use --disable-unicode.
|
||||
|
||||
When Unicode support is available, the use of a UTF encoding still has to be
|
||||
enabled by setting the PCRE2_UTF option at run time or starting a pattern
|
||||
with (*UTF). When PCRE2 is compiled with Unicode support, its input can only
|
||||
either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms.
|
||||
|
||||
As well as supporting UTF strings, Unicode support includes support for the
|
||||
\P, \p, and \X sequences that recognize Unicode character properties.
|
||||
However, only the basic two-letter properties such as Lu are supported.
|
||||
Escape sequences such as \d and \w in patterns do not by default make use of
|
||||
Unicode properties, but can be made to do so by setting the PCRE2_UCP option
|
||||
or starting a pattern with (*UCP).
|
||||
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
|
||||
of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
|
||||
character as indicating the end of a line. Whatever you specify at build time
|
||||
is the default; the caller of PCRE2 can change the selection at run time. The
|
||||
default newline indicator is a single LF character (the Unix standard). You
|
||||
can specify the default newline indicator by adding --enable-newline-is-cr,
|
||||
--enable-newline-is-lf, --enable-newline-is-crlf,
|
||||
--enable-newline-is-anycrlf, --enable-newline-is-any, or
|
||||
--enable-newline-is-nul to the "configure" command, respectively.
|
||||
|
||||
. By default, the sequence \R in a pattern matches any Unicode line ending
|
||||
sequence. This is independent of the option specifying what PCRE2 considers
|
||||
to be the end of a line (see above). However, the caller of PCRE2 can
|
||||
restrict \R to match only CR, LF, or CRLF. You can make this the default by
|
||||
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||
|
||||
. In a pattern, the escape sequence \C matches a single code unit, even in a
|
||||
UTF mode. This can be dangerous because it breaks up multi-code-unit
|
||||
characters. You can build PCRE2 with the use of \C permanently locked out by
|
||||
adding --enable-never-backslash-C (note the upper case C) to the "configure"
|
||||
command. When \C is allowed by the library, individual applications can lock
|
||||
it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option.
|
||||
|
||||
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
|
||||
pattern. This limits the amount of system stack that a pattern uses when it
|
||||
is compiled. The default is 250, but you can change it by setting, for
|
||||
example,
|
||||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE2 has a counter that can be set to limit the amount of computing resource
|
||||
it uses when matching a pattern. If the limit is exceeded during a match, the
|
||||
match fails. The default is ten million. You can change the default by
|
||||
setting, for example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
on the "configure" command. This is just the default; individual calls to
|
||||
pcre2_match() or pcre2_dfa_match() can supply their own value. There is more
|
||||
discussion in the pcre2api man page (search for pcre2_set_match_limit).
|
||||
|
||||
. There is a separate counter that limits the depth of nested backtracking
|
||||
(pcre2_match()) or nested function calls (pcre2_dfa_match()) during a
|
||||
matching process, which indirectly limits the amount of heap memory that is
|
||||
used, and in the case of pcre2_dfa_match() the amount of stack as well. This
|
||||
counter also has a default of ten million, which is essentially "unlimited".
|
||||
You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-depth=5000
|
||||
|
||||
There is more discussion in the pcre2api man page (search for
|
||||
pcre2_set_depth_limit).
|
||||
|
||||
. You can also set an explicit limit on the amount of heap memory used by
|
||||
the pcre2_match() and pcre2_dfa_match() interpreters:
|
||||
|
||||
--with-heap-limit=500
|
||||
|
||||
The units are kibibytes (units of 1024 bytes). This limit does not apply when
|
||||
the JIT optimization (which has its own memory control features) is used.
|
||||
There is more discussion on the pcre2api man page (search for
|
||||
pcre2_set_heap_limit).
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64 kibibytes. You can increase this by adding --with-link-size=3 to the
|
||||
"configure" command. PCRE2 then uses three bytes instead of two for offsets
|
||||
to different parts of the compiled pattern. In the 16-bit library,
|
||||
--with-link-size=3 is the same as --with-link-size=4, which (in both
|
||||
libraries) uses four-byte offsets. Increasing the internal link size reduces
|
||||
performance in the 8-bit and 16-bit libraries. In the 32-bit library, the
|
||||
link size setting is ignored, as 4-byte offsets are always used.
|
||||
|
||||
. For speed, PCRE2 uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, it uses a set of
|
||||
tables for ASCII encoding that is part of the distribution. If you specify
|
||||
|
||||
--enable-rebuild-chartables
|
||||
|
||||
a program called pcre2_dftables is compiled and run in the default C locale
|
||||
when you obey "make". It builds a source file called pcre2_chartables.c. If
|
||||
you do not specify this option, pcre2_chartables.c is created as a copy of
|
||||
pcre2_chartables.c.dist. See "Character tables" below for further
|
||||
information.
|
||||
|
||||
. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
|
||||
character code (as opposed to ASCII/Unicode) by specifying
|
||||
|
||||
--enable-ebcdic --disable-unicode
|
||||
|
||||
This automatically implies --enable-rebuild-chartables (see above). However,
|
||||
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
|
||||
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
|
||||
which specifies that the code value for the EBCDIC NL character is 0x25
|
||||
instead of the default 0x15.
|
||||
|
||||
. If you specify --enable-debug, additional debugging code is included in the
|
||||
build. This option is intended for use by the PCRE2 maintainers.
|
||||
|
||||
. In environments where valgrind is installed, if you specify
|
||||
|
||||
--enable-valgrind
|
||||
|
||||
PCRE2 will use valgrind annotations to mark certain memory regions as
|
||||
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||
mostly useful for debugging PCRE2 itself.
|
||||
|
||||
. In environments where the gcc compiler is used and lcov is installed, if you
|
||||
specify
|
||||
|
||||
--enable-coverage
|
||||
|
||||
the build process implements a code coverage report for the test suite. The
|
||||
report is generated by running "make coverage". If ccache is installed on
|
||||
your system, it must be disabled when building PCRE2 for coverage reporting.
|
||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||
running "make" to build PCRE2. There is more information about coverage
|
||||
reporting in the "pcre2build" documentation.
|
||||
|
||||
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
|
||||
you add --disable-pcre2grep-jit to the "configure" command.
|
||||
|
||||
. There is support for calling external programs during matching in the
|
||||
pcre2grep command, using PCRE2's callout facility with string arguments. This
|
||||
support can be disabled by adding --disable-pcre2grep-callout to the
|
||||
"configure" command. There are two kinds of callout: one that generates
|
||||
output from inbuilt code, and another that calls an external program. The
|
||||
latter has special support for Windows and VMS; otherwise it assumes the
|
||||
existence of the fork() function. This facility can be disabled by adding
|
||||
--disable-pcre2grep-callout-fork to the "configure" command.
|
||||
|
||||
. The pcre2grep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
|
||||
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
|
||||
specifying one or both of
|
||||
|
||||
--enable-pcre2grep-libz
|
||||
--enable-pcre2grep-libbz2
|
||||
|
||||
Of course, the relevant libraries must be installed on your system.
|
||||
|
||||
. The default starting size (in bytes) of the internal buffer used by pcre2grep
|
||||
can be set by, for example:
|
||||
|
||||
--with-pcre2grep-bufsize=51200
|
||||
|
||||
The value must be a plain integer. The default is 20480. The amount of memory
|
||||
used by pcre2grep is actually three times this number, to allow for "before"
|
||||
and "after" lines. If very long lines are encountered, the buffer is
|
||||
automatically enlarged, up to a fixed maximum size.
|
||||
|
||||
. The default maximum size of pcre2grep's internal buffer can be set by, for
|
||||
example:
|
||||
|
||||
--with-pcre2grep-max-bufsize=2097152
|
||||
|
||||
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
||||
whichever is the larger.
|
||||
|
||||
. It is possible to compile pcre2test so that it links with the libreadline
|
||||
or libedit libraries, by specifying, respectively,
|
||||
|
||||
--enable-pcre2test-libreadline or --enable-pcre2test-libedit
|
||||
|
||||
If this is done, when pcre2test's input is from a terminal, it reads it using
|
||||
the readline() function. This provides line-editing and history facilities.
|
||||
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
||||
pcre2test linked in this way, there may be licensing issues. These can be
|
||||
avoided by linking with libedit (which has a BSD licence) instead.
|
||||
|
||||
Enabling libreadline causes the -lreadline option to be added to the
|
||||
pcre2test build. In many operating environments with a sytem-installed
|
||||
readline library this is sufficient. However, in some environments (e.g. if
|
||||
an unmodified distribution version of readline is in use), it may be
|
||||
necessary to specify something like LIBS="-lncurses" as well. This is
|
||||
because, to quote the readline INSTALL, "Readline uses the termcap functions,
|
||||
but does not link with the termcap or curses library itself, allowing
|
||||
applications which link with readline the to choose an appropriate library."
|
||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||
should fix it.
|
||||
|
||||
. The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||
defined and has a value greater than or equal to 199901L (indicating C99).
|
||||
However, there is at least one environment that claims to be C99 but does not
|
||||
support these modifiers. If --disable-percent-zt is specified, no use is made
|
||||
of the z or t modifiers. Instead or %td or %zu, %lu is used, with a cast for
|
||||
size_t values.
|
||||
|
||||
. There is a special option called --enable-fuzz-support for use by people who
|
||||
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
|
||||
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
|
||||
be built, but not installed. This contains a single function called
|
||||
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
|
||||
length of the string. When called, this function tries to compile the string
|
||||
as a pattern, and if that succeeds, to match it. This is done both with no
|
||||
options and with some random options bits that are generated from the string.
|
||||
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
||||
be created. This is normally run under valgrind or used when PCRE2 is
|
||||
compiled with address sanitizing enabled. It calls the fuzzing function and
|
||||
outputs information about it is doing. The input strings are specified by
|
||||
arguments: if an argument starts with "=" the rest of it is a literal input
|
||||
string. Otherwise, it is assumed to be a file name, and the contents of the
|
||||
file are the test string.
|
||||
|
||||
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
|
||||
which caused pcre2_match() to use individual blocks on the heap for
|
||||
backtracking instead of recursive function calls (which use the stack). This
|
||||
is now obsolete since pcre2_match() was refactored always to use the heap (in
|
||||
a much more efficient way than before). This option is retained for backwards
|
||||
compatibility, but has no effect other than to output a warning.
|
||||
|
||||
The "configure" script builds the following files for the basic C library:
|
||||
|
||||
. Makefile the makefile that builds the library
|
||||
. src/config.h build-time configuration options for the library
|
||||
. src/pcre2.h the public PCRE2 header file
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
that were set for "configure"
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-16.pc ) data for the pkg-config command
|
||||
. libpcre2-32.pc )
|
||||
. libpcre2-posix.pc )
|
||||
. libtool script that builds shared and/or static libraries
|
||||
|
||||
Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
|
||||
tarballs under the names config.h.generic and pcre2.h.generic. These are
|
||||
provided for those who have to build PCRE2 without using "configure" or CMake.
|
||||
If you use "configure" or CMake, the .generic versions are not used.
|
||||
|
||||
The "configure" script also creates config.status, which is an executable
|
||||
script that can be run to recreate the configuration, and config.log, which
|
||||
contains compiler output from tests that "configure" runs.
|
||||
|
||||
Once "configure" has run, you can run "make". This builds whichever of the
|
||||
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
|
||||
program called pcre2test. If you enabled JIT support with --enable-jit, another
|
||||
test program called pcre2_jit_test is built as well. If the 8-bit library is
|
||||
built, libpcre2-posix and the pcre2grep command are also built. Running
|
||||
"make" with the -j option may speed up compilation on multiprocessor systems.
|
||||
|
||||
The command "make check" runs all the appropriate tests. Details of the PCRE2
|
||||
tests are given below in a separate section of this document. The -j option of
|
||||
"make" can also be used when running the tests.
|
||||
|
||||
You can use "make install" to install PCRE2 into live directories on your
|
||||
system. The following are installed (file names are all relative to the
|
||||
<prefix> that is set when "configure" is run):
|
||||
|
||||
Commands (bin):
|
||||
pcre2test
|
||||
pcre2grep (if 8-bit support is enabled)
|
||||
pcre2-config
|
||||
|
||||
Libraries (lib):
|
||||
libpcre2-8 (if 8-bit support is enabled)
|
||||
libpcre2-16 (if 16-bit support is enabled)
|
||||
libpcre2-32 (if 32-bit support is enabled)
|
||||
libpcre2-posix (if 8-bit support is enabled)
|
||||
|
||||
Configuration information (lib/pkgconfig):
|
||||
libpcre2-8.pc
|
||||
libpcre2-16.pc
|
||||
libpcre2-32.pc
|
||||
libpcre2-posix.pc
|
||||
|
||||
Header files (include):
|
||||
pcre2.h
|
||||
pcre2posix.h
|
||||
|
||||
Man pages (share/man/man{1,3}):
|
||||
pcre2grep.1
|
||||
pcre2test.1
|
||||
pcre2-config.1
|
||||
pcre2.3
|
||||
pcre2*.3 (lots more pages, all starting "pcre2")
|
||||
|
||||
HTML documentation (share/doc/pcre2/html):
|
||||
index.html
|
||||
*.html (lots more pages, hyperlinked from index.html)
|
||||
|
||||
Text file documentation (share/doc/pcre2):
|
||||
AUTHORS
|
||||
COPYING
|
||||
ChangeLog
|
||||
LICENCE
|
||||
NEWS
|
||||
README
|
||||
pcre2.txt (a concatenation of the man(3) pages)
|
||||
pcre2test.txt the pcre2test man page
|
||||
pcre2grep.txt the pcre2grep man page
|
||||
pcre2-config.txt the pcre2-config man page
|
||||
|
||||
If you want to remove PCRE2 from your system, you can run "make uninstall".
|
||||
This removes all the files that "make install" installed. However, it does not
|
||||
remove any directories, because these are often shared with other programs.
|
||||
|
||||
|
||||
Retrieving configuration information
|
||||
------------------------------------
|
||||
|
||||
Running "make install" installs the command pcre2-config, which can be used to
|
||||
recall information about the PCRE2 configuration and installation. For example:
|
||||
|
||||
pcre2-config --version
|
||||
|
||||
prints the version number, and
|
||||
|
||||
pcre2-config --libs8
|
||||
|
||||
outputs information about where the 8-bit library is installed. This command
|
||||
can be included in makefiles for programs that use PCRE2, saving the programmer
|
||||
from having to remember too many details. Run pcre2-config with no arguments to
|
||||
obtain a list of possible arguments.
|
||||
|
||||
The pkg-config command is another system for saving and retrieving information
|
||||
about installed libraries. Instead of separate commands for each library, a
|
||||
single command is used. For example:
|
||||
|
||||
pkg-config --libs libpcre2-16
|
||||
|
||||
The data is held in *.pc files that are installed in a directory called
|
||||
<prefix>/lib/pkgconfig.
|
||||
|
||||
|
||||
Shared libraries
|
||||
----------------
|
||||
|
||||
The default distribution builds PCRE2 as shared libraries and static libraries,
|
||||
as long as the operating system supports shared libraries. Shared library
|
||||
support relies on the "libtool" script which is built as part of the
|
||||
"configure" process.
|
||||
|
||||
The libtool script is used to compile and link both shared and static
|
||||
libraries. They are placed in a subdirectory called .libs when they are newly
|
||||
built. The programs pcre2test and pcre2grep are built to use these uninstalled
|
||||
libraries (by means of wrapper scripts in the case of shared libraries). When
|
||||
you use "make install" to install shared libraries, pcre2grep and pcre2test are
|
||||
automatically re-built to use the newly installed shared libraries before being
|
||||
installed themselves. However, the versions left in the build directory still
|
||||
use the uninstalled libraries.
|
||||
|
||||
To build PCRE2 using static libraries only you must use --disable-shared when
|
||||
configuring it. For example:
|
||||
|
||||
./configure --prefix=/usr/gnu --disable-shared
|
||||
|
||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
|
||||
build only shared libraries.
|
||||
|
||||
|
||||
Cross-compiling using autotools
|
||||
-------------------------------
|
||||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE2 for some other host. However, you should NOT
|
||||
specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
|
||||
source file is compiled and run on the local host, in order to generate the
|
||||
inbuilt character tables (the pcre2_chartables.c file). This will probably not
|
||||
work, because pcre2_dftables.c needs to be compiled with the local compiler,
|
||||
not the cross compiler.
|
||||
|
||||
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
||||
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
||||
tables that assumes ASCII code. Cross-compiling with the default tables should
|
||||
not be a problem.
|
||||
|
||||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
|
||||
hand and run it on the local host to make a new version of
|
||||
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
|
||||
at build time" for more details.
|
||||
|
||||
|
||||
Making new tarballs
|
||||
-------------------
|
||||
|
||||
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
|
||||
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||
|
||||
|
||||
Testing PCRE2
|
||||
-------------
|
||||
|
||||
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
|
||||
There is another script called RunGrepTest that tests the pcre2grep command.
|
||||
When JIT support is enabled, a third test program called pcre2_jit_test is
|
||||
built. Both the scripts and all the program tests are run if you obey "make
|
||||
check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD.
|
||||
|
||||
The RunTest script runs the pcre2test test program (which is documented in its
|
||||
own man page) on each of the relevant testinput files in the testdata
|
||||
directory, and compares the output with the contents of the corresponding
|
||||
testoutput files. RunTest uses a file called testtry to hold the main output
|
||||
from pcre2test. Other files whose names begin with "test" are used as working
|
||||
files in some tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options were selected. For
|
||||
example, the tests for UTF-8/16/32 features are run only when Unicode support
|
||||
is available. RunTest outputs a comment when it skips a test.
|
||||
|
||||
Many (but not all) of the tests that are not skipped are run twice if JIT
|
||||
support is available. On the second run, JIT compilation is forced. This
|
||||
testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||
|
||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||
libraries that are enabled. If you want to run just one set of tests, call
|
||||
RunTest with either the -8, -16 or -32 option.
|
||||
|
||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||
on the RunTest command line. To run pcre2test on just one or more specific test
|
||||
files, give their numbers as arguments to RunTest, for example:
|
||||
|
||||
RunTest 2 7 11
|
||||
|
||||
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
end), or a number preceded by ~ to exclude a test. For example:
|
||||
|
||||
Runtest 3-15 ~10
|
||||
|
||||
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
|
||||
except test 13. Whatever order the arguments are in, the tests are always run
|
||||
in numerical order.
|
||||
|
||||
You can also call RunTest with the single argument "list" to cause it to output
|
||||
a list of tests.
|
||||
|
||||
The test sequence starts with "test 0", which is a special test that has no
|
||||
input file, and whose output is not checked. This is because it will be
|
||||
different on different hardware and with different configurations. The test
|
||||
exists in order to exercise some of pcre2test's code that would not otherwise
|
||||
be run.
|
||||
|
||||
Tests 1 and 2 can always be run, as they expect only plain text strings (not
|
||||
UTF) and make no use of Unicode properties. The first test file can be fed
|
||||
directly into the perltest.sh script to check that Perl gives the same results.
|
||||
The only difference you should see is in the first few lines, where the Perl
|
||||
version is given instead of the PCRE2 version. The second set of tests check
|
||||
auxiliary functions, error detection, and run-time flags that are specific to
|
||||
PCRE2. It also uses the debugging flags to check some of the internals of
|
||||
pcre2_compile().
|
||||
|
||||
If you build PCRE2 with a locale setting that is not the standard C locale, the
|
||||
character tables may be different (see next paragraph). In some cases, this may
|
||||
cause failures in the second set of tests. For example, in a locale where the
|
||||
isprint() function yields TRUE for characters in the range 128-255, the use of
|
||||
[:isascii:] inside a character class defines a different set of characters, and
|
||||
this shows up in this test as a difference in the compiled code, which is being
|
||||
listed for checking. For example, where the comparison test output contains
|
||||
[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other
|
||||
cases. This is not a bug in PCRE2.
|
||||
|
||||
Test 3 checks pcre2_maketables(), the facility for building a set of character
|
||||
tables for a specific locale and using them instead of the default tables. The
|
||||
script uses the "locale" command to check for the availability of the "fr_FR",
|
||||
"french", or "fr" locale, and uses the first one that it finds. If the "locale"
|
||||
command fails, or if its output doesn't include "fr_FR", "french", or "fr" in
|
||||
the list of available locales, the third test cannot be run, and a comment is
|
||||
output to say why. If running this test produces an error like this:
|
||||
|
||||
** Failed to set locale "fr_FR"
|
||||
|
||||
it means that the given locale is not available on your system, despite being
|
||||
listed by "locale". This does not mean that PCRE2 is broken. There are three
|
||||
alternative output files for the third test, because three different versions
|
||||
of the French locale have been encountered. The test passes if its output
|
||||
matches any one of them.
|
||||
|
||||
Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible
|
||||
with the perltest.sh script, and test 5 checking PCRE2-specific things.
|
||||
|
||||
Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in
|
||||
non-UTF mode and UTF-mode with Unicode property support, respectively.
|
||||
|
||||
Test 8 checks some internal offsets and code size features, but it is run only
|
||||
when Unicode support is enabled. The output is different in 8-bit, 16-bit, and
|
||||
32-bit modes and for different link sizes, so there are different output files
|
||||
for each mode and link size.
|
||||
|
||||
Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in
|
||||
16-bit and 32-bit modes. These are tests that generate different output in
|
||||
8-bit mode. Each pair are for general cases and Unicode support, respectively.
|
||||
|
||||
Test 13 checks the handling of non-UTF characters greater than 255 by
|
||||
pcre2_dfa_match() in 16-bit and 32-bit modes.
|
||||
|
||||
Test 14 contains some special UTF and UCP tests that give different output for
|
||||
different code unit widths.
|
||||
|
||||
Test 15 contains a number of tests that must not be run with JIT. They check,
|
||||
among other non-JIT things, the match-limiting features of the intepretive
|
||||
matcher.
|
||||
|
||||
Test 16 is run only when JIT support is not available. It checks that an
|
||||
attempt to use JIT has the expected behaviour.
|
||||
|
||||
Test 17 is run only when JIT support is available. It checks JIT complete and
|
||||
partial modes, match-limiting under JIT, and other JIT-specific features.
|
||||
|
||||
Tests 18 and 19 are run only in 8-bit mode. They check the POSIX interface to
|
||||
the 8-bit library, without and with Unicode support, respectively.
|
||||
|
||||
Test 20 checks the serialization functions by writing a set of compiled
|
||||
patterns to a file, and then reloading and checking them.
|
||||
|
||||
Tests 21 and 22 test \C support when the use of \C is not locked out, without
|
||||
and with UTF support, respectively. Test 23 tests \C when it is locked out.
|
||||
|
||||
Tests 24 and 25 test the experimental pattern conversion functions, without and
|
||||
with UTF support, respectively.
|
||||
|
||||
|
||||
Character tables
|
||||
----------------
|
||||
|
||||
For speed, PCRE2 uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, a set of tables that is
|
||||
built into the library is used. The pcre2_maketables() function can be called
|
||||
by an application to create a new set of tables in the current locale. This are
|
||||
passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
|
||||
compile context.
|
||||
|
||||
The source file called pcre2_chartables.c contains the default set of tables.
|
||||
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
||||
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
||||
specified for ./configure, a new version of pcre2_chartables.c is built by the
|
||||
program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
|
||||
character handling functions such as isalnum(), isalpha(), isupper(),
|
||||
islower(), etc. to build the table sources. This means that the default C
|
||||
locale that is set for your system will control the contents of these default
|
||||
tables. You can change the default tables by editing pcre2_chartables.c and
|
||||
then re-building PCRE2. If you do this, you should take care to ensure that the
|
||||
file does not get automatically re-generated. The best way to do this is to
|
||||
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
||||
tables.
|
||||
|
||||
When the pcre2_dftables program is run as a result of specifying
|
||||
--enable-rebuild-chartables, it uses the default C locale that is set on your
|
||||
system. It does not pay attention to the LC_xxx environment variables. In other
|
||||
words, it uses the system's default locale rather than whatever the compiling
|
||||
user happens to have set. If you really do want to build a source set of
|
||||
character tables in a locale that is specified by the LC_xxx variables, you can
|
||||
run the pcre2_dftables program by hand with the -L option. For example:
|
||||
|
||||
./pcre2_dftables -L pcre2_chartables.c.special
|
||||
|
||||
The second argument names the file where the source code for the tables is
|
||||
written. The first two 256-byte tables provide lower casing and case flipping
|
||||
functions, respectively. The next table consists of a number of 32-byte bit
|
||||
maps which identify certain character classes such as digits, "word"
|
||||
characters, white space, etc. These are used when building 32-byte bit maps
|
||||
that represent character classes for code points less than 256. The final
|
||||
256-byte table has bits indicating various character types, as follows:
|
||||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 lower case letter
|
||||
8 decimal digit
|
||||
16 alphanumeric or '_'
|
||||
|
||||
You can also specify -b (with or without -L) when running pcre2_dftables. This
|
||||
causes the tables to be written in binary instead of as source code. A set of
|
||||
binary tables can be loaded into memory by an application and passed to
|
||||
pcre2_compile() in the same way as tables created dynamically by calling
|
||||
pcre2_maketables(). The tables are just a string of bytes, independent of
|
||||
hardware characteristics such as endianness. This means they can be bundled
|
||||
with an application that runs in different environments, to ensure consistent
|
||||
behaviour.
|
||||
|
||||
See also the pcre2build section "Creating character tables at build time".
|
||||
|
||||
|
||||
File manifest
|
||||
-------------
|
||||
|
||||
The distribution should contain the files listed below.
|
||||
|
||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
the src directory:
|
||||
|
||||
src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
|
||||
when --enable-rebuild-chartables is specified
|
||||
|
||||
src/pcre2_chartables.c.dist a default set of character tables that assume
|
||||
ASCII coding; unless --enable-rebuild-chartables is
|
||||
specified, used by copying to pcre2_chartables.c
|
||||
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_convert.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_extuni.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_script_run.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
src/pcre2_xclass.c )
|
||||
|
||||
src/pcre2_printint.c debugging function that is used by pcre2test,
|
||||
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
|
||||
|
||||
src/config.h.in template for config.h, when built by "configure"
|
||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||
src/pcre2posix.h header for the external POSIX wrapper API
|
||||
src/pcre2_internal.h header for internal use
|
||||
src/pcre2_intmodedep.h a mode-specific internal header
|
||||
src/pcre2_ucp.h header for Unicode property handling
|
||||
|
||||
sljit/* source files for the JIT compiler
|
||||
|
||||
(B) Source files for programs that use PCRE2:
|
||||
|
||||
src/pcre2demo.c simple demonstration of coding calls to PCRE2
|
||||
src/pcre2grep.c source of a grep utility that uses PCRE2
|
||||
src/pcre2test.c comprehensive test program
|
||||
src/pcre2_jit_test.c JIT test program
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
132html script to turn "man" pages into HTML
|
||||
AUTHORS information about the author of PCRE2
|
||||
ChangeLog log of changes to the code
|
||||
CleanTxt script to clean nroff output for txt man pages
|
||||
Detrail script to remove trailing spaces
|
||||
HACKING some notes about the internals of PCRE2
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE2
|
||||
COPYING the same, using GNU's standard name
|
||||
Makefile.in ) template for Unix Makefile, which is built by
|
||||
) "configure"
|
||||
Makefile.am ) the automake input that was used to create
|
||||
) Makefile.in
|
||||
NEWS important changes in this release
|
||||
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
|
||||
PrepareRelease script to make preparations for "make dist"
|
||||
README this file
|
||||
RunTest a Unix shell script for running tests
|
||||
RunGrepTest a Unix shell script for pcre2grep tests
|
||||
aclocal.m4 m4 macros (generated by "aclocal")
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac ) the autoconf input that was used to build
|
||||
) "configure" and config.h
|
||||
depcomp ) script to find program dependencies, generated by
|
||||
) automake
|
||||
doc/*.3 man page sources for PCRE2
|
||||
doc/*.1 man page sources for pcre2grep and pcre2test
|
||||
doc/index.html.src the base HTML page
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre2.txt plain text version of the man pages
|
||||
doc/pcre2test.txt plain text documentation of test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
|
||||
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
perltest.sh Script for running a Perl test program
|
||||
pcre2-config.in source of script which retains PCRE2 information
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcre2grep tests
|
||||
testdata/* other supporting test files
|
||||
|
||||
(D) Auxiliary files for cmake support
|
||||
|
||||
cmake/COPYING-CMAKE-SCRIPTS
|
||||
cmake/FindPackageHandleStandardArgs.cmake
|
||||
cmake/FindEditline.cmake
|
||||
cmake/FindReadline.cmake
|
||||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
(E) Auxiliary files for building PCRE2 "by hand"
|
||||
|
||||
src/pcre2.h.generic ) a version of the public PCRE2 header file
|
||||
) for use in non-"configure" environments
|
||||
src/config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 04 December 2020
|
1548
pcre2/aclocal.m4
vendored
Normal file
1548
pcre2/aclocal.m4
vendored
Normal file
File diff suppressed because it is too large
Load Diff
271
pcre2/ar-lib
vendored
Executable file
271
pcre2/ar-lib
vendored
Executable file
@ -0,0 +1,271 @@
|
||||
#! /bin/sh
|
||||
# Wrapper for Microsoft lib.exe
|
||||
|
||||
me=ar-lib
|
||||
scriptversion=2019-07-04.01; # UTC
|
||||
|
||||
# Copyright (C) 2010-2020 Free Software Foundation, Inc.
|
||||
# Written by Peter Rosin <peda@lysator.liu.se>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
|
||||
# func_error message
|
||||
func_error ()
|
||||
{
|
||||
echo "$me: $1" 1>&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
file_conv=
|
||||
|
||||
# func_file_conv build_file
|
||||
# Convert a $build file to $host form and store it in $file
|
||||
# Currently only supports Windows hosts.
|
||||
func_file_conv ()
|
||||
{
|
||||
file=$1
|
||||
case $file in
|
||||
/ | /[!/]*) # absolute file, and not a UNC file
|
||||
if test -z "$file_conv"; then
|
||||
# lazily determine how to convert abs files
|
||||
case `uname -s` in
|
||||
MINGW*)
|
||||
file_conv=mingw
|
||||
;;
|
||||
CYGWIN* | MSYS*)
|
||||
file_conv=cygwin
|
||||
;;
|
||||
*)
|
||||
file_conv=wine
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
case $file_conv in
|
||||
mingw)
|
||||
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
|
||||
;;
|
||||
cygwin | msys)
|
||||
file=`cygpath -m "$file" || echo "$file"`
|
||||
;;
|
||||
wine)
|
||||
file=`winepath -w "$file" || echo "$file"`
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# func_at_file at_file operation archive
|
||||
# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE
|
||||
# for each of them.
|
||||
# When interpreting the content of the @FILE, do NOT use func_file_conv,
|
||||
# since the user would need to supply preconverted file names to
|
||||
# binutils ar, at least for MinGW.
|
||||
func_at_file ()
|
||||
{
|
||||
operation=$2
|
||||
archive=$3
|
||||
at_file_contents=`cat "$1"`
|
||||
eval set x "$at_file_contents"
|
||||
shift
|
||||
|
||||
for member
|
||||
do
|
||||
$AR -NOLOGO $operation:"$member" "$archive" || exit $?
|
||||
done
|
||||
}
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
func_error "no command. Try '$0 --help' for more information."
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<EOF
|
||||
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
|
||||
|
||||
Members may be specified in a file named with @FILE.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "$me, version $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
if test $# -lt 3; then
|
||||
func_error "you must specify a program, an action and an archive"
|
||||
fi
|
||||
|
||||
AR=$1
|
||||
shift
|
||||
while :
|
||||
do
|
||||
if test $# -lt 2; then
|
||||
func_error "you must specify a program, an action and an archive"
|
||||
fi
|
||||
case $1 in
|
||||
-lib | -LIB \
|
||||
| -ltcg | -LTCG \
|
||||
| -machine* | -MACHINE* \
|
||||
| -subsystem* | -SUBSYSTEM* \
|
||||
| -verbose | -VERBOSE \
|
||||
| -wx* | -WX* )
|
||||
AR="$AR $1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
action=$1
|
||||
shift
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
orig_archive=$1
|
||||
shift
|
||||
func_file_conv "$orig_archive"
|
||||
archive=$file
|
||||
|
||||
# strip leading dash in $action
|
||||
action=${action#-}
|
||||
|
||||
delete=
|
||||
extract=
|
||||
list=
|
||||
quick=
|
||||
replace=
|
||||
index=
|
||||
create=
|
||||
|
||||
while test -n "$action"
|
||||
do
|
||||
case $action in
|
||||
d*) delete=yes ;;
|
||||
x*) extract=yes ;;
|
||||
t*) list=yes ;;
|
||||
q*) quick=yes ;;
|
||||
r*) replace=yes ;;
|
||||
s*) index=yes ;;
|
||||
S*) ;; # the index is always updated implicitly
|
||||
c*) create=yes ;;
|
||||
u*) ;; # TODO: don't ignore the update modifier
|
||||
v*) ;; # TODO: don't ignore the verbose modifier
|
||||
*)
|
||||
func_error "unknown action specified"
|
||||
;;
|
||||
esac
|
||||
action=${action#?}
|
||||
done
|
||||
|
||||
case $delete$extract$list$quick$replace,$index in
|
||||
yes,* | ,yes)
|
||||
;;
|
||||
yesyes*)
|
||||
func_error "more than one action specified"
|
||||
;;
|
||||
*)
|
||||
func_error "no action specified"
|
||||
;;
|
||||
esac
|
||||
|
||||
if test -n "$delete"; then
|
||||
if test ! -f "$orig_archive"; then
|
||||
func_error "archive not found"
|
||||
fi
|
||||
for member
|
||||
do
|
||||
case $1 in
|
||||
@*)
|
||||
func_at_file "${1#@}" -REMOVE "$archive"
|
||||
;;
|
||||
*)
|
||||
func_file_conv "$1"
|
||||
$AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
elif test -n "$extract"; then
|
||||
if test ! -f "$orig_archive"; then
|
||||
func_error "archive not found"
|
||||
fi
|
||||
if test $# -gt 0; then
|
||||
for member
|
||||
do
|
||||
case $1 in
|
||||
@*)
|
||||
func_at_file "${1#@}" -EXTRACT "$archive"
|
||||
;;
|
||||
*)
|
||||
func_file_conv "$1"
|
||||
$AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
|
||||
;;
|
||||
esac
|
||||
done
|
||||
else
|
||||
$AR -NOLOGO -LIST "$archive" | tr -d '\r' | sed -e 's/\\/\\\\/g' \
|
||||
| while read member
|
||||
do
|
||||
$AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
|
||||
done
|
||||
fi
|
||||
|
||||
elif test -n "$quick$replace"; then
|
||||
if test ! -f "$orig_archive"; then
|
||||
if test -z "$create"; then
|
||||
echo "$me: creating $orig_archive"
|
||||
fi
|
||||
orig_archive=
|
||||
else
|
||||
orig_archive=$archive
|
||||
fi
|
||||
|
||||
for member
|
||||
do
|
||||
case $1 in
|
||||
@*)
|
||||
func_file_conv "${1#@}"
|
||||
set x "$@" "@$file"
|
||||
;;
|
||||
*)
|
||||
func_file_conv "$1"
|
||||
set x "$@" "$file"
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
shift
|
||||
done
|
||||
|
||||
if test -n "$orig_archive"; then
|
||||
$AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
|
||||
else
|
||||
$AR -NOLOGO -OUT:"$archive" "$@" || exit $?
|
||||
fi
|
||||
|
||||
elif test -n "$list"; then
|
||||
if test ! -f "$orig_archive"; then
|
||||
func_error "archive not found"
|
||||
fi
|
||||
$AR -NOLOGO -LIST "$archive" || exit $?
|
||||
fi
|
22
pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
22
pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
@ -0,0 +1,22 @@
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
17
pcre2/cmake/FindEditline.cmake
Normal file
17
pcre2/cmake/FindEditline.cmake
Normal file
@ -0,0 +1,17 @@
|
||||
# Modified from FindReadline.cmake (PH Feb 2012)
|
||||
|
||||
if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
set(EDITLINE_FOUND TRUE)
|
||||
else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
FIND_PATH(EDITLINE_INCLUDE_DIR readline.h
|
||||
/usr/include/editline
|
||||
/usr/include/edit/readline
|
||||
/usr/include/readline
|
||||
)
|
||||
|
||||
FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY )
|
||||
|
||||
MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
|
||||
endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
|
58
pcre2/cmake/FindPackageHandleStandardArgs.cmake
Normal file
58
pcre2/cmake/FindPackageHandleStandardArgs.cmake
Normal file
@ -0,0 +1,58 @@
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
|
||||
# This macro is intended to be used in FindXXX.cmake modules files.
|
||||
# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and
|
||||
# it also sets the <UPPERCASED_NAME>_FOUND variable.
|
||||
# The package is found if all variables listed are TRUE.
|
||||
# Example:
|
||||
#
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
|
||||
#
|
||||
# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and
|
||||
# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
|
||||
# If it is not found and REQUIRED was used, it fails with FATAL_ERROR,
|
||||
# independent whether QUIET was used or not.
|
||||
# If it is found, the location is reported using the VAR1 argument, so
|
||||
# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
|
||||
# If the second argument is DEFAULT_MSG, the message in the failure case will
|
||||
# be "Could NOT find LibXml2", if you don't like this message you can specify
|
||||
# your own custom failure message there.
|
||||
|
||||
MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 )
|
||||
|
||||
IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
|
||||
IF (${_NAME}_FIND_REQUIRED)
|
||||
SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
|
||||
ELSE (${_NAME}_FIND_REQUIRED)
|
||||
SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
|
||||
ENDIF (${_NAME}_FIND_REQUIRED)
|
||||
ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
|
||||
SET(_FAIL_MESSAGE "${_FAIL_MSG}")
|
||||
ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
|
||||
|
||||
STRING(TOUPPER ${_NAME} _NAME_UPPER)
|
||||
|
||||
SET(${_NAME_UPPER}_FOUND TRUE)
|
||||
IF(NOT ${_VAR1})
|
||||
SET(${_NAME_UPPER}_FOUND FALSE)
|
||||
ENDIF(NOT ${_VAR1})
|
||||
|
||||
FOREACH(_CURRENT_VAR ${ARGN})
|
||||
IF(NOT ${_CURRENT_VAR})
|
||||
SET(${_NAME_UPPER}_FOUND FALSE)
|
||||
ENDIF(NOT ${_CURRENT_VAR})
|
||||
ENDFOREACH(_CURRENT_VAR)
|
||||
|
||||
IF (${_NAME_UPPER}_FOUND)
|
||||
IF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}")
|
||||
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
ELSE (${_NAME_UPPER}_FOUND)
|
||||
IF (${_NAME}_FIND_REQUIRED)
|
||||
MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}")
|
||||
ELSE (${_NAME}_FIND_REQUIRED)
|
||||
IF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
MESSAGE(STATUS "${_FAIL_MESSAGE}")
|
||||
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
ENDIF (${_NAME}_FIND_REQUIRED)
|
||||
ENDIF (${_NAME_UPPER}_FOUND)
|
||||
ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS)
|
29
pcre2/cmake/FindReadline.cmake
Normal file
29
pcre2/cmake/FindReadline.cmake
Normal file
@ -0,0 +1,29 @@
|
||||
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
|
||||
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
|
||||
# --> BSD licensed
|
||||
#
|
||||
# GNU Readline library finder
|
||||
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
set(READLINE_FOUND TRUE)
|
||||
else(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h
|
||||
/usr/include/readline
|
||||
)
|
||||
|
||||
# 2008-04-22 The next clause used to read like this:
|
||||
#
|
||||
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
|
||||
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
|
||||
# include(FindPackageHandleStandardArgs)
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
|
||||
#
|
||||
# I was advised to modify it such that it will find an ncurses library if
|
||||
# required, but not if one was explicitly given, that is, it allows the
|
||||
# default to be overridden. PH
|
||||
|
||||
FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY )
|
||||
|
||||
MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY)
|
||||
endif(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
348
pcre2/compile
vendored
Executable file
348
pcre2/compile
vendored
Executable file
@ -0,0 +1,348 @@
|
||||
#! /bin/sh
|
||||
# Wrapper for compilers which do not understand '-c -o'.
|
||||
|
||||
scriptversion=2018-03-07.03; # UTC
|
||||
|
||||
# Copyright (C) 1999-2020 Free Software Foundation, Inc.
|
||||
# Written by Tom Tromey <tromey@cygnus.com>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
nl='
|
||||
'
|
||||
|
||||
# We need space, tab and new line, in precisely that order. Quoting is
|
||||
# there to prevent tools from complaining about whitespace usage.
|
||||
IFS=" "" $nl"
|
||||
|
||||
file_conv=
|
||||
|
||||
# func_file_conv build_file lazy
|
||||
# Convert a $build file to $host form and store it in $file
|
||||
# Currently only supports Windows hosts. If the determined conversion
|
||||
# type is listed in (the comma separated) LAZY, no conversion will
|
||||
# take place.
|
||||
func_file_conv ()
|
||||
{
|
||||
file=$1
|
||||
case $file in
|
||||
/ | /[!/]*) # absolute file, and not a UNC file
|
||||
if test -z "$file_conv"; then
|
||||
# lazily determine how to convert abs files
|
||||
case `uname -s` in
|
||||
MINGW*)
|
||||
file_conv=mingw
|
||||
;;
|
||||
CYGWIN* | MSYS*)
|
||||
file_conv=cygwin
|
||||
;;
|
||||
*)
|
||||
file_conv=wine
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
case $file_conv/,$2, in
|
||||
*,$file_conv,*)
|
||||
;;
|
||||
mingw/*)
|
||||
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
|
||||
;;
|
||||
cygwin/* | msys/*)
|
||||
file=`cygpath -m "$file" || echo "$file"`
|
||||
;;
|
||||
wine/*)
|
||||
file=`winepath -w "$file" || echo "$file"`
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# func_cl_dashL linkdir
|
||||
# Make cl look for libraries in LINKDIR
|
||||
func_cl_dashL ()
|
||||
{
|
||||
func_file_conv "$1"
|
||||
if test -z "$lib_path"; then
|
||||
lib_path=$file
|
||||
else
|
||||
lib_path="$lib_path;$file"
|
||||
fi
|
||||
linker_opts="$linker_opts -LIBPATH:$file"
|
||||
}
|
||||
|
||||
# func_cl_dashl library
|
||||
# Do a library search-path lookup for cl
|
||||
func_cl_dashl ()
|
||||
{
|
||||
lib=$1
|
||||
found=no
|
||||
save_IFS=$IFS
|
||||
IFS=';'
|
||||
for dir in $lib_path $LIB
|
||||
do
|
||||
IFS=$save_IFS
|
||||
if $shared && test -f "$dir/$lib.dll.lib"; then
|
||||
found=yes
|
||||
lib=$dir/$lib.dll.lib
|
||||
break
|
||||
fi
|
||||
if test -f "$dir/$lib.lib"; then
|
||||
found=yes
|
||||
lib=$dir/$lib.lib
|
||||
break
|
||||
fi
|
||||
if test -f "$dir/lib$lib.a"; then
|
||||
found=yes
|
||||
lib=$dir/lib$lib.a
|
||||
break
|
||||
fi
|
||||
done
|
||||
IFS=$save_IFS
|
||||
|
||||
if test "$found" != yes; then
|
||||
lib=$lib.lib
|
||||
fi
|
||||
}
|
||||
|
||||
# func_cl_wrapper cl arg...
|
||||
# Adjust compile command to suit cl
|
||||
func_cl_wrapper ()
|
||||
{
|
||||
# Assume a capable shell
|
||||
lib_path=
|
||||
shared=:
|
||||
linker_opts=
|
||||
for arg
|
||||
do
|
||||
if test -n "$eat"; then
|
||||
eat=
|
||||
else
|
||||
case $1 in
|
||||
-o)
|
||||
# configure might choose to run compile as 'compile cc -o foo foo.c'.
|
||||
eat=1
|
||||
case $2 in
|
||||
*.o | *.[oO][bB][jJ])
|
||||
func_file_conv "$2"
|
||||
set x "$@" -Fo"$file"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
func_file_conv "$2"
|
||||
set x "$@" -Fe"$file"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
-I)
|
||||
eat=1
|
||||
func_file_conv "$2" mingw
|
||||
set x "$@" -I"$file"
|
||||
shift
|
||||
;;
|
||||
-I*)
|
||||
func_file_conv "${1#-I}" mingw
|
||||
set x "$@" -I"$file"
|
||||
shift
|
||||
;;
|
||||
-l)
|
||||
eat=1
|
||||
func_cl_dashl "$2"
|
||||
set x "$@" "$lib"
|
||||
shift
|
||||
;;
|
||||
-l*)
|
||||
func_cl_dashl "${1#-l}"
|
||||
set x "$@" "$lib"
|
||||
shift
|
||||
;;
|
||||
-L)
|
||||
eat=1
|
||||
func_cl_dashL "$2"
|
||||
;;
|
||||
-L*)
|
||||
func_cl_dashL "${1#-L}"
|
||||
;;
|
||||
-static)
|
||||
shared=false
|
||||
;;
|
||||
-Wl,*)
|
||||
arg=${1#-Wl,}
|
||||
save_ifs="$IFS"; IFS=','
|
||||
for flag in $arg; do
|
||||
IFS="$save_ifs"
|
||||
linker_opts="$linker_opts $flag"
|
||||
done
|
||||
IFS="$save_ifs"
|
||||
;;
|
||||
-Xlinker)
|
||||
eat=1
|
||||
linker_opts="$linker_opts $2"
|
||||
;;
|
||||
-*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
|
||||
func_file_conv "$1"
|
||||
set x "$@" -Tp"$file"
|
||||
shift
|
||||
;;
|
||||
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
|
||||
func_file_conv "$1" mingw
|
||||
set x "$@" "$file"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
shift
|
||||
done
|
||||
if test -n "$linker_opts"; then
|
||||
linker_opts="-link$linker_opts"
|
||||
fi
|
||||
exec "$@" $linker_opts
|
||||
exit 1
|
||||
}
|
||||
|
||||
eat=
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: compile [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Wrapper for compilers which do not understand '-c -o'.
|
||||
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
|
||||
arguments, and rename the output as expected.
|
||||
|
||||
If you are trying to build a whole package this is not the
|
||||
right script to run: please start by reading the file 'INSTALL'.
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "compile $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \
|
||||
icl | *[/\\]icl | icl.exe | *[/\\]icl.exe )
|
||||
func_cl_wrapper "$@" # Doesn't return...
|
||||
;;
|
||||
esac
|
||||
|
||||
ofile=
|
||||
cfile=
|
||||
|
||||
for arg
|
||||
do
|
||||
if test -n "$eat"; then
|
||||
eat=
|
||||
else
|
||||
case $1 in
|
||||
-o)
|
||||
# configure might choose to run compile as 'compile cc -o foo foo.c'.
|
||||
# So we strip '-o arg' only if arg is an object.
|
||||
eat=1
|
||||
case $2 in
|
||||
*.o | *.obj)
|
||||
ofile=$2
|
||||
;;
|
||||
*)
|
||||
set x "$@" -o "$2"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*.c)
|
||||
cfile=$1
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
shift
|
||||
done
|
||||
|
||||
if test -z "$ofile" || test -z "$cfile"; then
|
||||
# If no '-o' option was seen then we might have been invoked from a
|
||||
# pattern rule where we don't need one. That is ok -- this is a
|
||||
# normal compilation that the losing compiler can handle. If no
|
||||
# '.c' file was seen then we are probably linking. That is also
|
||||
# ok.
|
||||
exec "$@"
|
||||
fi
|
||||
|
||||
# Name of file we expect compiler to create.
|
||||
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
|
||||
|
||||
# Create the lock directory.
|
||||
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
|
||||
# that we are using for the .o file. Also, base the name on the expected
|
||||
# object file name, since that is what matters with a parallel build.
|
||||
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
|
||||
while true; do
|
||||
if mkdir "$lockdir" >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
# FIXME: race condition here if user kills between mkdir and trap.
|
||||
trap "rmdir '$lockdir'; exit 1" 1 2 15
|
||||
|
||||
# Run the compile.
|
||||
"$@"
|
||||
ret=$?
|
||||
|
||||
if test -f "$cofile"; then
|
||||
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
|
||||
elif test -f "${cofile}bj"; then
|
||||
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
|
||||
fi
|
||||
|
||||
rmdir "$lockdir"
|
||||
exit $ret
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'before-save-hook 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC0"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
1667
pcre2/config.guess
vendored
Executable file
1667
pcre2/config.guess
vendored
Executable file
File diff suppressed because it is too large
Load Diff
1793
pcre2/config.sub
vendored
Executable file
1793
pcre2/config.sub
vendored
Executable file
File diff suppressed because it is too large
Load Diff
1114
pcre2/configure.ac
vendored
1114
pcre2/configure.ac
vendored
File diff suppressed because it is too large
Load Diff
791
pcre2/depcomp
vendored
Executable file
791
pcre2/depcomp
vendored
Executable file
@ -0,0 +1,791 @@
|
||||
#! /bin/sh
|
||||
# depcomp - compile a program generating dependencies as side-effects
|
||||
|
||||
scriptversion=2018-03-07.03; # UTC
|
||||
|
||||
# Copyright (C) 1999-2020 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Run PROGRAMS ARGS to compile a file, generating dependencies
|
||||
as side-effects.
|
||||
|
||||
Environment variables:
|
||||
depmode Dependency tracking mode.
|
||||
source Source file read by 'PROGRAMS ARGS'.
|
||||
object Object file output by 'PROGRAMS ARGS'.
|
||||
DEPDIR directory where to store dependencies.
|
||||
depfile Dependency file to output.
|
||||
tmpdepfile Temporary file to use when outputting dependencies.
|
||||
libtool Whether libtool is used (yes/no).
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "depcomp $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
# Get the directory component of the given path, and save it in the
|
||||
# global variables '$dir'. Note that this directory component will
|
||||
# be either empty or ending with a '/' character. This is deliberate.
|
||||
set_dir_from ()
|
||||
{
|
||||
case $1 in
|
||||
*/*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
|
||||
*) dir=;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Get the suffix-stripped basename of the given path, and save it the
|
||||
# global variable '$base'.
|
||||
set_base_from ()
|
||||
{
|
||||
base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
|
||||
}
|
||||
|
||||
# If no dependency file was actually created by the compiler invocation,
|
||||
# we still have to create a dummy depfile, to avoid errors with the
|
||||
# Makefile "include basename.Plo" scheme.
|
||||
make_dummy_depfile ()
|
||||
{
|
||||
echo "#dummy" > "$depfile"
|
||||
}
|
||||
|
||||
# Factor out some common post-processing of the generated depfile.
|
||||
# Requires the auxiliary global variable '$tmpdepfile' to be set.
|
||||
aix_post_process_depfile ()
|
||||
{
|
||||
# If the compiler actually managed to produce a dependency file,
|
||||
# post-process it.
|
||||
if test -f "$tmpdepfile"; then
|
||||
# Each line is of the form 'foo.o: dependency.h'.
|
||||
# Do two passes, one to just change these to
|
||||
# $object: dependency.h
|
||||
# and one to simply output
|
||||
# dependency.h:
|
||||
# which is needed to avoid the deleted-header problem.
|
||||
{ sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
|
||||
sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
|
||||
} > "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
}
|
||||
|
||||
# A tabulation character.
|
||||
tab=' '
|
||||
# A newline character.
|
||||
nl='
|
||||
'
|
||||
# Character ranges might be problematic outside the C locale.
|
||||
# These definitions help.
|
||||
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
lower=abcdefghijklmnopqrstuvwxyz
|
||||
digits=0123456789
|
||||
alpha=${upper}${lower}
|
||||
|
||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
|
||||
depfile=${depfile-`echo "$object" |
|
||||
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
|
||||
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
||||
|
||||
rm -f "$tmpdepfile"
|
||||
|
||||
# Avoid interferences from the environment.
|
||||
gccflag= dashmflag=
|
||||
|
||||
# Some modes work just like other modes, but use different flags. We
|
||||
# parameterize here, but still list the modes in the big case below,
|
||||
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||
# here, because this file can only contain one case statement.
|
||||
if test "$depmode" = hp; then
|
||||
# HP compiler uses -M and no extra arg.
|
||||
gccflag=-M
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
if test "$depmode" = dashXmstdout; then
|
||||
# This is just like dashmstdout with a different argument.
|
||||
dashmflag=-xM
|
||||
depmode=dashmstdout
|
||||
fi
|
||||
|
||||
cygpath_u="cygpath -u -f -"
|
||||
if test "$depmode" = msvcmsys; then
|
||||
# This is just like msvisualcpp but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvisualcpp
|
||||
fi
|
||||
|
||||
if test "$depmode" = msvc7msys; then
|
||||
# This is just like msvc7 but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvc7
|
||||
fi
|
||||
|
||||
if test "$depmode" = xlc; then
|
||||
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
|
||||
gccflag=-qmakedep=gcc,-MF
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
case "$depmode" in
|
||||
gcc3)
|
||||
## gcc 3 implements dependency tracking that does exactly what
|
||||
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
|
||||
## it if -MD -MP comes after the -MF stuff. Hmm.
|
||||
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
|
||||
## the command line argument order; so add the flags where they
|
||||
## appear in depend2.am. Note that the slowdown incurred here
|
||||
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
|
||||
*) set fnord "$@" "$arg" ;;
|
||||
esac
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
done
|
||||
"$@"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
mv "$tmpdepfile" "$depfile"
|
||||
;;
|
||||
|
||||
gcc)
|
||||
## Note that this doesn't just cater to obsosete pre-3.x GCC compilers.
|
||||
## but also to in-use compilers like IMB xlc/xlC and the HP C compiler.
|
||||
## (see the conditional assignment to $gccflag above).
|
||||
## There are various ways to get dependency output from gcc. Here's
|
||||
## why we pick this rather obscure method:
|
||||
## - Don't want to use -MD because we'd like the dependencies to end
|
||||
## up in a subdir. Having to rename by hand is ugly.
|
||||
## (We might end up doing this anyway to support other compilers.)
|
||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||
## -MM, not -M (despite what the docs say). Also, it might not be
|
||||
## supported by the other compilers which use the 'gcc' depmode.
|
||||
## - Using -M directly means running the compiler twice (even worse
|
||||
## than renaming).
|
||||
if test -z "$gccflag"; then
|
||||
gccflag=-MD,
|
||||
fi
|
||||
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
# The second -e expression handles DOS-style file names with drive
|
||||
# letters.
|
||||
sed -e 's/^[^:]*: / /' \
|
||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||
## This next piece of magic avoids the "deleted header file" problem.
|
||||
## The problem is that when a header file which appears in a .P file
|
||||
## is deleted, the dependency causes make to die (because there is
|
||||
## typically no way to rebuild the header). We avoid this by adding
|
||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||
## this for us directly.
|
||||
## Some versions of gcc put a space before the ':'. On the theory
|
||||
## that the space means something, we add a space to the output as
|
||||
## well. hp depmode also adds that space, but also prefixes the VPATH
|
||||
## to the object. Take care to not repeat it in the output.
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
sgi)
|
||||
if test "$libtool" = yes; then
|
||||
"$@" "-Wp,-MDupdate,$tmpdepfile"
|
||||
else
|
||||
"$@" -MDupdate "$tmpdepfile"
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
|
||||
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
||||
echo "$object : \\" > "$depfile"
|
||||
# Clip off the initial element (the dependent). Don't try to be
|
||||
# clever and replace this with sed code, as IRIX sed won't handle
|
||||
# lines with more than a fixed number of characters (4096 in
|
||||
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
|
||||
# the IRIX cc adds comments like '#:fec' to the end of the
|
||||
# dependency line.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
|
||||
| tr "$nl" ' ' >> "$depfile"
|
||||
echo >> "$depfile"
|
||||
# The second pass generates a dummy entry for each header file.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||
>> "$depfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
xlc)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
aix)
|
||||
# The C for AIX Compiler uses -M and outputs the dependencies
|
||||
# in a .u file. In older versions, this file always lives in the
|
||||
# current directory. Also, the AIX compiler puts '$object:' at the
|
||||
# start of each line; $object doesn't have directory information.
|
||||
# Version 6 uses the directory in both cases.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$base.u
|
||||
tmpdepfile3=$dir.libs/$base.u
|
||||
"$@" -Wc,-M
|
||||
else
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$dir$base.u
|
||||
tmpdepfile3=$dir$base.u
|
||||
"$@" -M
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
tcc)
|
||||
# tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26
|
||||
# FIXME: That version still under development at the moment of writing.
|
||||
# Make that this statement remains true also for stable, released
|
||||
# versions.
|
||||
# It will wrap lines (doesn't matter whether long or short) with a
|
||||
# trailing '\', as in:
|
||||
#
|
||||
# foo.o : \
|
||||
# foo.c \
|
||||
# foo.h \
|
||||
#
|
||||
# It will put a trailing '\' even on the last line, and will use leading
|
||||
# spaces rather than leading tabs (at least since its commit 0394caf7
|
||||
# "Emit spaces for -MD").
|
||||
"$@" -MD -MF "$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
|
||||
# We have to change lines of the first kind to '$object: \'.
|
||||
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
|
||||
# And for each line of the second kind, we have to emit a 'dep.h:'
|
||||
# dummy dependency, to avoid the deleted-header problem.
|
||||
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
## The order of this option in the case statement is important, since the
|
||||
## shell code in configure will try each of these formats in the order
|
||||
## listed in this file. A plain '-MD' option would be understood by many
|
||||
## compilers, so we must ensure this comes after the gcc and icc options.
|
||||
pgcc)
|
||||
# Portland's C compiler understands '-MD'.
|
||||
# Will always output deps to 'file.d' where file is the root name of the
|
||||
# source file under compilation, even if file resides in a subdirectory.
|
||||
# The object file name does not affect the name of the '.d' file.
|
||||
# pgcc 10.2 will output
|
||||
# foo.o: sub/foo.c sub/foo.h
|
||||
# and will wrap long lines using '\' :
|
||||
# foo.o: sub/foo.c ... \
|
||||
# sub/foo.h ... \
|
||||
# ...
|
||||
set_dir_from "$object"
|
||||
# Use the source, not the object, to determine the base name, since
|
||||
# that's sadly what pgcc will do too.
|
||||
set_base_from "$source"
|
||||
tmpdepfile=$base.d
|
||||
|
||||
# For projects that build the same source file twice into different object
|
||||
# files, the pgcc approach of using the *source* file root name can cause
|
||||
# problems in parallel builds. Use a locking strategy to avoid stomping on
|
||||
# the same $tmpdepfile.
|
||||
lockdir=$base.d-lock
|
||||
trap "
|
||||
echo '$0: caught signal, cleaning up...' >&2
|
||||
rmdir '$lockdir'
|
||||
exit 1
|
||||
" 1 2 13 15
|
||||
numtries=100
|
||||
i=$numtries
|
||||
while test $i -gt 0; do
|
||||
# mkdir is a portable test-and-set.
|
||||
if mkdir "$lockdir" 2>/dev/null; then
|
||||
# This process acquired the lock.
|
||||
"$@" -MD
|
||||
stat=$?
|
||||
# Release the lock.
|
||||
rmdir "$lockdir"
|
||||
break
|
||||
else
|
||||
# If the lock is being held by a different process, wait
|
||||
# until the winning process is done or we timeout.
|
||||
while test -d "$lockdir" && test $i -gt 0; do
|
||||
sleep 1
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
fi
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
trap - 1 2 13 15
|
||||
if test $i -le 0; then
|
||||
echo "$0: failed to acquire lock after $numtries attempts" >&2
|
||||
echo "$0: check lockdir '$lockdir'" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each line is of the form `foo.o: dependent.h',
|
||||
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
||||
# Do two passes, one to just change these to
|
||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp2)
|
||||
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
|
||||
# compilers, which have integrated preprocessors. The correct option
|
||||
# to use with these is +Maked; it writes dependencies to a file named
|
||||
# 'foo.d', which lands next to the object file, wherever that
|
||||
# happens to be.
|
||||
# Much of this is similar to the tru64 case; see comments there.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir.libs/$base.d
|
||||
"$@" -Wc,+Maked
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
"$@" +Maked
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||
# Add 'dependent.h:' lines.
|
||||
sed -ne '2,${
|
||||
s/^ *//
|
||||
s/ \\*$//
|
||||
s/$/:/
|
||||
p
|
||||
}' "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||
;;
|
||||
|
||||
tru64)
|
||||
# The Tru64 compiler uses -MD to generate dependencies as a side
|
||||
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
|
||||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||
# dependencies in 'foo.d' instead, so we check for that too.
|
||||
# Subdirectories are respected.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
|
||||
if test "$libtool" = yes; then
|
||||
# Libtool generates 2 separate objects for the 2 libraries. These
|
||||
# two compilations output dependencies in $dir.libs/$base.o.d and
|
||||
# in $dir$base.o.d. We have to check for both files, because
|
||||
# one of the two compilations can be disabled. We should prefer
|
||||
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||
# the former would cause a distcleancheck panic.
|
||||
tmpdepfile1=$dir$base.o.d # libtool 1.5
|
||||
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
|
||||
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||
"$@" -Wc,-MD
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
tmpdepfile3=$dir$base.d
|
||||
"$@" -MD
|
||||
fi
|
||||
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
# Same post-processing that is required for AIX mode.
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
msvc7)
|
||||
if test "$libtool" = yes; then
|
||||
showIncludes=-Wc,-showIncludes
|
||||
else
|
||||
showIncludes=-showIncludes
|
||||
fi
|
||||
"$@" $showIncludes > "$tmpdepfile"
|
||||
stat=$?
|
||||
grep -v '^Note: including file: ' "$tmpdepfile"
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
# The first sed program below extracts the file names and escapes
|
||||
# backslashes for cygpath. The second sed program outputs the file
|
||||
# name when reading, but also accumulates all include files in the
|
||||
# hold buffer in order to output them again at the end. This only
|
||||
# works with sed implementations that can handle large buffers.
|
||||
sed < "$tmpdepfile" -n '
|
||||
/^Note: including file: *\(.*\)/ {
|
||||
s//\1/
|
||||
s/\\/\\\\/g
|
||||
p
|
||||
}' | $cygpath_u | sort -u | sed -n '
|
||||
s/ /\\ /g
|
||||
s/\(.*\)/'"$tab"'\1 \\/p
|
||||
s/.\(.*\) \\/\1:/
|
||||
H
|
||||
$ {
|
||||
s/.*/'"$tab"'/
|
||||
G
|
||||
p
|
||||
}' >> "$depfile"
|
||||
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvc7msys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
#nosideeffect)
|
||||
# This comment above is used by automake to tell side-effect
|
||||
# dependency tracking mechanisms from slower ones.
|
||||
|
||||
dashmstdout)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout, regardless of -o.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
test -z "$dashmflag" && dashmflag=-M
|
||||
# Require at least two characters before searching for ':'
|
||||
# in the target name. This is to cope with DOS-style filenames:
|
||||
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
||||
"$@" $dashmflag |
|
||||
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this sed invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
dashXmstdout)
|
||||
# This case only exists to satisfy depend.m4. It is never actually
|
||||
# run, as this mode is specially recognized in the preamble.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
makedepend)
|
||||
"$@" || exit $?
|
||||
# Remove any Libtool call
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
# X makedepend
|
||||
shift
|
||||
cleared=no eat=no
|
||||
for arg
|
||||
do
|
||||
case $cleared in
|
||||
no)
|
||||
set ""; shift
|
||||
cleared=yes ;;
|
||||
esac
|
||||
if test $eat = yes; then
|
||||
eat=no
|
||||
continue
|
||||
fi
|
||||
case "$arg" in
|
||||
-D*|-I*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
# Strip any option that makedepend may not understand. Remove
|
||||
# the object too, otherwise makedepend will parse it as a source file.
|
||||
-arch)
|
||||
eat=yes ;;
|
||||
-*|$object)
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
esac
|
||||
done
|
||||
obj_suffix=`echo "$object" | sed 's/^.*\././'`
|
||||
touch "$tmpdepfile"
|
||||
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
||||
rm -f "$depfile"
|
||||
# makedepend may prepend the VPATH from the source file name to the object.
|
||||
# No need to regex-escape $object, excess matching of '.' is harmless.
|
||||
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process the last invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed '1,2d' "$tmpdepfile" \
|
||||
| tr ' ' "$nl" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||
;;
|
||||
|
||||
cpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
"$@" -E \
|
||||
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
| sed '$ s: \\$::' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
cat < "$tmpdepfile" >> "$depfile"
|
||||
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvisualcpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case "$arg" in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
||||
set fnord "$@"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
"$@" -E 2>/dev/null |
|
||||
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
|
||||
echo "$tab" >> "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvcmsys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
none)
|
||||
exec "$@"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown depmode $depmode" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'before-save-hook 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC0"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
529
pcre2/install-sh
vendored
Executable file
529
pcre2/install-sh
vendored
Executable file
@ -0,0 +1,529 @@
|
||||
#!/bin/sh
|
||||
# install - install a program, script, or datafile
|
||||
|
||||
scriptversion=2018-03-11.20; # UTC
|
||||
|
||||
# This originates from X11R5 (mit/util/scripts/install.sh), which was
|
||||
# later released in X11R6 (xc/config/util/install.sh) with the
|
||||
# following copyright and license.
|
||||
#
|
||||
# Copyright (C) 1994 X Consortium
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
|
||||
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# Except as contained in this notice, the name of the X Consortium shall not
|
||||
# be used in advertising or otherwise to promote the sale, use or other deal-
|
||||
# ings in this Software without prior written authorization from the X Consor-
|
||||
# tium.
|
||||
#
|
||||
#
|
||||
# FSF changes to this file are in the public domain.
|
||||
#
|
||||
# Calling this script install-sh is preferred over install.sh, to prevent
|
||||
# 'make' implicit rules from creating a file called install from it
|
||||
# when there is no Makefile.
|
||||
#
|
||||
# This script is compatible with the BSD install script, but was written
|
||||
# from scratch.
|
||||
|
||||
tab=' '
|
||||
nl='
|
||||
'
|
||||
IFS=" $tab$nl"
|
||||
|
||||
# Set DOITPROG to "echo" to test this script.
|
||||
|
||||
doit=${DOITPROG-}
|
||||
doit_exec=${doit:-exec}
|
||||
|
||||
# Put in absolute file names if you don't have them in your path;
|
||||
# or use environment vars.
|
||||
|
||||
chgrpprog=${CHGRPPROG-chgrp}
|
||||
chmodprog=${CHMODPROG-chmod}
|
||||
chownprog=${CHOWNPROG-chown}
|
||||
cmpprog=${CMPPROG-cmp}
|
||||
cpprog=${CPPROG-cp}
|
||||
mkdirprog=${MKDIRPROG-mkdir}
|
||||
mvprog=${MVPROG-mv}
|
||||
rmprog=${RMPROG-rm}
|
||||
stripprog=${STRIPPROG-strip}
|
||||
|
||||
posix_mkdir=
|
||||
|
||||
# Desired mode of installed file.
|
||||
mode=0755
|
||||
|
||||
chgrpcmd=
|
||||
chmodcmd=$chmodprog
|
||||
chowncmd=
|
||||
mvcmd=$mvprog
|
||||
rmcmd="$rmprog -f"
|
||||
stripcmd=
|
||||
|
||||
src=
|
||||
dst=
|
||||
dir_arg=
|
||||
dst_arg=
|
||||
|
||||
copy_on_change=false
|
||||
is_target_a_directory=possibly
|
||||
|
||||
usage="\
|
||||
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
|
||||
or: $0 [OPTION]... SRCFILES... DIRECTORY
|
||||
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
|
||||
or: $0 [OPTION]... -d DIRECTORIES...
|
||||
|
||||
In the 1st form, copy SRCFILE to DSTFILE.
|
||||
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
|
||||
In the 4th, create DIRECTORIES.
|
||||
|
||||
Options:
|
||||
--help display this help and exit.
|
||||
--version display version info and exit.
|
||||
|
||||
-c (ignored)
|
||||
-C install only if different (preserve the last data modification time)
|
||||
-d create directories instead of installing files.
|
||||
-g GROUP $chgrpprog installed files to GROUP.
|
||||
-m MODE $chmodprog installed files to MODE.
|
||||
-o USER $chownprog installed files to USER.
|
||||
-s $stripprog installed files.
|
||||
-t DIRECTORY install into DIRECTORY.
|
||||
-T report an error if DSTFILE is a directory.
|
||||
|
||||
Environment variables override the default commands:
|
||||
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
|
||||
RMPROG STRIPPROG
|
||||
"
|
||||
|
||||
while test $# -ne 0; do
|
||||
case $1 in
|
||||
-c) ;;
|
||||
|
||||
-C) copy_on_change=true;;
|
||||
|
||||
-d) dir_arg=true;;
|
||||
|
||||
-g) chgrpcmd="$chgrpprog $2"
|
||||
shift;;
|
||||
|
||||
--help) echo "$usage"; exit $?;;
|
||||
|
||||
-m) mode=$2
|
||||
case $mode in
|
||||
*' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*)
|
||||
echo "$0: invalid mode: $mode" >&2
|
||||
exit 1;;
|
||||
esac
|
||||
shift;;
|
||||
|
||||
-o) chowncmd="$chownprog $2"
|
||||
shift;;
|
||||
|
||||
-s) stripcmd=$stripprog;;
|
||||
|
||||
-t)
|
||||
is_target_a_directory=always
|
||||
dst_arg=$2
|
||||
# Protect names problematic for 'test' and other utilities.
|
||||
case $dst_arg in
|
||||
-* | [=\(\)!]) dst_arg=./$dst_arg;;
|
||||
esac
|
||||
shift;;
|
||||
|
||||
-T) is_target_a_directory=never;;
|
||||
|
||||
--version) echo "$0 $scriptversion"; exit $?;;
|
||||
|
||||
--) shift
|
||||
break;;
|
||||
|
||||
-*) echo "$0: invalid option: $1" >&2
|
||||
exit 1;;
|
||||
|
||||
*) break;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# We allow the use of options -d and -T together, by making -d
|
||||
# take the precedence; this is for compatibility with GNU install.
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
if test -n "$dst_arg"; then
|
||||
echo "$0: target directory not allowed when installing a directory." >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
|
||||
# When -d is used, all remaining arguments are directories to create.
|
||||
# When -t is used, the destination is already specified.
|
||||
# Otherwise, the last argument is the destination. Remove it from $@.
|
||||
for arg
|
||||
do
|
||||
if test -n "$dst_arg"; then
|
||||
# $@ is not empty: it contains at least $arg.
|
||||
set fnord "$@" "$dst_arg"
|
||||
shift # fnord
|
||||
fi
|
||||
shift # arg
|
||||
dst_arg=$arg
|
||||
# Protect names problematic for 'test' and other utilities.
|
||||
case $dst_arg in
|
||||
-* | [=\(\)!]) dst_arg=./$dst_arg;;
|
||||
esac
|
||||
done
|
||||
fi
|
||||
|
||||
if test $# -eq 0; then
|
||||
if test -z "$dir_arg"; then
|
||||
echo "$0: no input file specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
# It's OK to call 'install-sh -d' without argument.
|
||||
# This can happen when creating conditional directories.
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if test -z "$dir_arg"; then
|
||||
if test $# -gt 1 || test "$is_target_a_directory" = always; then
|
||||
if test ! -d "$dst_arg"; then
|
||||
echo "$0: $dst_arg: Is not a directory." >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if test -z "$dir_arg"; then
|
||||
do_exit='(exit $ret); exit $ret'
|
||||
trap "ret=129; $do_exit" 1
|
||||
trap "ret=130; $do_exit" 2
|
||||
trap "ret=141; $do_exit" 13
|
||||
trap "ret=143; $do_exit" 15
|
||||
|
||||
# Set umask so as not to create temps with too-generous modes.
|
||||
# However, 'strip' requires both read and write access to temps.
|
||||
case $mode in
|
||||
# Optimize common cases.
|
||||
*644) cp_umask=133;;
|
||||
*755) cp_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw='% 200'
|
||||
fi
|
||||
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
|
||||
*)
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw=,u+rw
|
||||
fi
|
||||
cp_umask=$mode$u_plus_rw;;
|
||||
esac
|
||||
fi
|
||||
|
||||
for src
|
||||
do
|
||||
# Protect names problematic for 'test' and other utilities.
|
||||
case $src in
|
||||
-* | [=\(\)!]) src=./$src;;
|
||||
esac
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
dst=$src
|
||||
dstdir=$dst
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
else
|
||||
|
||||
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
|
||||
# might cause directories to be created, which would be especially bad
|
||||
# if $src (and thus $dsttmp) contains '*'.
|
||||
if test ! -f "$src" && test ! -d "$src"; then
|
||||
echo "$0: $src does not exist." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test -z "$dst_arg"; then
|
||||
echo "$0: no destination specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
dst=$dst_arg
|
||||
|
||||
# If destination is a directory, append the input filename.
|
||||
if test -d "$dst"; then
|
||||
if test "$is_target_a_directory" = never; then
|
||||
echo "$0: $dst_arg: Is a directory" >&2
|
||||
exit 1
|
||||
fi
|
||||
dstdir=$dst
|
||||
dstbase=`basename "$src"`
|
||||
case $dst in
|
||||
*/) dst=$dst$dstbase;;
|
||||
*) dst=$dst/$dstbase;;
|
||||
esac
|
||||
dstdir_status=0
|
||||
else
|
||||
dstdir=`dirname "$dst"`
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
case $dstdir in
|
||||
*/) dstdirslash=$dstdir;;
|
||||
*) dstdirslash=$dstdir/;;
|
||||
esac
|
||||
|
||||
obsolete_mkdir_used=false
|
||||
|
||||
if test $dstdir_status != 0; then
|
||||
case $posix_mkdir in
|
||||
'')
|
||||
# Create intermediate dirs using mode 755 as modified by the umask.
|
||||
# This is like FreeBSD 'install' as of 1997-10-28.
|
||||
umask=`umask`
|
||||
case $stripcmd.$umask in
|
||||
# Optimize common cases.
|
||||
*[2367][2367]) mkdir_umask=$umask;;
|
||||
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
mkdir_umask=`expr $umask + 22 \
|
||||
- $umask % 100 % 40 + $umask % 20 \
|
||||
- $umask % 10 % 4 + $umask % 2
|
||||
`;;
|
||||
*) mkdir_umask=$umask,go-w;;
|
||||
esac
|
||||
|
||||
# With -d, create the new directory with the user-specified mode.
|
||||
# Otherwise, rely on $mkdir_umask.
|
||||
if test -n "$dir_arg"; then
|
||||
mkdir_mode=-m$mode
|
||||
else
|
||||
mkdir_mode=
|
||||
fi
|
||||
|
||||
posix_mkdir=false
|
||||
case $umask in
|
||||
*[123567][0-7][0-7])
|
||||
# POSIX mkdir -p sets u+wx bits regardless of umask, which
|
||||
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
|
||||
;;
|
||||
*)
|
||||
# Note that $RANDOM variable is not portable (e.g. dash); Use it
|
||||
# here however when possible just to lower collision chance.
|
||||
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
|
||||
|
||||
trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0
|
||||
|
||||
# Because "mkdir -p" follows existing symlinks and we likely work
|
||||
# directly in world-writeable /tmp, make sure that the '$tmpdir'
|
||||
# directory is successfully created first before we actually test
|
||||
# 'mkdir -p' feature.
|
||||
if (umask $mkdir_umask &&
|
||||
$mkdirprog $mkdir_mode "$tmpdir" &&
|
||||
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1
|
||||
then
|
||||
if test -z "$dir_arg" || {
|
||||
# Check for POSIX incompatibilities with -m.
|
||||
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
|
||||
# other-writable bit of parent directory when it shouldn't.
|
||||
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
|
||||
test_tmpdir="$tmpdir/a"
|
||||
ls_ld_tmpdir=`ls -ld "$test_tmpdir"`
|
||||
case $ls_ld_tmpdir in
|
||||
d????-?r-*) different_mode=700;;
|
||||
d????-?--*) different_mode=755;;
|
||||
*) false;;
|
||||
esac &&
|
||||
$mkdirprog -m$different_mode -p -- "$test_tmpdir" && {
|
||||
ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"`
|
||||
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
|
||||
}
|
||||
}
|
||||
then posix_mkdir=:
|
||||
fi
|
||||
rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir"
|
||||
else
|
||||
# Remove any dirs left behind by ancient mkdir implementations.
|
||||
rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null
|
||||
fi
|
||||
trap '' 0;;
|
||||
esac;;
|
||||
esac
|
||||
|
||||
if
|
||||
$posix_mkdir && (
|
||||
umask $mkdir_umask &&
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
|
||||
)
|
||||
then :
|
||||
else
|
||||
|
||||
# The umask is ridiculous, or mkdir does not conform to POSIX,
|
||||
# or it failed possibly due to a race condition. Create the
|
||||
# directory the slow way, step by step, checking for races as we go.
|
||||
|
||||
case $dstdir in
|
||||
/*) prefix='/';;
|
||||
[-=\(\)!]*) prefix='./';;
|
||||
*) prefix='';;
|
||||
esac
|
||||
|
||||
oIFS=$IFS
|
||||
IFS=/
|
||||
set -f
|
||||
set fnord $dstdir
|
||||
shift
|
||||
set +f
|
||||
IFS=$oIFS
|
||||
|
||||
prefixes=
|
||||
|
||||
for d
|
||||
do
|
||||
test X"$d" = X && continue
|
||||
|
||||
prefix=$prefix$d
|
||||
if test -d "$prefix"; then
|
||||
prefixes=
|
||||
else
|
||||
if $posix_mkdir; then
|
||||
(umask=$mkdir_umask &&
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
|
||||
# Don't fail if two instances are running concurrently.
|
||||
test -d "$prefix" || exit 1
|
||||
else
|
||||
case $prefix in
|
||||
*\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
|
||||
*) qprefix=$prefix;;
|
||||
esac
|
||||
prefixes="$prefixes '$qprefix'"
|
||||
fi
|
||||
fi
|
||||
prefix=$prefix/
|
||||
done
|
||||
|
||||
if test -n "$prefixes"; then
|
||||
# Don't fail if two instances are running concurrently.
|
||||
(umask $mkdir_umask &&
|
||||
eval "\$doit_exec \$mkdirprog $prefixes") ||
|
||||
test -d "$dstdir" || exit 1
|
||||
obsolete_mkdir_used=true
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
|
||||
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
|
||||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
|
||||
else
|
||||
|
||||
# Make a couple of temp file names in the proper directory.
|
||||
dsttmp=${dstdirslash}_inst.$$_
|
||||
rmtmp=${dstdirslash}_rm.$$_
|
||||
|
||||
# Trap to clean up those temp files at exit.
|
||||
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
|
||||
|
||||
# Copy the file name to the temp name.
|
||||
(umask $cp_umask &&
|
||||
{ test -z "$stripcmd" || {
|
||||
# Create $dsttmp read-write so that cp doesn't create it read-only,
|
||||
# which would cause strip to fail.
|
||||
if test -z "$doit"; then
|
||||
: >"$dsttmp" # No need to fork-exec 'touch'.
|
||||
else
|
||||
$doit touch "$dsttmp"
|
||||
fi
|
||||
}
|
||||
} &&
|
||||
$doit_exec $cpprog "$src" "$dsttmp") &&
|
||||
|
||||
# and set any options; do chmod last to preserve setuid bits.
|
||||
#
|
||||
# If any of these fail, we abort the whole thing. If we want to
|
||||
# ignore errors from any of these, just make sure not to ignore
|
||||
# errors from the above "$doit $cpprog $src $dsttmp" command.
|
||||
#
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
|
||||
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
|
||||
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
|
||||
|
||||
# If -C, don't bother to copy if it wouldn't change the file.
|
||||
if $copy_on_change &&
|
||||
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
|
||||
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
|
||||
set -f &&
|
||||
set X $old && old=:$2:$4:$5:$6 &&
|
||||
set X $new && new=:$2:$4:$5:$6 &&
|
||||
set +f &&
|
||||
test "$old" = "$new" &&
|
||||
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
|
||||
then
|
||||
rm -f "$dsttmp"
|
||||
else
|
||||
# Rename the file to the real destination.
|
||||
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
|
||||
|
||||
# The rename failed, perhaps because mv can't rename something else
|
||||
# to itself, or perhaps because mv is so ancient that it does not
|
||||
# support -f.
|
||||
{
|
||||
# Now remove or move aside any old file at destination location.
|
||||
# We try this two ways since rm can't unlink itself on some
|
||||
# systems and the destination file might be busy for other
|
||||
# reasons. In this case, the final cleanup might fail but the new
|
||||
# file should still install successfully.
|
||||
{
|
||||
test ! -f "$dst" ||
|
||||
$doit $rmcmd -f "$dst" 2>/dev/null ||
|
||||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
|
||||
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
|
||||
} ||
|
||||
{ echo "$0: cannot unlink or rename $dst" >&2
|
||||
(exit 1); exit 1
|
||||
}
|
||||
} &&
|
||||
|
||||
# Now rename the file to the real destination.
|
||||
$doit $mvcmd "$dsttmp" "$dst"
|
||||
}
|
||||
fi || exit 1
|
||||
|
||||
trap '' 0
|
||||
fi
|
||||
done
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'before-save-hook 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC0"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
2
pcre2/libpcre2-16.pc.in
vendored
2
pcre2/libpcre2-16.pc.in
vendored
@ -8,6 +8,6 @@ includedir=@includedir@
|
||||
Name: libpcre2-16
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-16
|
||||
Libs: -L${libdir} -lpcre2-16@LIB_POSTFIX@
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
|
2
pcre2/libpcre2-32.pc.in
vendored
2
pcre2/libpcre2-32.pc.in
vendored
@ -8,6 +8,6 @@ includedir=@includedir@
|
||||
Name: libpcre2-32
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-32
|
||||
Libs: -L${libdir} -lpcre2-32@LIB_POSTFIX@
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
|
2
pcre2/libpcre2-8.pc.in
vendored
2
pcre2/libpcre2-8.pc.in
vendored
@ -8,6 +8,6 @@ includedir=@includedir@
|
||||
Name: libpcre2-8
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-8
|
||||
Libs: -L${libdir} -lpcre2-8@LIB_POSTFIX@
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
|
2
pcre2/libpcre2-posix.pc.in
vendored
2
pcre2/libpcre2-posix.pc.in
vendored
@ -8,6 +8,6 @@ includedir=@includedir@
|
||||
Name: libpcre2-posix
|
||||
Description: Posix compatible interface to libpcre2-8
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-posix
|
||||
Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
Requires.private: libpcre2-8
|
||||
|
215
pcre2/missing
vendored
Executable file
215
pcre2/missing
vendored
Executable file
@ -0,0 +1,215 @@
|
||||
#! /bin/sh
|
||||
# Common wrapper for a few potentially missing GNU programs.
|
||||
|
||||
scriptversion=2018-03-07.03; # UTC
|
||||
|
||||
# Copyright (C) 1996-2020 Free Software Foundation, Inc.
|
||||
# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
if test $# -eq 0; then
|
||||
echo 1>&2 "Try '$0 --help' for more information"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
case $1 in
|
||||
|
||||
--is-lightweight)
|
||||
# Used by our autoconf macros to check whether the available missing
|
||||
# script is modern enough.
|
||||
exit 0
|
||||
;;
|
||||
|
||||
--run)
|
||||
# Back-compat with the calling convention used by older automake.
|
||||
shift
|
||||
;;
|
||||
|
||||
-h|--h|--he|--hel|--help)
|
||||
echo "\
|
||||
$0 [OPTION]... PROGRAM [ARGUMENT]...
|
||||
|
||||
Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
|
||||
to PROGRAM being missing or too old.
|
||||
|
||||
Options:
|
||||
-h, --help display this help and exit
|
||||
-v, --version output version information and exit
|
||||
|
||||
Supported PROGRAM values:
|
||||
aclocal autoconf autoheader autom4te automake makeinfo
|
||||
bison yacc flex lex help2man
|
||||
|
||||
Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
|
||||
'g' are ignored when checking the name.
|
||||
|
||||
Send bug reports to <bug-automake@gnu.org>."
|
||||
exit $?
|
||||
;;
|
||||
|
||||
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
|
||||
echo "missing $scriptversion (GNU Automake)"
|
||||
exit $?
|
||||
;;
|
||||
|
||||
-*)
|
||||
echo 1>&2 "$0: unknown '$1' option"
|
||||
echo 1>&2 "Try '$0 --help' for more information"
|
||||
exit 1
|
||||
;;
|
||||
|
||||
esac
|
||||
|
||||
# Run the given program, remember its exit status.
|
||||
"$@"; st=$?
|
||||
|
||||
# If it succeeded, we are done.
|
||||
test $st -eq 0 && exit 0
|
||||
|
||||
# Also exit now if we it failed (or wasn't found), and '--version' was
|
||||
# passed; such an option is passed most likely to detect whether the
|
||||
# program is present and works.
|
||||
case $2 in --version|--help) exit $st;; esac
|
||||
|
||||
# Exit code 63 means version mismatch. This often happens when the user
|
||||
# tries to use an ancient version of a tool on a file that requires a
|
||||
# minimum version.
|
||||
if test $st -eq 63; then
|
||||
msg="probably too old"
|
||||
elif test $st -eq 127; then
|
||||
# Program was missing.
|
||||
msg="missing on your system"
|
||||
else
|
||||
# Program was found and executed, but failed. Give up.
|
||||
exit $st
|
||||
fi
|
||||
|
||||
perl_URL=https://www.perl.org/
|
||||
flex_URL=https://github.com/westes/flex
|
||||
gnu_software_URL=https://www.gnu.org/software
|
||||
|
||||
program_details ()
|
||||
{
|
||||
case $1 in
|
||||
aclocal|automake)
|
||||
echo "The '$1' program is part of the GNU Automake package:"
|
||||
echo "<$gnu_software_URL/automake>"
|
||||
echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:"
|
||||
echo "<$gnu_software_URL/autoconf>"
|
||||
echo "<$gnu_software_URL/m4/>"
|
||||
echo "<$perl_URL>"
|
||||
;;
|
||||
autoconf|autom4te|autoheader)
|
||||
echo "The '$1' program is part of the GNU Autoconf package:"
|
||||
echo "<$gnu_software_URL/autoconf/>"
|
||||
echo "It also requires GNU m4 and Perl in order to run:"
|
||||
echo "<$gnu_software_URL/m4/>"
|
||||
echo "<$perl_URL>"
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
give_advice ()
|
||||
{
|
||||
# Normalize program name to check for.
|
||||
normalized_program=`echo "$1" | sed '
|
||||
s/^gnu-//; t
|
||||
s/^gnu//; t
|
||||
s/^g//; t'`
|
||||
|
||||
printf '%s\n' "'$1' is $msg."
|
||||
|
||||
configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
|
||||
case $normalized_program in
|
||||
autoconf*)
|
||||
echo "You should only need it if you modified 'configure.ac',"
|
||||
echo "or m4 files included by it."
|
||||
program_details 'autoconf'
|
||||
;;
|
||||
autoheader*)
|
||||
echo "You should only need it if you modified 'acconfig.h' or"
|
||||
echo "$configure_deps."
|
||||
program_details 'autoheader'
|
||||
;;
|
||||
automake*)
|
||||
echo "You should only need it if you modified 'Makefile.am' or"
|
||||
echo "$configure_deps."
|
||||
program_details 'automake'
|
||||
;;
|
||||
aclocal*)
|
||||
echo "You should only need it if you modified 'acinclude.m4' or"
|
||||
echo "$configure_deps."
|
||||
program_details 'aclocal'
|
||||
;;
|
||||
autom4te*)
|
||||
echo "You might have modified some maintainer files that require"
|
||||
echo "the 'autom4te' program to be rebuilt."
|
||||
program_details 'autom4te'
|
||||
;;
|
||||
bison*|yacc*)
|
||||
echo "You should only need it if you modified a '.y' file."
|
||||
echo "You may want to install the GNU Bison package:"
|
||||
echo "<$gnu_software_URL/bison/>"
|
||||
;;
|
||||
lex*|flex*)
|
||||
echo "You should only need it if you modified a '.l' file."
|
||||
echo "You may want to install the Fast Lexical Analyzer package:"
|
||||
echo "<$flex_URL>"
|
||||
;;
|
||||
help2man*)
|
||||
echo "You should only need it if you modified a dependency" \
|
||||
"of a man page."
|
||||
echo "You may want to install the GNU Help2man package:"
|
||||
echo "<$gnu_software_URL/help2man/>"
|
||||
;;
|
||||
makeinfo*)
|
||||
echo "You should only need it if you modified a '.texi' file, or"
|
||||
echo "any other file indirectly affecting the aspect of the manual."
|
||||
echo "You might want to install the Texinfo package:"
|
||||
echo "<$gnu_software_URL/texinfo/>"
|
||||
echo "The spurious makeinfo call might also be the consequence of"
|
||||
echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
|
||||
echo "want to install GNU make:"
|
||||
echo "<$gnu_software_URL/make/>"
|
||||
;;
|
||||
*)
|
||||
echo "You might have modified some files without having the proper"
|
||||
echo "tools for further handling them. Check the 'README' file, it"
|
||||
echo "often tells you about the needed prerequisites for installing"
|
||||
echo "this package. You may also peek at any GNU archive site, in"
|
||||
echo "case some other package contains this missing '$1' program."
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
give_advice "$1" | sed -e '1s/^/WARNING: /' \
|
||||
-e '2,$s/^/ /' >&2
|
||||
|
||||
# Propagate the correct exit status (expected to be 127 for a program
|
||||
# not found, 63 for a program that failed due to version mismatch).
|
||||
exit $st
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'before-save-hook 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC0"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
8
pcre2/pcre2-config.in
vendored
8
pcre2/pcre2-config.in
vendored
@ -86,28 +86,28 @@ while test $# -gt 0; do
|
||||
;;
|
||||
--libs-posix)
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
echo $libS$libR -lpcre2-posix -lpcre2-8
|
||||
echo $libS$libR -lpcre2-posix@LIB_POSTFIX@ -lpcre2-8@LIB_POSTFIX@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs8)
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
echo $libS$libR -lpcre2-8
|
||||
echo $libS$libR -lpcre2-8@LIB_POSTFIX@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs16)
|
||||
if test @enable_pcre2_16@ = yes ; then
|
||||
echo $libS$libR -lpcre2-16
|
||||
echo $libS$libR -lpcre2-16@LIB_POSTFIX@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs32)
|
||||
if test @enable_pcre2_32@ = yes ; then
|
||||
echo $libS$libR -lpcre2-32
|
||||
echo $libS$libR -lpcre2-32@LIB_POSTFIX@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
|
381
pcre2/src/config.h.generic
Normal file
381
pcre2/src/config.h.generic
Normal file
@ -0,0 +1,381 @@
|
||||
/* src/config.h. Generated from config.h.in by configure. */
|
||||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
/* #undef BSR_ANYCRLF */
|
||||
|
||||
/* Define to any value to disable the use of the z and t modifiers in
|
||||
formatting settings such as %zu or %td (this is rarely needed). */
|
||||
/* #undef DISABLE_PERCENT_ZT */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. */
|
||||
/* #undef EBCDIC_NL25 */
|
||||
|
||||
/* Define this if your compiler supports __attribute__((uninitialized)) */
|
||||
/* #undef HAVE_ATTRIBUTE_UNINITIALIZED */
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
/* #undef HAVE_BCOPY */
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
/* #undef HAVE_BZLIB_H */
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
/* #undef HAVE_DIRENT_H */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
/* #undef HAVE_DLFCN_H */
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
/* #undef HAVE_EDITLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
/* #undef HAVE_INTTYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
/* #undef HAVE_LIMITS_H */
|
||||
|
||||
/* Define to 1 if you have the `memfd_create' function. */
|
||||
/* #undef HAVE_MEMFD_CREATE */
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
/* #undef HAVE_MEMMOVE */
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
/* #undef HAVE_MEMORY_H */
|
||||
|
||||
/* Define to 1 if you have the `mkostemp' function. */
|
||||
/* #undef HAVE_MKOSTEMP */
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
/* #undef HAVE_PTHREAD */
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
/* #undef HAVE_READLINE_HISTORY_H */
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the `secure_getenv' function. */
|
||||
/* #undef HAVE_SECURE_GETENV */
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
/* #undef HAVE_STDINT_H */
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
/* #undef HAVE_STDLIB_H */
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
/* #undef HAVE_STRERROR */
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
/* #undef HAVE_STRINGS_H */
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
/* #undef HAVE_STRING_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
/* #undef HAVE_SYS_STAT_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
/* #undef HAVE_SYS_TYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||
/* #undef HAVE_SYS_WAIT_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
/* #undef HAVE_UNISTD_H */
|
||||
|
||||
/* Define to 1 if the compiler supports simple visibility declarations. */
|
||||
/* #undef HAVE_VISIBILITY */
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
/* #undef HAVE_ZLIB_H */
|
||||
|
||||
/* This limits the amount of memory that may be used while matching a pattern.
|
||||
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
|
||||
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
|
||||
#ifndef HEAP_LIMIT
|
||||
#define HEAP_LIMIT 20000000
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 65535 code units long. This covers the vast
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
|
||||
instead. This allows for longer patterns in extreme cases. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
/* This is ignored unless you are using libtool. */
|
||||
#ifndef LT_OBJDIR
|
||||
#define LT_OBJDIR ".libs/"
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. The value is also used to limit a loop counter in
|
||||
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take for ever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases. */
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* The above limit applies to all backtracks, whether or not they are nested.
|
||||
In some environments it is desirable to limit the nesting of backtracking
|
||||
(that is, the depth of tree that is searched) more strictly, in order to
|
||||
restrict the maximum amount of heap memory that is used. The value of
|
||||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
|
||||
the internal nested function calls that are used for pattern recursions,
|
||||
lookarounds, and atomic groups. */
|
||||
#ifndef MATCH_LIMIT_DEPTH
|
||||
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
/* #undef NEVER_BACKSLASH_C */
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||
(ANYCRLF), and 6 (NUL). */
|
||||
#ifndef NEWLINE_DEFAULT
|
||||
#define NEWLINE_DEFAULT 2
|
||||
#endif
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre2"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.36"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.36"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#ifndef PARENS_NEST_LIMIT
|
||||
#define PARENS_NEST_LIMIT 250
|
||||
#endif
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||
very long lines. The actual amount of memory used by pcre2grep is three
|
||||
times this number, because it allows for the buffering of "before" and
|
||||
"after" lines. */
|
||||
#ifndef PCRE2GREP_BUFSIZE
|
||||
#define PCRE2GREP_BUFSIZE 20480
|
||||
#endif
|
||||
|
||||
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||
amount of memory used by pcre2grep is three times this number, because it
|
||||
allows for the buffering of "before" and "after" lines. */
|
||||
#ifndef PCRE2GREP_MAX_BUFSIZE
|
||||
#define PCRE2GREP_MAX_BUFSIZE 1048576
|
||||
#endif
|
||||
|
||||
/* Define to any value to include debugging code. */
|
||||
/* #undef PCRE2_DEBUG */
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||
This macro apears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
/* #undef PCRE2_EXP_DEFN */
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
/* #undef PCRE2_STATIC */
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||
|
||||
/* Define to any non-zero number to enable support for SELinux compatible
|
||||
executable memory allocator in JIT. Note that this will have no effect
|
||||
unless SUPPORT_JIT is also defined. */
|
||||
/* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
/* #undef STDC_HEADERS */
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
/* #undef SUPPORT_JIT */
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
/* #undef SUPPORT_LIBBZ2 */
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
/* #undef SUPPORT_LIBEDIT */
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
/* #undef SUPPORT_LIBREADLINE */
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to any value to enable callout script support in pcre2grep. */
|
||||
/* #undef SUPPORT_PCRE2GREP_CALLOUT */
|
||||
|
||||
/* Define to any value to enable fork support in pcre2grep callout scripts.
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
|
||||
*/
|
||||
/* #undef SUPPORT_PCRE2GREP_CALLOUT_FORK */
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||
have no effect unless SUPPORT_JIT is also defined. */
|
||||
/* #undef SUPPORT_PCRE2GREP_JIT */
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_16 */
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_32 */
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_8 */
|
||||
|
||||
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||
will work even in an EBCDIC environment, but it is incompatible with the
|
||||
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/Unicode, but not both at once. */
|
||||
/* #undef SUPPORT_UNICODE */
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Enable extensions on AIX 3, Interix. */
|
||||
#ifndef _ALL_SOURCE
|
||||
# define _ALL_SOURCE 1
|
||||
#endif
|
||||
/* Enable GNU extensions on systems that have them. */
|
||||
#ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE 1
|
||||
#endif
|
||||
/* Enable threading extensions on Solaris. */
|
||||
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||
# define _POSIX_PTHREAD_SEMANTICS 1
|
||||
#endif
|
||||
/* Enable extensions on HP NonStop. */
|
||||
#ifndef _TANDEM_SOURCE
|
||||
# define _TANDEM_SOURCE 1
|
||||
#endif
|
||||
/* Enable general extensions on Solaris. */
|
||||
#ifndef __EXTENSIONS__
|
||||
# define __EXTENSIONS__ 1
|
||||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.36"
|
||||
|
||||
/* Define to 1 if on MINIX. */
|
||||
/* #undef _MINIX */
|
||||
|
||||
/* Define to 2 if the system does not provide POSIX.1 features except with
|
||||
this defined. */
|
||||
/* #undef _POSIX_1_SOURCE */
|
||||
|
||||
/* Define to 1 if you need to in order for `stat' and other things to work. */
|
||||
/* #undef _POSIX_SOURCE */
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef int64_t */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
369
pcre2/src/config.h.in
Normal file
369
pcre2/src/config.h.in
Normal file
@ -0,0 +1,369 @@
|
||||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
#undef BSR_ANYCRLF
|
||||
|
||||
/* Define to any value to disable the use of the z and t modifiers in
|
||||
formatting settings such as %zu or %td (this is rarely needed). */
|
||||
#undef DISABLE_PERCENT_ZT
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
#undef EBCDIC
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. */
|
||||
#undef EBCDIC_NL25
|
||||
|
||||
/* Define this if your compiler supports __attribute__((uninitialized)) */
|
||||
#undef HAVE_ATTRIBUTE_UNINITIALIZED
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
#undef HAVE_BCOPY
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
#undef HAVE_BZLIB_H
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#undef HAVE_DIRENT_H
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
#undef HAVE_EDITLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||
#undef HAVE_EDIT_READLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#undef HAVE_LIMITS_H
|
||||
|
||||
/* Define to 1 if you have the `memfd_create' function. */
|
||||
#undef HAVE_MEMFD_CREATE
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#undef HAVE_MEMMOVE
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the `mkostemp' function. */
|
||||
#undef HAVE_MKOSTEMP
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
#undef HAVE_PTHREAD
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
#undef HAVE_READLINE_HISTORY_H
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
#undef HAVE_READLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the `secure_getenv' function. */
|
||||
#undef HAVE_SECURE_GETENV
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#undef HAVE_STRERROR
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||
#undef HAVE_SYS_WAIT_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if the compiler supports simple visibility declarations. */
|
||||
#undef HAVE_VISIBILITY
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
#undef HAVE_WINDOWS_H
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#undef HAVE_ZLIB_H
|
||||
|
||||
/* This limits the amount of memory that may be used while matching a pattern.
|
||||
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
|
||||
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
|
||||
#undef HEAP_LIMIT
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 65535 code units long. This covers the vast
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
|
||||
instead. This allows for longer patterns in extreme cases. */
|
||||
#undef LINK_SIZE
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. The value is also used to limit a loop counter in
|
||||
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take for ever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases. */
|
||||
#undef MATCH_LIMIT
|
||||
|
||||
/* The above limit applies to all backtracks, whether or not they are nested.
|
||||
In some environments it is desirable to limit the nesting of backtracking
|
||||
(that is, the depth of tree that is searched) more strictly, in order to
|
||||
restrict the maximum amount of heap memory that is used. The value of
|
||||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
|
||||
the internal nested function calls that are used for pattern recursions,
|
||||
lookarounds, and atomic groups. */
|
||||
#undef MATCH_LIMIT_DEPTH
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_COUNT
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_SIZE
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
#undef NEVER_BACKSLASH_C
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||
(ANYCRLF), and 6 (NUL). */
|
||||
#undef NEWLINE_DEFAULT
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#undef PARENS_NEST_LIMIT
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||
very long lines. The actual amount of memory used by pcre2grep is three
|
||||
times this number, because it allows for the buffering of "before" and
|
||||
"after" lines. */
|
||||
#undef PCRE2GREP_BUFSIZE
|
||||
|
||||
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||
amount of memory used by pcre2grep is three times this number, because it
|
||||
allows for the buffering of "before" and "after" lines. */
|
||||
#undef PCRE2GREP_MAX_BUFSIZE
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRE2POSIX_EXP_DECL
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRE2POSIX_EXP_DEFN
|
||||
|
||||
/* Define to any value to include debugging code. */
|
||||
#undef PCRE2_DEBUG
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRE2_EXP_DECL
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||
This macro apears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
#undef PCRE2_EXP_DEFN
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
#undef PCRE2_STATIC
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
#undef PTHREAD_CREATE_JOINABLE
|
||||
|
||||
/* Define to any non-zero number to enable support for SELinux compatible
|
||||
executable memory allocator in JIT. Note that this will have no effect
|
||||
unless SUPPORT_JIT is also defined. */
|
||||
#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
#undef SUPPORT_JIT
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
#undef SUPPORT_LIBBZ2
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
#undef SUPPORT_LIBEDIT
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
#undef SUPPORT_LIBREADLINE
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
#undef SUPPORT_LIBZ
|
||||
|
||||
/* Define to any value to enable callout script support in pcre2grep. */
|
||||
#undef SUPPORT_PCRE2GREP_CALLOUT
|
||||
|
||||
/* Define to any value to enable fork support in pcre2grep callout scripts.
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
|
||||
*/
|
||||
#undef SUPPORT_PCRE2GREP_CALLOUT_FORK
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||
have no effect unless SUPPORT_JIT is also defined. */
|
||||
#undef SUPPORT_PCRE2GREP_JIT
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_16
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_32
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_8
|
||||
|
||||
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||
will work even in an EBCDIC environment, but it is incompatible with the
|
||||
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/Unicode, but not both at once. */
|
||||
#undef SUPPORT_UNICODE
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
#undef SUPPORT_VALGRIND
|
||||
|
||||
/* Enable extensions on AIX 3, Interix. */
|
||||
#ifndef _ALL_SOURCE
|
||||
# undef _ALL_SOURCE
|
||||
#endif
|
||||
/* Enable GNU extensions on systems that have them. */
|
||||
#ifndef _GNU_SOURCE
|
||||
# undef _GNU_SOURCE
|
||||
#endif
|
||||
/* Enable threading extensions on Solaris. */
|
||||
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||
# undef _POSIX_PTHREAD_SEMANTICS
|
||||
#endif
|
||||
/* Enable extensions on HP NonStop. */
|
||||
#ifndef _TANDEM_SOURCE
|
||||
# undef _TANDEM_SOURCE
|
||||
#endif
|
||||
/* Enable general extensions on Solaris. */
|
||||
#ifndef __EXTENSIONS__
|
||||
# undef __EXTENSIONS__
|
||||
#endif
|
||||
|
||||
|
||||
/* Version number of package */
|
||||
#undef VERSION
|
||||
|
||||
/* Define to 1 if on MINIX. */
|
||||
#undef _MINIX
|
||||
|
||||
/* Define to 2 if the system does not provide POSIX.1 features except with
|
||||
this defined. */
|
||||
#undef _POSIX_1_SOURCE
|
||||
|
||||
/* Define to 1 if you need to in order for `stat' and other things to work. */
|
||||
#undef _POSIX_SOURCE
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
#undef const
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef int64_t
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
#undef size_t
|
991
pcre2/src/pcre2.h.generic
Normal file
991
pcre2/src/pcre2.h.generic
Normal file
@ -0,0 +1,991 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 36
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2020-12-04
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# ifndef PCRE2_EXP_DECL
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order so make this easy, all the exported functions have
|
||||
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE2_CALL_CONVENTION
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
|
||||
uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
|
||||
not have stdint.h, which is why we use inttypes.h, which according to the C
|
||||
standard is a superset of stdint.h. If none of these headers are available,
|
||||
the relevant values must be provided by some other means. */
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/* Allow for C++ users compiling this directly. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
#define PCRE2_ENDANCHORED 0x20000000u
|
||||
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
||||
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||
#define PCRE2_LITERAL 0x02000000u /* C */
|
||||
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
|
||||
|
||||
/* An additional compile options word is available in the compile context. */
|
||||
|
||||
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||||
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
#define PCRE2_JIT_INVALID_UTF 0x00000100u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
|
||||
pcre2_substitute(). Some are allowed only for one of the functions, and in
|
||||
these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
|
||||
PCRE2_NO_UTF_CHECK can also be passed to these functions (though
|
||||
pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
|
||||
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
|
||||
|
||||
/* Options for pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_CONVERT_UTF 0x00000001u
|
||||
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
|
||||
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
|
||||
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
|
||||
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
|
||||
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
#define PCRE2_NEWLINE_NUL 6
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes for pcre2_compile(). Some of these are also used by
|
||||
pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_ERROR_END_BACKSLASH 101
|
||||
#define PCRE2_ERROR_END_BACKSLASH_C 102
|
||||
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
|
||||
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
|
||||
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
|
||||
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
|
||||
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
|
||||
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
|
||||
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
|
||||
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
|
||||
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
|
||||
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
|
||||
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
|
||||
#define PCRE2_ERROR_NULL_PATTERN 116
|
||||
#define PCRE2_ERROR_BAD_OPTIONS 117
|
||||
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
|
||||
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
|
||||
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
|
||||
#define PCRE2_ERROR_HEAP_FAILED 121
|
||||
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
|
||||
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
|
||||
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
|
||||
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
|
||||
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
|
||||
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
|
||||
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
|
||||
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
|
||||
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
|
||||
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
|
||||
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
|
||||
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
|
||||
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
|
||||
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
|
||||
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
|
||||
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
|
||||
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
|
||||
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
|
||||
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
|
||||
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
|
||||
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
|
||||
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
|
||||
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
|
||||
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
|
||||
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
|
||||
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
|
||||
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
|
||||
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
|
||||
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
|
||||
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
|
||||
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
|
||||
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||
/* Error 159 is obsolete and should now never occur */
|
||||
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
|
||||
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
|
||||
#define PCRE2_ERROR_INVALID_OCTAL 164
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
|
||||
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
|
||||
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
|
||||
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
|
||||
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
|
||||
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
|
||||
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
|
||||
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
|
||||
#define PCRE2_ERROR_UTF_IS_DISABLED 174
|
||||
#define PCRE2_ERROR_UCP_IS_DISABLED 175
|
||||
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
|
||||
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
|
||||
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
|
||||
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
|
||||
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
|
||||
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
|
||||
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
|
||||
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
|
||||
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
|
||||
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
|
||||
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
|
||||
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
|
||||
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
|
||||
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
|
||||
#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198
|
||||
|
||||
|
||||
/* "Expected" matching error codes: no match and partial match. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
/* Error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||
|
||||
/* Error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||
|
||||
/* Error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
|
||||
functions, context functions, and serializing functions. They are in numerical
|
||||
order. Originally they were in alphabetical order too, but now that PCRE2 is
|
||||
released, the numbers must not be changed. */
|
||||
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||
#define PCRE2_ERROR_NULL (-51)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||
#define PCRE2_ERROR_DEPTHLIMIT (-53)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
|
||||
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
|
||||
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
|
||||
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
#define PCRE2_INFO_ALLOPTIONS 0
|
||||
#define PCRE2_INFO_ARGOPTIONS 1
|
||||
#define PCRE2_INFO_BACKREFMAX 2
|
||||
#define PCRE2_INFO_BSR 3
|
||||
#define PCRE2_INFO_CAPTURECOUNT 4
|
||||
#define PCRE2_INFO_FIRSTCODEUNIT 5
|
||||
#define PCRE2_INFO_FIRSTCODETYPE 6
|
||||
#define PCRE2_INFO_FIRSTBITMAP 7
|
||||
#define PCRE2_INFO_HASCRORLF 8
|
||||
#define PCRE2_INFO_JCHANGED 9
|
||||
#define PCRE2_INFO_JITSIZE 10
|
||||
#define PCRE2_INFO_LASTCODEUNIT 11
|
||||
#define PCRE2_INFO_LASTCODETYPE 12
|
||||
#define PCRE2_INFO_MATCHEMPTY 13
|
||||
#define PCRE2_INFO_MATCHLIMIT 14
|
||||
#define PCRE2_INFO_MAXLOOKBEHIND 15
|
||||
#define PCRE2_INFO_MINLENGTH 16
|
||||
#define PCRE2_INFO_NAMECOUNT 17
|
||||
#define PCRE2_INFO_NAMEENTRYSIZE 18
|
||||
#define PCRE2_INFO_NAMETABLE 19
|
||||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_DEPTHLIMIT 21
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
#define PCRE2_INFO_EXTRAOPTIONS 26
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
#define PCRE2_CONFIG_BSR 0
|
||||
#define PCRE2_CONFIG_JIT 1
|
||||
#define PCRE2_CONFIG_JITTARGET 2
|
||||
#define PCRE2_CONFIG_LINKSIZE 3
|
||||
#define PCRE2_CONFIG_MATCHLIMIT 4
|
||||
#define PCRE2_CONFIG_NEWLINE 5
|
||||
#define PCRE2_CONFIG_PARENSLIMIT 6
|
||||
#define PCRE2_CONFIG_DEPTHLIMIT 7
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
|
||||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
typedef uint8_t PCRE2_UCHAR8;
|
||||
typedef uint16_t PCRE2_UCHAR16;
|
||||
typedef uint32_t PCRE2_UCHAR32;
|
||||
|
||||
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
||||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||
including pattern offsets for errors and subject offsets after a match. We
|
||||
define special values to indicate zero-terminated strings and unset offsets in
|
||||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
|
||||
declarations are defined in a macro that is expanded for each width later. */
|
||||
|
||||
#define PCRE2_TYPES_LIST \
|
||||
struct pcre2_real_general_context; \
|
||||
typedef struct pcre2_real_general_context pcre2_general_context; \
|
||||
\
|
||||
struct pcre2_real_compile_context; \
|
||||
typedef struct pcre2_real_compile_context pcre2_compile_context; \
|
||||
\
|
||||
struct pcre2_real_match_context; \
|
||||
typedef struct pcre2_real_match_context pcre2_match_context; \
|
||||
\
|
||||
struct pcre2_real_convert_context; \
|
||||
typedef struct pcre2_real_convert_context pcre2_convert_context; \
|
||||
\
|
||||
struct pcre2_real_code; \
|
||||
typedef struct pcre2_real_code pcre2_code; \
|
||||
\
|
||||
struct pcre2_real_match_data; \
|
||||
typedef struct pcre2_real_match_data pcre2_match_data; \
|
||||
\
|
||||
struct pcre2_real_jit_stack; \
|
||||
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||
\
|
||||
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||
|
||||
|
||||
/* The structures for passing out data via callout functions. We use structures
|
||||
so that new fields can be added on the end in future versions, without changing
|
||||
the API of the function, thereby allowing old clients to work without
|
||||
modification. Define the generic versions in a macro; the width-specific
|
||||
versions are generated from this macro below. */
|
||||
|
||||
/* Flags for the callout_flags field. These are cleared after a callout. */
|
||||
|
||||
#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */
|
||||
#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */
|
||||
|
||||
#define PCRE2_STRUCTURE_LIST \
|
||||
typedef struct pcre2_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||
uint32_t callout_flags; /* See above for list */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_block; \
|
||||
\
|
||||
typedef struct pcre2_callout_enumerate_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_enumerate_block; \
|
||||
\
|
||||
typedef struct pcre2_substitute_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SPTR input; /* Pointer to input subject string */ \
|
||||
PCRE2_SPTR output; /* Pointer to output buffer */ \
|
||||
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
|
||||
PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
|
||||
uint32_t oveccount; /* Count of pairs set in ovector */ \
|
||||
uint32_t subscount; /* Substitution number */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_substitute_callout_block;
|
||||
|
||||
|
||||
/* List the generic forms of all other functions in macros, which will be
|
||||
expanded for each width below. Start with functions that give general
|
||||
information. */
|
||||
|
||||
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */
|
||||
|
||||
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_general_context_copy(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
|
||||
void (*)(void *, void *), void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_free(pcre2_general_context *);
|
||||
|
||||
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_compile_context_copy(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
|
||||
int (*)(uint32_t, void *), void *);
|
||||
|
||||
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_match_context_copy(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_substitute_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_substitute_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *, \
|
||||
void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
|
||||
|
||||
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_convert_context_copy(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_convert_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_free(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
|
||||
|
||||
|
||||
/* Functions concerned with compiling a pattern to PCRE internal code. */
|
||||
|
||||
#define PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
|
||||
pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_free(pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy(const pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy_with_tables(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
||||
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_callout_enumerate(const pcre2_code *, \
|
||||
int (*)(pcre2_callout_enumerate_block *, void *), void *);
|
||||
|
||||
|
||||
/* Functions for running a match and inspecting the result. */
|
||||
|
||||
#define PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
|
||||
*pcre2_match_data_create(uint32_t, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
|
||||
*pcre2_match_data_create_from_pattern(const pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_match_data_size(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
*pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */
|
||||
|
||||
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
|
||||
PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_free(PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
/* Functions for serializing / deserializing compiled patterns. */
|
||||
|
||||
#define PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
|
||||
PCRE2_SIZE *, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_free(uint8_t *);
|
||||
|
||||
|
||||
/* Convenience function for match + substitute. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
|
||||
|
||||
|
||||
/* Functions for converting pattern source strings. */
|
||||
|
||||
#define PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *, pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_converted_pattern_free(PCRE2_UCHAR *);
|
||||
|
||||
|
||||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \
|
||||
*pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
||||
|
||||
/* Other miscellaneous functions. */
|
||||
|
||||
#define PCRE2_OTHER_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
|
||||
*pcre2_maketables(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
|
||||
three-level macro scheme is necessary to get the macros expanded when we want
|
||||
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
|
||||
generating three versions of everything below. After that, PCRE2_SUFFIX will be
|
||||
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
|
||||
pcre2_compile are called by application code. */
|
||||
|
||||
#define PCRE2_JOIN(a,b) a ## b
|
||||
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||
|
||||
|
||||
/* Data types */
|
||||
|
||||
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
|
||||
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
|
||||
|
||||
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
|
||||
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
|
||||
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
|
||||
|
||||
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
|
||||
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
|
||||
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
|
||||
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
|
||||
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
|
||||
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
|
||||
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
|
||||
|
||||
|
||||
/* Data blocks */
|
||||
|
||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
|
||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
||||
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||
|
||||
|
||||
/* Functions: the complete list in alphabetical order */
|
||||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
|
||||
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
|
||||
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
|
||||
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
|
||||
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
|
||||
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
|
||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
|
||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
|
||||
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
|
||||
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
|
||||
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
|
||||
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
|
||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
|
||||
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
||||
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
|
||||
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
|
||||
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
|
||||
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
|
||||
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
|
||||
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
|
||||
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
|
||||
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
|
||||
|
||||
/* Keep this old function name for backwards compatibility */
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
|
||||
/* Keep this obsolete function for backwards compatibility: it is now a noop. */
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
|
||||
/* Now generate all three sets of width-specific structures and function
|
||||
prototypes. */
|
||||
|
||||
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||
PCRE2_TYPES_LIST \
|
||||
PCRE2_STRUCTURE_LIST \
|
||||
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_OTHER_FUNCTIONS
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 8
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 16
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 32
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Undefine the list macros; they are no longer needed. */
|
||||
|
||||
#undef PCRE2_TYPES_LIST
|
||||
#undef PCRE2_STRUCTURE_LIST
|
||||
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_FUNCTIONS
|
||||
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||
#undef PCRE2_MATCH_FUNCTIONS
|
||||
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||
#undef PCRE2_JIT_FUNCTIONS
|
||||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
#undef PCRE2_SUFFIX
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
#else
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2.h */
|
@ -2344,7 +2344,7 @@ if (ptr > *nameptr + MAX_NAME_SIZE)
|
||||
*errorcodeptr = ERR48;
|
||||
goto FAILED;
|
||||
}
|
||||
*namelenptr = ptr - *nameptr;
|
||||
*namelenptr = (uint32_t)(ptr - *nameptr);
|
||||
|
||||
/* Subpattern names must not be empty, and their terminator is checked here.
|
||||
(What follows a verb or alpha assertion name is checked separately.) */
|
||||
@ -4331,6 +4331,7 @@ while (ptr < ptrend)
|
||||
{
|
||||
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
|
||||
minor = (*ptr++ - CHAR_0) * 10;
|
||||
if (ptr >= ptrend) goto BAD_VERSION_CONDITION;
|
||||
if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
|
||||
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
goto BAD_VERSION_CONDITION;
|
||||
|
303
pcre2/src/pcre2_dftables.c
Normal file
303
pcre2/src/pcre2_dftables.c
Normal file
@ -0,0 +1,303 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing
|
||||
character tables for PCRE2. The tables are built using the pcre2_maketables()
|
||||
function, which is part of the PCRE2 API. By default, the system's "C" locale
|
||||
is used rather than what the building user happens to have set, but the -L
|
||||
option can be used to select the current locale from the LC_ALL environment
|
||||
variable. By default, the tables are written in source form, but if -b is
|
||||
given, they are written in binary. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define PCRE2_DFTABLES /* pcre2_maketables.c notices this */
|
||||
#include "pcre2_maketables.c"
|
||||
|
||||
|
||||
static const char *classlist[] =
|
||||
{
|
||||
"space", "xdigit", "digit", "upper", "lower",
|
||||
"word", "graph", "print", "punct", "cntrl"
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Usage *
|
||||
*************************************************/
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
{
|
||||
(void)fprintf(stderr,
|
||||
"Usage: pcre2_dftables [options] <output file>\n"
|
||||
" -b Write output in binary (default is source code)\n"
|
||||
" -L Use locale from LC_ALL (default is \"C\" locale)\n"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Entry point *
|
||||
*************************************************/
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
FILE *f;
|
||||
int i;
|
||||
int nclass = 0;
|
||||
BOOL binary = FALSE;
|
||||
char *env = (char *)"C";
|
||||
const unsigned char *tables;
|
||||
const unsigned char *base_of_tables;
|
||||
|
||||
/* Process options */
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
char *arg = argv[i];
|
||||
if (*arg != '-') break;
|
||||
|
||||
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
||||
{
|
||||
usage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-L") == 0)
|
||||
{
|
||||
if (setlocale(LC_ALL, "") == NULL)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
|
||||
return 1;
|
||||
}
|
||||
env = getenv("LC_ALL");
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-b") == 0)
|
||||
binary = TRUE;
|
||||
|
||||
else
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (i != argc - 1)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Make the tables */
|
||||
|
||||
tables = maketables();
|
||||
base_of_tables = tables;
|
||||
|
||||
f = fopen(argv[i], "wb");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* If -b was specified, we write the tables in binary. */
|
||||
|
||||
if (binary)
|
||||
{
|
||||
int yield = 0;
|
||||
size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
|
||||
if (len != TABLES_LENGTH)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
|
||||
"instead of %d\n", (int)len, TABLES_LENGTH);
|
||||
yield = 1;
|
||||
}
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* Write the tables as source code for inclusion in the PCRE2 library. There
|
||||
are several fprintf() calls here, because gcc in pedantic mode complains about
|
||||
the very long string otherwise. */
|
||||
|
||||
(void)fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file was automatically written by the pcre2_dftables auxiliary\n"
|
||||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256. */\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This set of tables was written in the %s locale. */\n\n", env);
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
|
||||
"to build alternative versions of this file. This is necessary if you are\n"
|
||||
"running in an EBCDIC environment, or if you want to default to a different\n"
|
||||
"encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
|
||||
"these tables in the \"C\" locale by default. This happens automatically if\n"
|
||||
"PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
|
||||
"pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
|
||||
"locale. */\n\n");
|
||||
|
||||
/* Force config.h in z/OS */
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
(void)fprintf(f,
|
||||
"/* For z/OS, config.h is forced */\n"
|
||||
"#ifndef HAVE_CONFIG_H\n"
|
||||
"#define HAVE_CONFIG_H 1\n"
|
||||
"#endif\n\n");
|
||||
#endif
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* The following #include is present because without it gcc 4.x may remove\n"
|
||||
"the array definition from the final binary if PCRE2 is built into a static\n"
|
||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
||||
"outside this compilation unit might reference this\" and so it will always\n"
|
||||
"be supplied to the linker. */\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"#ifdef HAVE_CONFIG_H\n"
|
||||
"#include \"config.h\"\n"
|
||||
"#endif\n\n"
|
||||
"#include \"pcre2_internal.h\"\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This table contains bit maps for various character classes. Each map is 32\n"
|
||||
"bytes long and the bits run from the least significant end of each byte. The\n"
|
||||
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
|
||||
"graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < cbit_length; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) (void)fprintf(f, "\n");
|
||||
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
|
||||
(void)fprintf(f, "\n ");
|
||||
}
|
||||
(void)fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) (void)fprintf(f, ",");
|
||||
}
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This table identifies various classes of character by individual bits:\n"
|
||||
" 0x%02x white space character\n"
|
||||
" 0x%02x letter\n"
|
||||
" 0x%02x lower case letter\n"
|
||||
" 0x%02x decimal digit\n"
|
||||
" 0x%02x alphanumeric or '_'\n*/\n\n",
|
||||
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
(void)fprintf(f, " /* ");
|
||||
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||
else (void)fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||
else (void)fprintf(f, "%3d", i-1);
|
||||
(void)fprintf(f, " */\n ");
|
||||
}
|
||||
(void)fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) (void)fprintf(f, ",");
|
||||
}
|
||||
|
||||
(void)fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||
else (void)fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||
else (void)fprintf(f, "%3d", i-1);
|
||||
(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_dftables.c */
|
365
pcre2/src/pcre2_fuzzsupport.c
Normal file
365
pcre2/src/pcre2_fuzzsupport.c
Normal file
@ -0,0 +1,365 @@
|
||||
/***************************************************************************
|
||||
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
|
||||
tries to compile and match it, deriving options from the string itself. If
|
||||
STANDALONE is defined, a main program that calls the driver with the contents
|
||||
of specified files is compiled, and commentary on what is happening is output.
|
||||
If an argument starts with '=' the rest of it it is taken as a literal string
|
||||
rather than a file name. This allows easy testing of short strings.
|
||||
|
||||
Written by Philip Hazel, October 2016
|
||||
***************************************************************************/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include "pcre2.h"
|
||||
|
||||
#define MAX_MATCH_SIZE 1000
|
||||
|
||||
#define DFA_WORKSPACE_COUNT 100
|
||||
|
||||
#define ALLOWED_COMPILE_OPTIONS \
|
||||
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
|
||||
PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
|
||||
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
|
||||
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
|
||||
PCRE2_NO_AUTO_CAPTURE| \
|
||||
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
|
||||
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
|
||||
PCRE2_UTF)
|
||||
|
||||
#define ALLOWED_MATCH_OPTIONS \
|
||||
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
||||
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
|
||||
PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
|
||||
|
||||
/* This is the callout function. Its only purpose is to halt matching if there
|
||||
are more than 100 callouts, as one way of stopping too much time being spent on
|
||||
fruitless matches. The callout data is a pointer to the counter. */
|
||||
|
||||
static int callout_function(pcre2_callout_block *cb, void *callout_data)
|
||||
{
|
||||
(void)cb; /* Avoid unused parameter warning */
|
||||
*((uint32_t *)callout_data) += 1;
|
||||
return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
|
||||
}
|
||||
|
||||
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
|
||||
"no previous prototype" warning when compiling at high warning level. */
|
||||
|
||||
int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
|
||||
|
||||
/* Here's the driving function. */
|
||||
|
||||
int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
|
||||
{
|
||||
uint32_t compile_options;
|
||||
uint32_t match_options;
|
||||
pcre2_match_data *match_data = NULL;
|
||||
pcre2_match_context *match_context = NULL;
|
||||
size_t match_size;
|
||||
int dfa_workspace[DFA_WORKSPACE_COUNT];
|
||||
int r1, r2;
|
||||
int i;
|
||||
|
||||
if (size < 1) return 0;
|
||||
|
||||
/* Limiting the length of the subject for matching stops fruitless searches
|
||||
in large trees taking too much time. */
|
||||
|
||||
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
|
||||
|
||||
/* Figure out some options to use. Initialize the random number to ensure
|
||||
repeatability. Ensure that we get a 32-bit unsigned random number for testing
|
||||
options. (RAND_MAX is required to be at least 32767, but is commonly
|
||||
2147483647, which excludes the top bit.) */
|
||||
|
||||
srand((unsigned int)(data[size/2]));
|
||||
r1 = rand();
|
||||
r2 = rand();
|
||||
|
||||
/* Ensure that all undefined option bits are zero (waste of time trying them)
|
||||
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
|
||||
input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
|
||||
reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
|
||||
\C in random patterns is highly likely to cause a crash. */
|
||||
|
||||
compile_options =
|
||||
((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
|
||||
PCRE2_NEVER_BACKSLASH_C;
|
||||
|
||||
match_options =
|
||||
((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);
|
||||
|
||||
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
|
||||
allowed together and just give an immediate error return. */
|
||||
|
||||
if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
|
||||
match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
|
||||
|
||||
/* Do the compile with and without the options, and after a successful compile,
|
||||
likewise do the match with and without the options. */
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
uint32_t callout_count;
|
||||
int errorcode;
|
||||
PCRE2_SIZE erroroffset;
|
||||
pcre2_code *code;
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("Compile options %.8x never_backslash_c", compile_options);
|
||||
printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
|
||||
((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
|
||||
((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
|
||||
((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
|
||||
((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
|
||||
((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
|
||||
((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
|
||||
((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
|
||||
((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
|
||||
((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
|
||||
((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
|
||||
((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
|
||||
((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
|
||||
((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
|
||||
((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
|
||||
((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
|
||||
((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
|
||||
((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
|
||||
((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
|
||||
((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
|
||||
((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
|
||||
((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
|
||||
((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
|
||||
((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
|
||||
((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
|
||||
((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
|
||||
#endif
|
||||
|
||||
code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
|
||||
&errorcode, &erroroffset, NULL);
|
||||
|
||||
/* Compilation succeeded */
|
||||
|
||||
if (code != NULL)
|
||||
{
|
||||
int j;
|
||||
uint32_t save_match_options = match_options;
|
||||
|
||||
/* Create match data and context blocks only when we first need them. Set
|
||||
low match and depth limits to avoid wasting too much searching large
|
||||
pattern trees. Almost all matches are going to fail. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
match_data = pcre2_match_data_create(32, NULL);
|
||||
if (match_data == NULL)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("** Failed to create match data block\n");
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (match_context == NULL)
|
||||
{
|
||||
match_context = pcre2_match_context_create(NULL);
|
||||
if (match_context == NULL)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("** Failed to create match context block\n");
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
(void)pcre2_set_match_limit(match_context, 100);
|
||||
(void)pcre2_set_depth_limit(match_context, 100);
|
||||
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
|
||||
}
|
||||
|
||||
/* Match twice, with and without options. */
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Match options %.8x", match_options);
|
||||
printf("%s%s%s%s%s%s%s%s%s%s\n",
|
||||
((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
|
||||
((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
|
||||
((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
|
||||
((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
|
||||
((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
|
||||
((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
|
||||
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
|
||||
((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
|
||||
((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
|
||||
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
|
||||
#endif
|
||||
|
||||
callout_count = 0;
|
||||
errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
|
||||
match_options, match_data, match_context);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
||||
{
|
||||
unsigned char buffer[256];
|
||||
pcre2_get_error_message(errorcode, buffer, 256);
|
||||
printf("Match failed: error %d: %s\n", errorcode, buffer);
|
||||
}
|
||||
#endif
|
||||
|
||||
match_options = 0; /* For second time */
|
||||
}
|
||||
|
||||
/* Match with DFA twice, with and without options. */
|
||||
|
||||
match_options = save_match_options & ~PCRE2_NO_JIT; /* Not valid for DFA */
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("DFA match options %.8x", match_options);
|
||||
printf("%s%s%s%s%s%s%s%s%s\n",
|
||||
((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
|
||||
((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
|
||||
((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
|
||||
((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
|
||||
((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
|
||||
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
|
||||
((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
|
||||
((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
|
||||
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
|
||||
#endif
|
||||
|
||||
callout_count = 0;
|
||||
errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
|
||||
(PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
|
||||
dfa_workspace, DFA_WORKSPACE_COUNT);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
||||
{
|
||||
unsigned char buffer[256];
|
||||
pcre2_get_error_message(errorcode, buffer, 256);
|
||||
printf("Match failed: error %d: %s\n", errorcode, buffer);
|
||||
}
|
||||
#endif
|
||||
|
||||
match_options = 0; /* For second time */
|
||||
}
|
||||
|
||||
match_options = save_match_options; /* Reset for the second compile */
|
||||
pcre2_code_free(code);
|
||||
}
|
||||
|
||||
/* Compilation failed */
|
||||
|
||||
else
|
||||
{
|
||||
unsigned char buffer[256];
|
||||
pcre2_get_error_message(errorcode, buffer, 256);
|
||||
#ifdef STANDALONE
|
||||
printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
|
||||
#else
|
||||
if (strstr((const char *)buffer, "internal error") != NULL) abort();
|
||||
#endif
|
||||
}
|
||||
|
||||
compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
|
||||
}
|
||||
|
||||
if (match_data != NULL) pcre2_match_data_free(match_data);
|
||||
if (match_context != NULL) pcre2_match_context_free(match_context);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Optional main program. */
|
||||
|
||||
#ifdef STANDALONE
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (argc < 2)
|
||||
{
|
||||
printf("** No arguments given\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
size_t filelen;
|
||||
size_t readsize;
|
||||
unsigned char *buffer;
|
||||
FILE *f;
|
||||
|
||||
/* Handle a literal string. Copy to an exact size buffer so that checks for
|
||||
overrunning work. */
|
||||
|
||||
if (argv[i][0] == '=')
|
||||
{
|
||||
readsize = strlen(argv[i]) - 1;
|
||||
printf("------ <Literal> ------\n");
|
||||
printf("Length = %lu\n", readsize);
|
||||
printf("%.*s\n", (int)readsize, argv[i]+1);
|
||||
buffer = (unsigned char *)malloc(readsize);
|
||||
if (buffer == NULL)
|
||||
printf("** Failed to allocate %lu bytes of memory\n", readsize);
|
||||
else
|
||||
{
|
||||
memcpy(buffer, argv[i]+1, readsize);
|
||||
LLVMFuzzerTestOneInput(buffer, readsize);
|
||||
free(buffer);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle a string given in a file */
|
||||
|
||||
f = fopen(argv[i], "rb");
|
||||
if (f == NULL)
|
||||
{
|
||||
printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
|
||||
continue;
|
||||
}
|
||||
|
||||
printf("------ %s ------\n", argv[i]);
|
||||
|
||||
fseek(f, 0, SEEK_END);
|
||||
filelen = ftell(f);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
|
||||
buffer = (unsigned char *)malloc(filelen);
|
||||
if (buffer == NULL)
|
||||
{
|
||||
printf("** Failed to allocate %lu bytes of memory\n", filelen);
|
||||
fclose(f);
|
||||
continue;
|
||||
}
|
||||
|
||||
readsize = fread(buffer, 1, filelen, f);
|
||||
fclose(f);
|
||||
|
||||
if (readsize != filelen)
|
||||
printf("** File size is %lu but fread() returned %lu\n", filelen, readsize);
|
||||
else
|
||||
{
|
||||
printf("Length = %lu\n", filelen);
|
||||
LLVMFuzzerTestOneInput(buffer, filelen);
|
||||
}
|
||||
free(buffer);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* STANDALONE */
|
||||
|
||||
/* End */
|
@ -1466,9 +1466,9 @@ do
|
||||
default:
|
||||
accelerated_start = NULL;
|
||||
fast_forward_allowed = FALSE;
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
break;
|
||||
|
||||
case OP_ONCE:
|
||||
case OP_BRA:
|
||||
@ -1834,57 +1834,57 @@ while (cc < ccend)
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
case OP_BRAPOSZERO:
|
||||
repeat_check = FALSE;
|
||||
size = 1;
|
||||
repeat_check = FALSE;
|
||||
break;
|
||||
|
||||
CASE_ITERATOR_PRIVATE_DATA_1
|
||||
space = 1;
|
||||
size = -2;
|
||||
space = 1;
|
||||
break;
|
||||
|
||||
CASE_ITERATOR_PRIVATE_DATA_2A
|
||||
space = 2;
|
||||
size = -2;
|
||||
space = 2;
|
||||
break;
|
||||
|
||||
CASE_ITERATOR_PRIVATE_DATA_2B
|
||||
space = 2;
|
||||
size = -(2 + IMM2_SIZE);
|
||||
space = 2;
|
||||
break;
|
||||
|
||||
CASE_ITERATOR_TYPE_PRIVATE_DATA_1
|
||||
space = 1;
|
||||
size = 1;
|
||||
space = 1;
|
||||
break;
|
||||
|
||||
CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
|
||||
size = 1;
|
||||
if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
|
||||
space = 2;
|
||||
size = 1;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
size = 1 + IMM2_SIZE;
|
||||
if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
|
||||
space = 2;
|
||||
size = 1 + IMM2_SIZE;
|
||||
break;
|
||||
|
||||
case OP_TYPEMINUPTO:
|
||||
space = 2;
|
||||
size = 1 + IMM2_SIZE;
|
||||
space = 2;
|
||||
break;
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
space = get_class_iterator_size(cc + size);
|
||||
size = 1 + 32 / sizeof(PCRE2_UCHAR);
|
||||
space = get_class_iterator_size(cc + size);
|
||||
break;
|
||||
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
case OP_XCLASS:
|
||||
space = get_class_iterator_size(cc + size);
|
||||
size = GET(cc, 1);
|
||||
space = get_class_iterator_size(cc + size);
|
||||
break;
|
||||
#endif
|
||||
|
||||
@ -4578,7 +4578,14 @@ if (common->nltype != NLTYPE_ANY)
|
||||
/* All newlines are ascii, just skip intermediate octets. */
|
||||
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
loop = LABEL();
|
||||
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
||||
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
|
||||
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
|
||||
else
|
||||
{
|
||||
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
}
|
||||
|
||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
|
||||
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
@ -6161,9 +6168,9 @@ static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
struct sljit_label *loop;
|
||||
struct sljit_jump *lastchar;
|
||||
struct sljit_jump *lastchar = NULL;
|
||||
struct sljit_jump *firstchar;
|
||||
struct sljit_jump *quit;
|
||||
struct sljit_jump *quit = NULL;
|
||||
struct sljit_jump *foundcr = NULL;
|
||||
struct sljit_jump *notfoundnl;
|
||||
jump_list *newline = NULL;
|
||||
@ -6176,39 +6183,71 @@ if (common->match_end_ptr != 0)
|
||||
|
||||
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
|
||||
{
|
||||
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
if (HAS_VIRTUAL_REGISTERS)
|
||||
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
|
||||
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
|
||||
if (HAS_VIRTUAL_REGISTERS)
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
|
||||
}
|
||||
else
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
|
||||
}
|
||||
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
|
||||
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
|
||||
#endif
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
|
||||
fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
|
||||
}
|
||||
else
|
||||
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
|
||||
}
|
||||
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
|
||||
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
if (HAS_VIRTUAL_REGISTERS)
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
|
||||
}
|
||||
else
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
|
||||
}
|
||||
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
|
||||
OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
|
||||
OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
|
||||
#endif
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
loop = LABEL();
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
|
||||
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
|
||||
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
|
||||
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
|
||||
loop = LABEL();
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
|
||||
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
|
||||
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
|
||||
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
|
||||
|
||||
JUMPHERE(quit);
|
||||
JUMPHERE(lastchar);
|
||||
}
|
||||
|
||||
JUMPHERE(quit);
|
||||
JUMPHERE(firstchar);
|
||||
JUMPHERE(lastchar);
|
||||
|
||||
if (common->match_end_ptr != 0)
|
||||
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
||||
@ -6225,22 +6264,59 @@ else
|
||||
|
||||
/* Example: match /^/ to \r\n from offset 1. */
|
||||
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
|
||||
move_back(common, NULL, FALSE);
|
||||
|
||||
if (common->nltype == NLTYPE_ANY)
|
||||
move_back(common, NULL, FALSE);
|
||||
else
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
|
||||
loop = LABEL();
|
||||
common->ff_newline_shortcut = loop;
|
||||
|
||||
read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
|
||||
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
|
||||
foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
||||
check_newlinechar(common, common->nltype, &newline, FALSE);
|
||||
set_jumps(newline, loop);
|
||||
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
|
||||
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
|
||||
{
|
||||
if (common->nltype == NLTYPE_ANYCRLF)
|
||||
{
|
||||
fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
|
||||
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
||||
}
|
||||
else
|
||||
{
|
||||
fast_forward_char_simd(common, common->newline, common->newline, 0);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||
{
|
||||
OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
|
||||
CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
|
||||
{
|
||||
read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
|
||||
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
|
||||
foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
||||
check_newlinechar(common, common->nltype, &newline, FALSE);
|
||||
set_jumps(newline, loop);
|
||||
}
|
||||
|
||||
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
|
||||
{
|
||||
quit = JUMP(SLJIT_JUMP);
|
||||
JUMPHERE(foundcr);
|
||||
if (quit == NULL)
|
||||
{
|
||||
quit = JUMP(SLJIT_JUMP);
|
||||
JUMPHERE(foundcr);
|
||||
}
|
||||
|
||||
notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
|
||||
@ -6252,7 +6328,9 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
|
||||
JUMPHERE(notfoundnl);
|
||||
JUMPHERE(quit);
|
||||
}
|
||||
JUMPHERE(lastchar);
|
||||
|
||||
if (lastchar)
|
||||
JUMPHERE(lastchar);
|
||||
JUMPHERE(firstchar);
|
||||
|
||||
if (common->match_end_ptr != 0)
|
||||
@ -6493,9 +6571,11 @@ if (common->invalid_utf)
|
||||
|
||||
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||
{
|
||||
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
|
||||
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||
move_back(common, NULL, TRUE);
|
||||
check_start_used_ptr(common);
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
|
||||
}
|
||||
}
|
||||
@ -7594,25 +7674,43 @@ if (needstype || needsscript)
|
||||
}
|
||||
|
||||
cc = ccbegin;
|
||||
|
||||
if (needstype)
|
||||
{
|
||||
/* TMP2 has already been shifted by 2 */
|
||||
if (!needschar)
|
||||
{
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
}
|
||||
else
|
||||
{
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
typereg = RETURN_ADDR;
|
||||
}
|
||||
}
|
||||
else if (needschar)
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
}
|
||||
|
||||
if (needschar)
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
|
||||
if (needstype)
|
||||
else if (needstype)
|
||||
{
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
|
||||
if (!needschar)
|
||||
{
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
}
|
||||
else
|
||||
{
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
@ -7620,6 +7718,8 @@ if (needstype || needsscript)
|
||||
typereg = RETURN_ADDR;
|
||||
}
|
||||
}
|
||||
else if (needschar)
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
@ -13581,7 +13681,7 @@ if (common->has_then)
|
||||
set_then_offsets(common, common->start, NULL);
|
||||
}
|
||||
|
||||
compiler = sljit_create_compiler(allocator_data);
|
||||
compiler = sljit_create_compiler(allocator_data, NULL);
|
||||
if (!compiler)
|
||||
{
|
||||
SLJIT_FREE(common->optimized_cbracket, allocator_data);
|
||||
@ -13983,7 +14083,7 @@ else
|
||||
{
|
||||
/* This case is highly unlikely since we just recently
|
||||
freed a lot of memory. Not impossible though. */
|
||||
sljit_free_code(executable_func);
|
||||
sljit_free_code(executable_func, NULL);
|
||||
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
@ -14097,13 +14197,13 @@ if (executable_allocator_is_working == 0)
|
||||
/* Checks whether the executable allocator is working. This check
|
||||
might run multiple times in multi-threaded environments, but the
|
||||
result should not be affected by it. */
|
||||
void *ptr = SLJIT_MALLOC_EXEC(32);
|
||||
void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
|
||||
|
||||
executable_allocator_is_working = -1;
|
||||
|
||||
if (ptr != NULL)
|
||||
{
|
||||
SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr));
|
||||
SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
|
||||
executable_allocator_is_working = 1;
|
||||
}
|
||||
}
|
||||
|
@ -89,7 +89,7 @@ int i;
|
||||
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
|
||||
{
|
||||
if (functions->executable_funcs[i] != NULL)
|
||||
sljit_free_code(functions->executable_funcs[i]);
|
||||
sljit_free_code(functions->executable_funcs[i], NULL);
|
||||
PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data);
|
||||
}
|
||||
|
||||
|
347
pcre2/src/pcre2_jit_neon_inc.h
Normal file
347
pcre2/src/pcre2_jit_neon_inc.h
Normal file
@ -0,0 +1,347 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
This module by Zoltan Herczeg and Sebastian Pop
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
# if defined(FFCS)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_utf
|
||||
# else
|
||||
# define FF_FUN ffcs
|
||||
# endif
|
||||
|
||||
# elif defined(FFCS_2)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_2_utf
|
||||
# else
|
||||
# define FF_FUN ffcs_2
|
||||
# endif
|
||||
|
||||
# elif defined(FFCS_MASK)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_mask_utf
|
||||
# else
|
||||
# define FF_FUN ffcs_mask
|
||||
# endif
|
||||
|
||||
# elif defined(FFCPS_0)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_0_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_0
|
||||
# endif
|
||||
|
||||
# elif defined (FFCPS_1)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_1_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_1
|
||||
# endif
|
||||
|
||||
# elif defined (FFCPS_DEFAULT)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_default_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_default
|
||||
# endif
|
||||
# endif
|
||||
|
||||
static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 *str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
|
||||
#undef FF_FUN
|
||||
{
|
||||
quad_word qw;
|
||||
int_char ic;
|
||||
|
||||
SLJIT_UNUSED_ARG(offs1);
|
||||
SLJIT_UNUSED_ARG(offs2);
|
||||
|
||||
ic.x = chars;
|
||||
|
||||
#if defined(FFCS)
|
||||
sljit_u8 c1 = ic.c.c1;
|
||||
vect_t vc1 = VDUPQ(c1);
|
||||
|
||||
#elif defined(FFCS_2)
|
||||
sljit_u8 c1 = ic.c.c1;
|
||||
vect_t vc1 = VDUPQ(c1);
|
||||
sljit_u8 c2 = ic.c.c2;
|
||||
vect_t vc2 = VDUPQ(c2);
|
||||
|
||||
#elif defined(FFCS_MASK)
|
||||
sljit_u8 c1 = ic.c.c1;
|
||||
vect_t vc1 = VDUPQ(c1);
|
||||
sljit_u8 mask = ic.c.c2;
|
||||
vect_t vmask = VDUPQ(mask);
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
compare_type compare1_type = compare_match1;
|
||||
compare_type compare2_type = compare_match1;
|
||||
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
|
||||
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
||||
PCRE2_UCHAR char1a = ic.c.c1;
|
||||
PCRE2_UCHAR char2a = ic.c.c3;
|
||||
|
||||
# ifdef FFCPS_CHAR1A2A
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp2a = VDUPQ(char2a);
|
||||
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
# else
|
||||
PCRE2_UCHAR char1b = ic.c.c2;
|
||||
PCRE2_UCHAR char2b = ic.c.c4;
|
||||
if (char1a == char1b)
|
||||
{
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
}
|
||||
else
|
||||
{
|
||||
sljit_u32 bit1 = char1a ^ char1b;
|
||||
if (is_powerof2(bit1))
|
||||
{
|
||||
compare1_type = compare_match1i;
|
||||
cmp1a = VDUPQ(char1a | bit1);
|
||||
cmp1b = VDUPQ(bit1);
|
||||
}
|
||||
else
|
||||
{
|
||||
compare1_type = compare_match2;
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp1b = VDUPQ(char1b);
|
||||
}
|
||||
}
|
||||
|
||||
if (char2a == char2b)
|
||||
{
|
||||
cmp2a = VDUPQ(char2a);
|
||||
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
}
|
||||
else
|
||||
{
|
||||
sljit_u32 bit2 = char2a ^ char2b;
|
||||
if (is_powerof2(bit2))
|
||||
{
|
||||
compare2_type = compare_match1i;
|
||||
cmp2a = VDUPQ(char2a | bit2);
|
||||
cmp2b = VDUPQ(bit2);
|
||||
}
|
||||
else
|
||||
{
|
||||
compare2_type = compare_match2;
|
||||
cmp2a = VDUPQ(char2a);
|
||||
cmp2b = VDUPQ(char2b);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
str_ptr += IN_UCHARS(offs1);
|
||||
#endif
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
vect_t char_mask = VDUPQ(0xff);
|
||||
#endif
|
||||
|
||||
#if defined(FF_UTF)
|
||||
restart:;
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
sljit_u8 *p1 = str_ptr - diff;
|
||||
#endif
|
||||
sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
|
||||
str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf);
|
||||
vect_t data = VLD1Q(str_ptr);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data = VANDQ(data, char_mask);
|
||||
#endif
|
||||
|
||||
#if defined(FFCS)
|
||||
vect_t eq = VCEQQ(data, vc1);
|
||||
|
||||
#elif defined(FFCS_2)
|
||||
vect_t eq1 = VCEQQ(data, vc1);
|
||||
vect_t eq2 = VCEQQ(data, vc2);
|
||||
vect_t eq = VORRQ(eq1, eq2);
|
||||
|
||||
#elif defined(FFCS_MASK)
|
||||
vect_t eq = VORRQ(data, vmask);
|
||||
eq = VCEQQ(eq, vc1);
|
||||
|
||||
#elif defined(FFCPS)
|
||||
# if defined(FFCPS_DIFF1)
|
||||
vect_t prev_data = data;
|
||||
# endif
|
||||
|
||||
vect_t data2;
|
||||
if (p1 < str_ptr)
|
||||
{
|
||||
data2 = VLD1Q(str_ptr - diff);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data2 = VANDQ(data2, char_mask);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
data2 = shift_left_n_lanes(data, offs1 - offs2);
|
||||
|
||||
if (compare1_type == compare_match1)
|
||||
data = VCEQQ(data, cmp1a);
|
||||
else
|
||||
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
||||
|
||||
if (compare2_type == compare_match1)
|
||||
data2 = VCEQQ(data2, cmp2a);
|
||||
else
|
||||
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
||||
|
||||
vect_t eq = VANDQ(data, data2);
|
||||
#endif
|
||||
|
||||
VST1Q(qw.mem, eq);
|
||||
/* Ignore matches before the first STR_PTR. */
|
||||
if (align_offset < 8)
|
||||
{
|
||||
qw.dw[0] >>= align_offset * 8;
|
||||
if (qw.dw[0])
|
||||
{
|
||||
str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
|
||||
goto match;
|
||||
}
|
||||
if (qw.dw[1])
|
||||
{
|
||||
str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
||||
goto match;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
qw.dw[1] >>= (align_offset - 8) * 8;
|
||||
if (qw.dw[1])
|
||||
{
|
||||
str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
|
||||
goto match;
|
||||
}
|
||||
}
|
||||
str_ptr += 16;
|
||||
|
||||
while (str_ptr < str_end)
|
||||
{
|
||||
vect_t orig_data = VLD1Q(str_ptr);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
orig_data = VANDQ(orig_data, char_mask);
|
||||
#endif
|
||||
data = orig_data;
|
||||
|
||||
#if defined(FFCS)
|
||||
eq = VCEQQ(data, vc1);
|
||||
|
||||
#elif defined(FFCS_2)
|
||||
eq1 = VCEQQ(data, vc1);
|
||||
eq2 = VCEQQ(data, vc2);
|
||||
eq = VORRQ(eq1, eq2);
|
||||
|
||||
#elif defined(FFCS_MASK)
|
||||
eq = VORRQ(data, vmask);
|
||||
eq = VCEQQ(eq, vc1);
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
# if defined (FFCPS_DIFF1)
|
||||
data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
|
||||
# else
|
||||
data2 = VLD1Q(str_ptr - diff);
|
||||
# if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data2 = VANDQ(data2, char_mask);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifdef FFCPS_CHAR1A2A
|
||||
data = VCEQQ(data, cmp1a);
|
||||
data2 = VCEQQ(data2, cmp2a);
|
||||
# else
|
||||
if (compare1_type == compare_match1)
|
||||
data = VCEQQ(data, cmp1a);
|
||||
else
|
||||
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
||||
if (compare2_type == compare_match1)
|
||||
data2 = VCEQQ(data2, cmp2a);
|
||||
else
|
||||
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
||||
# endif
|
||||
|
||||
eq = VANDQ(data, data2);
|
||||
#endif
|
||||
|
||||
VST1Q(qw.mem, eq);
|
||||
if (qw.dw[0])
|
||||
str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
|
||||
else if (qw.dw[1])
|
||||
str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
||||
else {
|
||||
str_ptr += 16;
|
||||
#if defined (FFCPS_DIFF1)
|
||||
prev_data = orig_data;
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
match:;
|
||||
if (str_ptr >= str_end)
|
||||
/* Failed match. */
|
||||
return NULL;
|
||||
|
||||
#if defined(FF_UTF)
|
||||
if (utf_continue(str_ptr + IN_UCHARS(-offs1)))
|
||||
{
|
||||
/* Not a match. */
|
||||
str_ptr += IN_UCHARS(1);
|
||||
goto restart;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Match. */
|
||||
#if defined (FFCPS)
|
||||
str_ptr -= IN_UCHARS(offs1);
|
||||
#endif
|
||||
return str_ptr;
|
||||
}
|
||||
|
||||
/* Failed match. */
|
||||
return NULL;
|
||||
}
|
1123
pcre2/src/pcre2_jit_simd_inc.h
Normal file
1123
pcre2/src/pcre2_jit_simd_inc.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -6115,8 +6115,8 @@ BOOL has_req_cu = FALSE;
|
||||
BOOL startline;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
BOOL memchr_not_found_first_cu = FALSE;
|
||||
BOOL memchr_not_found_first_cu2 = FALSE;
|
||||
BOOL memchr_not_found_first_cu;
|
||||
BOOL memchr_not_found_first_cu2;
|
||||
#endif
|
||||
|
||||
PCRE2_UCHAR first_cu = 0;
|
||||
@ -6709,6 +6709,11 @@ FRAGMENT_RESTART:
|
||||
start_partial = match_partial = NULL;
|
||||
mb->hitend = FALSE;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
memchr_not_found_first_cu = FALSE;
|
||||
memchr_not_found_first_cu2 = FALSE;
|
||||
#endif
|
||||
|
||||
for(;;)
|
||||
{
|
||||
PCRE2_SPTR new_start_match;
|
||||
@ -7187,6 +7192,7 @@ if (utf && end_subject != true_end_subject &&
|
||||
starting code units in 8-bit and 16-bit modes. */
|
||||
|
||||
start_match = end_subject + 1;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
|
||||
start_match++;
|
||||
|
836
pcre2/src/pcre2_printint.c
Normal file
836
pcre2/src/pcre2_printint.c
Normal file
@ -0,0 +1,836 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. This source file is #included in pcre2test.c at each supported
|
||||
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
||||
that comprise the library. It can also optionally be included in
|
||||
pcre2_compile.c for detailed debugging in error situations. */
|
||||
|
||||
|
||||
/* Tables of operator names. The same 8-bit table is used for all code unit
|
||||
widths, so it must be defined only once. The list itself is defined in
|
||||
pcre2_internal.h, which is #included by pcre2test before this file. */
|
||||
|
||||
#ifndef OP_LISTS_DEFINED
|
||||
static const char *OP_names[] = { OP_NAME_LIST };
|
||||
#define OP_LISTS_DEFINED
|
||||
#endif
|
||||
|
||||
/* The functions and tables herein must all have mode-dependent names. */
|
||||
|
||||
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
|
||||
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
|
||||
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
|
||||
#define print_char PCRE2_SUFFIX(print_char_)
|
||||
#define print_custring PCRE2_SUFFIX(print_custring_)
|
||||
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
|
||||
#define print_prop PCRE2_SUFFIX(print_prop_)
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
The contents of the table are, however, mode-dependent. */
|
||||
|
||||
static const uint8_t OP_lengths[] = { OP_LENGTHS };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print one character from a string *
|
||||
*************************************************/
|
||||
|
||||
/* In UTF mode the character may occupy more than one code unit.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr pointer to first code unit of the character
|
||||
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
|
||||
|
||||
Returns: number of additional code units used
|
||||
*/
|
||||
|
||||
static unsigned int
|
||||
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
|
||||
{
|
||||
uint32_t c = *ptr;
|
||||
BOOL one_code_unit = !utf;
|
||||
|
||||
/* If UTF is supported and requested, check for a valid single code unit. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
one_code_unit = c < 0x80;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
one_code_unit = (c & 0xfc00) != 0xd800;
|
||||
#else
|
||||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Handle a valid one-code-unit character at any width. */
|
||||
|
||||
if (one_code_unit)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
|
||||
else if (c < 0x80) fprintf(f, "\\x%02x", c);
|
||||
else fprintf(f, "\\x{%02x}", c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Code for invalid UTF code units and multi-unit UTF characters is different
|
||||
for each width. If UTF is not supported, control should never get here, but we
|
||||
need a return statement to keep the compiler happy. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
return 0;
|
||||
#else
|
||||
|
||||
/* Malformed UTF-8 should occur only if the sanity check has been turned off.
|
||||
Rather than swallow random bytes, just stop if we hit a bad one. Print it with
|
||||
\X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if ((c & 0xc0) != 0xc0)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & PRIV(utf8_table3)[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
|
||||
return i - 1;
|
||||
}
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
}
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
|
||||
Print it with \X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if ((ptr[1] & 0xfc00) != 0xdc00)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
}
|
||||
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return 1;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
|
||||
|
||||
/* For UTF-32 we get here only for a malformed code unit, which should only
|
||||
occur if the sanity check has been turned off. Print it with \X instead of \x
|
||||
as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print string as a list of code units *
|
||||
*************************************************/
|
||||
|
||||
/* These take no account of UTF as they always print each individual code unit.
|
||||
The string is zero-terminated for print_custring(); the length is given for
|
||||
print_custring_bylen().
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr point to the string
|
||||
len length for print_custring_bylen()
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_custring(FILE *f, PCRE2_SPTR ptr)
|
||||
{
|
||||
while (*ptr != '\0')
|
||||
{
|
||||
uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
|
||||
{
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find Unicode property name *
|
||||
*************************************************/
|
||||
|
||||
/* When there is no UTF/UCP support, the table of names does not exist. This
|
||||
function should not be called in such configurations, because a pattern that
|
||||
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
|
||||
into the main code, however, we just put one into this function. */
|
||||
|
||||
static const char *
|
||||
get_ucpname(unsigned int ptype, unsigned int pvalue)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
int i;
|
||||
for (i = PRIV(utt_size) - 1; i >= 0; i--)
|
||||
{
|
||||
if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
|
||||
}
|
||||
return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
|
||||
#else /* No UTF support */
|
||||
(void)ptype;
|
||||
(void)pvalue;
|
||||
return "??";
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print Unicode property value *
|
||||
*************************************************/
|
||||
|
||||
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
|
||||
pseudo-property that contains a pointer to a list of case-equivalent
|
||||
characters.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
code pointer in the compiled code
|
||||
before text to print before
|
||||
after text to print after
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
|
||||
{
|
||||
if (code[1] != PT_CLIST)
|
||||
{
|
||||
fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
|
||||
code[2]), after);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char *not = (*code == OP_PROP)? "" : "not ";
|
||||
const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
|
||||
fprintf (f, "%s%sclist", before, not);
|
||||
while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
|
||||
fprintf(f, "%s", after);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* The print_lengths flag controls whether offsets and lengths of items are
|
||||
printed. Lenths can be turned off from pcre2test so that automatic tests on
|
||||
bytecode can be written that do not depend on the value of LINK_SIZE.
|
||||
|
||||
Arguments:
|
||||
re a compiled pattern
|
||||
f the file to write to
|
||||
print_lengths show various lengths
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
|
||||
{
|
||||
PCRE2_SPTR codestart, nametable, code;
|
||||
uint32_t nesize = re->name_entry_size;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
|
||||
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
|
||||
code = codestart = nametable + re->name_count * re->name_entry_size;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
PCRE2_SPTR ccode;
|
||||
uint32_t c;
|
||||
int i;
|
||||
const char *flag = " ";
|
||||
unsigned int extra = 0;
|
||||
|
||||
if (print_lengths)
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
else
|
||||
fprintf(f, " ");
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
/* ========================================================================== */
|
||||
/* These cases are never obeyed. This is a fudge that causes a compile-
|
||||
time error if the vectors OP_names or OP_lengths, which are indexed
|
||||
by opcode, are not the correct length. It seems to be the only way to do
|
||||
such a check at compile time, as the sizeof() operator does not work in
|
||||
the C preprocessor. */
|
||||
|
||||
case OP_TABLE_LENGTH:
|
||||
case OP_TABLE_LENGTH +
|
||||
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
|
||||
(sizeof(OP_lengths) == OP_TABLE_LENGTH)):
|
||||
return;
|
||||
/* ========================================================================== */
|
||||
|
||||
case OP_END:
|
||||
fprintf(f, " %s\n", OP_names[*code]);
|
||||
fprintf(f, "------------------------------------------------------------------\n");
|
||||
return;
|
||||
|
||||
case OP_CHAR:
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CHARI:
|
||||
fprintf(f, " /i ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf);
|
||||
}
|
||||
while (*code == OP_CHARI);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRA:
|
||||
case OP_SCBRAPOS:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_BRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRA:
|
||||
case OP_SBRAPOS:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRPOS:
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
case OP_REVERSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CLOSE:
|
||||
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_DNCREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Cond ref <", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_RREF:
|
||||
c = GET2(code, 1);
|
||||
if (c == RREF_ANY)
|
||||
fprintf(f, " Cond recurse any");
|
||||
else
|
||||
fprintf(f, " Cond recurse %d", c);
|
||||
break;
|
||||
|
||||
case OP_DNRREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Cond recurse <", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_FALSE:
|
||||
fprintf(f, " Cond false");
|
||||
break;
|
||||
|
||||
case OP_TRUE:
|
||||
fprintf(f, " Cond true");
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_POSSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
fprintf(f, " %s ", flag);
|
||||
|
||||
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
{
|
||||
print_prop(f, code + 1, "", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, "%s", OP_names[code[1]]);
|
||||
}
|
||||
else extra = print_char(f, code+1, utf);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_EXACTI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_EXACT:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
fprintf(f, " %s ", flag);
|
||||
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
|
||||
else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
{
|
||||
print_prop(f, code + IMM2_SIZE + 1, " ", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_NOTI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_NOT:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1, utf);
|
||||
fprintf(f, "]");
|
||||
break;
|
||||
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTPOSQUERY:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1, utf);
|
||||
fprintf(f, "]%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTPOSUPTO:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
|
||||
fprintf(f, "]{");
|
||||
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
|
||||
else
|
||||
if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REFI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_REF:
|
||||
fprintf(f, " %s \\%d", flag, GET2(code,1));
|
||||
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_DNREFI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_DNREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s \\k<", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CALLOUT:
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
|
||||
GET(code, 1), GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
c = code[1 + 4*LINK_SIZE];
|
||||
fprintf(f, " %s %c", OP_names[*code], c);
|
||||
extra = GET(code, 1 + 2*LINK_SIZE);
|
||||
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
|
||||
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||
if (c == PRIV(callout_start_delims)[i])
|
||||
{
|
||||
c = PRIV(callout_end_delims)[i];
|
||||
break;
|
||||
}
|
||||
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
|
||||
GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
print_prop(f, code, " ", "");
|
||||
break;
|
||||
|
||||
/* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
|
||||
in having this code always here, and it makes it less messy without all
|
||||
those #ifdefs. */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
case OP_XCLASS:
|
||||
{
|
||||
unsigned int min, max;
|
||||
BOOL printmap;
|
||||
BOOL invertmap = FALSE;
|
||||
uint8_t *map;
|
||||
uint8_t inverted_map[32];
|
||||
|
||||
fprintf(f, " [");
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
extra = GET(code, 1);
|
||||
ccode = code + LINK_SIZE + 1;
|
||||
printmap = (*ccode & XCL_MAP) != 0;
|
||||
if ((*ccode & XCL_NOT) != 0)
|
||||
{
|
||||
invertmap = (*ccode & XCL_HASPROP) == 0;
|
||||
fprintf(f, "^");
|
||||
}
|
||||
ccode++;
|
||||
}
|
||||
else
|
||||
{
|
||||
printmap = TRUE;
|
||||
ccode = code + 1;
|
||||
}
|
||||
|
||||
/* Print a bit map */
|
||||
|
||||
if (printmap)
|
||||
{
|
||||
map = (uint8_t *)ccode;
|
||||
if (invertmap)
|
||||
{
|
||||
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
|
||||
for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
|
||||
map = inverted_map;
|
||||
}
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((map[i/8] & (1u << (i&7))) != 0)
|
||||
{
|
||||
int j;
|
||||
for (j = i+1; j < 256; j++)
|
||||
if ((map[j/8] & (1u << (j&7))) == 0) break;
|
||||
if (i == '-' || i == ']') fprintf(f, "\\");
|
||||
if (PRINTABLE(i)) fprintf(f, "%c", i);
|
||||
else fprintf(f, "\\x%02x", i);
|
||||
if (--j > i)
|
||||
{
|
||||
if (j != i + 1) fprintf(f, "-");
|
||||
if (j == '-' || j == ']') fprintf(f, "\\");
|
||||
if (PRINTABLE(j)) fprintf(f, "%c", j);
|
||||
else fprintf(f, "\\x%02x", j);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
ccode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* For an XCLASS there is always some additional data */
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
PCRE2_UCHAR ch;
|
||||
while ((ch = *ccode++) != XCL_END)
|
||||
{
|
||||
BOOL not = FALSE;
|
||||
const char *notch = "";
|
||||
|
||||
switch(ch)
|
||||
{
|
||||
case XCL_NOTPROP:
|
||||
not = TRUE;
|
||||
notch = "^";
|
||||
/* Fall through */
|
||||
|
||||
case XCL_PROP:
|
||||
{
|
||||
unsigned int ptype = *ccode++;
|
||||
unsigned int pvalue = *ccode++;
|
||||
|
||||
switch(ptype)
|
||||
{
|
||||
case PT_PXGRAPH:
|
||||
fprintf(f, "[:%sgraph:]", notch);
|
||||
break;
|
||||
|
||||
case PT_PXPRINT:
|
||||
fprintf(f, "[:%sprint:]", notch);
|
||||
break;
|
||||
|
||||
case PT_PXPUNCT:
|
||||
fprintf(f, "[:%spunct:]", notch);
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(f, "\\%c{%s}", (not? 'P':'p'),
|
||||
get_ucpname(ptype, pvalue));
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
ccode += 1 + print_char(f, ccode, utf);
|
||||
if (ch == XCL_RANGE)
|
||||
{
|
||||
fprintf(f, "-");
|
||||
ccode += 1 + print_char(f, ccode, utf);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Indicate a non-UTF class which was created by negation */
|
||||
|
||||
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
|
||||
|
||||
/* Handle repeats after a class or a back reference */
|
||||
|
||||
CLASS_REF_REPEAT:
|
||||
switch(*ccode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSPLUS:
|
||||
case OP_CRPOSQUERY:
|
||||
fprintf(f, "%s", OP_names[*ccode]);
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
min = GET2(ccode,1);
|
||||
max = GET2(ccode,1 + IMM2_SIZE);
|
||||
if (max == 0) fprintf(f, "{%u,}", min);
|
||||
else fprintf(f, "{%u,%u}", min, max);
|
||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||
else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
/* Do nothing if it's not a repeat; this code stops picky compilers
|
||||
warning about the lack of a default code path. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
fprintf(f, " %s ", OP_names[*code]);
|
||||
print_custring_bylen(f, code + 2, code[1]);
|
||||
extra += code[1];
|
||||
break;
|
||||
|
||||
case OP_THEN:
|
||||
fprintf(f, " %s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CIRCM:
|
||||
case OP_DOLLM:
|
||||
flag = "/m";
|
||||
/* Fall through */
|
||||
|
||||
/* Anything else is just an item with no data, but possibly a flag. */
|
||||
|
||||
default:
|
||||
fprintf(f, " %s %s", flag, OP_names[*code]);
|
||||
break;
|
||||
}
|
||||
|
||||
code += OP_lengths[*code] + extra;
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_printint.c */
|
494
pcre2/src/pcre2demo.c
Normal file
494
pcre2/src/pcre2demo.c
Normal file
@ -0,0 +1,494 @@
|
||||
/*************************************************
|
||||
* PCRE2 DEMONSTRATION PROGRAM *
|
||||
*************************************************/
|
||||
|
||||
/* This is a demonstration program to illustrate a straightforward way of
|
||||
using the PCRE2 regular expression library from a C program. See the
|
||||
pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
||||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||
incompatible with the original PCRE API.
|
||||
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
width. This demonstration program uses the 8-bit library. The default is to
|
||||
process each code unit as a separate character, but if the pattern begins with
|
||||
"(*UTF)", both it and the subject are treated as UTF-8 strings, where
|
||||
characters may occupy multiple code units.
|
||||
|
||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||
libraries, you should be able to compile this program using this command:
|
||||
|
||||
cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
|
||||
|
||||
If PCRE2 is not installed in a standard place, it is likely to be installed
|
||||
with support for the pkg-config mechanism. If you have pkg-config, you can
|
||||
compile this program using this command:
|
||||
|
||||
cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
|
||||
|
||||
If you do not have pkg-config, you may have to use something like this:
|
||||
|
||||
cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
|
||||
-R/usr/local/lib -lpcre2-8 -o pcre2demo
|
||||
|
||||
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
|
||||
library files for PCRE2 are installed on your system. Only some operating
|
||||
systems (Solaris is one) use the -R option.
|
||||
|
||||
Building under Windows:
|
||||
|
||||
If you want to statically link this program against a non-dll .a file, you must
|
||||
define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
|
||||
the following line. */
|
||||
|
||||
/* #define PCRE2_STATIC */
|
||||
|
||||
/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
|
||||
For a program that uses only one code unit width, setting it to 8, 16, or 32
|
||||
makes it possible to use generic function names such as pcre2_compile(). Note
|
||||
that just changing 8 to 16 (for example) is not sufficient to convert this
|
||||
program to process 16-bit characters. Even in a fully 16-bit environment, where
|
||||
string-handling functions such as strcmp() and printf() work with 16-bit
|
||||
characters, the code for handling the table of named substrings will still need
|
||||
to be modified. */
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pcre2.h>
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* Here is the program. The API includes the concept of "contexts" for *
|
||||
* setting up unusual interface requirements for compiling and matching, *
|
||||
* such as custom memory managers and non-standard newline definitions. *
|
||||
* This program does not do any of this, so it makes no use of contexts, *
|
||||
* always passing NULL where a context could be given. *
|
||||
**************************************************************************/
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
pcre2_code *re;
|
||||
PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */
|
||||
PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */
|
||||
PCRE2_SPTR name_table;
|
||||
|
||||
int crlf_is_newline;
|
||||
int errornumber;
|
||||
int find_all;
|
||||
int i;
|
||||
int rc;
|
||||
int utf8;
|
||||
|
||||
uint32_t option_bits;
|
||||
uint32_t namecount;
|
||||
uint32_t name_entry_size;
|
||||
uint32_t newline;
|
||||
|
||||
PCRE2_SIZE erroroffset;
|
||||
PCRE2_SIZE *ovector;
|
||||
PCRE2_SIZE subject_length;
|
||||
|
||||
pcre2_match_data *match_data;
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* First, sort out the command line. There is only one possible option at *
|
||||
* the moment, "-g" to request repeated matching to find all occurrences, *
|
||||
* like Perl's /g option. We set the variable find_all to a non-zero value *
|
||||
* if the -g option is present. *
|
||||
**************************************************************************/
|
||||
|
||||
find_all = 0;
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
if (strcmp(argv[i], "-g") == 0) find_all = 1;
|
||||
else if (argv[i][0] == '-')
|
||||
{
|
||||
printf("Unrecognised option %s\n", argv[i]);
|
||||
return 1;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
/* After the options, we require exactly two arguments, which are the pattern,
|
||||
and the subject string. */
|
||||
|
||||
if (argc - i != 2)
|
||||
{
|
||||
printf("Exactly two arguments required: a regex and a subject string\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Pattern and subject are char arguments, so they can be straightforwardly
|
||||
cast to PCRE2_SPTR because we are working in 8-bit code units. The subject
|
||||
length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact
|
||||
defined to be size_t. */
|
||||
|
||||
pattern = (PCRE2_SPTR)argv[i];
|
||||
subject = (PCRE2_SPTR)argv[i+1];
|
||||
subject_length = (PCRE2_SIZE)strlen((char *)subject);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* Now we are going to compile the regular expression pattern, and handle *
|
||||
* any errors that are detected. *
|
||||
*************************************************************************/
|
||||
|
||||
re = pcre2_compile(
|
||||
pattern, /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errornumber, /* for error number */
|
||||
&erroroffset, /* for error offset */
|
||||
NULL); /* use default compile context */
|
||||
|
||||
/* Compilation failed: print the error message and exit. */
|
||||
|
||||
if (re == NULL)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||
buffer);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the compilation succeeded, we call PCRE2 again, in order to do a *
|
||||
* pattern match against the subject string. This does just ONE match. If *
|
||||
* further matching is needed, it will be done below. Before running the *
|
||||
* match we must set up a match_data block for holding the result. Using *
|
||||
* pcre2_match_data_create_from_pattern() ensures that the block is *
|
||||
* exactly the right size for the number of capturing parentheses in the *
|
||||
* pattern. If you need to know the actual size of a match_data block as *
|
||||
* a number of bytes, you can find it like this: *
|
||||
* *
|
||||
* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data); *
|
||||
*************************************************************************/
|
||||
|
||||
match_data = pcre2_match_data_create_from_pattern(re, NULL);
|
||||
|
||||
/* Now run the match. */
|
||||
|
||||
rc = pcre2_match(
|
||||
re, /* the compiled pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
0, /* start at offset 0 in the subject */
|
||||
0, /* default options */
|
||||
match_data, /* block for storing the result */
|
||||
NULL); /* use default match context */
|
||||
|
||||
/* Matching failed: handle error cases */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
switch(rc)
|
||||
{
|
||||
case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
|
||||
/*
|
||||
Handle other special cases if you like
|
||||
*/
|
||||
default: printf("Matching error %d\n", rc); break;
|
||||
}
|
||||
pcre2_match_data_free(match_data); /* Release memory used for the match */
|
||||
pcre2_code_free(re); /* data and the compiled pattern. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded. Get a pointer to the output vector, where string offsets are
|
||||
stored. */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
printf("Match succeeded at offset %d\n", (int)ovector[0]);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* We have found the first match within the subject string. If the output *
|
||||
* vector wasn't big enough, say so. Then output any substrings that were *
|
||||
* captured. *
|
||||
*************************************************************************/
|
||||
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
pcre2_match_data_create_from_pattern() above. */
|
||||
|
||||
if (rc == 0)
|
||||
printf("ovector was not big enough for all the captured substrings\n");
|
||||
|
||||
/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion
|
||||
to set the start of a match later than its end. In this demonstration program,
|
||||
we just detect this case and give up. */
|
||||
|
||||
if (ovector[0] > ovector[1])
|
||||
{
|
||||
printf("\\K was used in an assertion to set the match start after its end.\n"
|
||||
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
|
||||
(char *)(subject + ovector[1]));
|
||||
printf("Run abandoned\n");
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Show substrings stored in the output vector by number. Obviously, in a real
|
||||
application you might want to do things other than print them. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
PCRE2_SPTR substring_start = subject + ovector[2*i];
|
||||
PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
|
||||
}
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* That concludes the basic part of this demonstration program. We have *
|
||||
* compiled a pattern, and performed a single match. The code that follows *
|
||||
* shows first how to access named substrings, and then how to code for *
|
||||
* repeated matches on the same subject. *
|
||||
**************************************************************************/
|
||||
|
||||
/* See if there are any named substrings, and if so, show them by name. First
|
||||
we have to extract the count of named parentheses from the pattern. */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
|
||||
&namecount); /* where to put the answer */
|
||||
|
||||
if (namecount == 0) printf("No named substrings\n"); else
|
||||
{
|
||||
PCRE2_SPTR tabptr;
|
||||
printf("Named substrings\n");
|
||||
|
||||
/* Before we can access the substrings, we must extract the table for
|
||||
translating names to numbers, and the size of each entry in the table. */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMETABLE, /* address of the table */
|
||||
&name_table); /* where to put the answer */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
|
||||
&name_entry_size); /* where to put the answer */
|
||||
|
||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
bytes, most significant first. */
|
||||
|
||||
tabptr = name_table;
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the "-g" option was given on the command line, we want to continue *
|
||||
* to search for additional matches in the subject string, in a similar *
|
||||
* way to the /g option in Perl. This turns out to be trickier than you *
|
||||
* might think because of the possibility of matching an empty string. *
|
||||
* What happens is as follows: *
|
||||
* *
|
||||
* If the previous match was NOT for an empty string, we can just start *
|
||||
* the next match at the end of the previous one. *
|
||||
* *
|
||||
* If the previous match WAS for an empty string, we can't do that, as it *
|
||||
* would lead to an infinite loop. Instead, a call of pcre2_match() is *
|
||||
* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
|
||||
* first of these tells PCRE2 that an empty string at the start of the *
|
||||
* subject is not a valid match; other possibilities must be tried. The *
|
||||
* second flag restricts PCRE2 to one match attempt at the initial string *
|
||||
* position. If this match succeeds, an alternative to the empty string *
|
||||
* match has been found, and we can print it and proceed round the loop, *
|
||||
* advancing by the length of whatever was found. If this match does not *
|
||||
* succeed, we still stay in the loop, advancing by just one character. *
|
||||
* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be *
|
||||
* more than one byte. *
|
||||
* *
|
||||
* However, there is a complication concerned with newlines. When the *
|
||||
* newline convention is such that CRLF is a valid newline, we must *
|
||||
* advance by two characters rather than one. The newline convention can *
|
||||
* be set in the regex by (*CR), etc.; if not, we must find the default. *
|
||||
*************************************************************************/
|
||||
|
||||
if (!find_all) /* Check for -g */
|
||||
{
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||
return 0; /* Exit the program. */
|
||||
}
|
||||
|
||||
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
|
||||
sequence. First, find the options with which the regex was compiled and extract
|
||||
the UTF state. */
|
||||
|
||||
(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
|
||||
utf8 = (option_bits & PCRE2_UTF) != 0;
|
||||
|
||||
/* Now find the newline convention and see whether CRLF is a valid newline
|
||||
sequence. */
|
||||
|
||||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||
newline == PCRE2_NEWLINE_CRLF ||
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
|
||||
/* Loop for second and subsequent matches */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
uint32_t options = 0; /* Normally no options */
|
||||
PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
|
||||
|
||||
/* If the previous match was for an empty string, we are finished if we are
|
||||
at the end of the subject. Otherwise, arrange to run another match at the
|
||||
same point to see if a non-empty match can be found. */
|
||||
|
||||
if (ovector[0] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == subject_length) break;
|
||||
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
}
|
||||
|
||||
/* If the previous match was not an empty string, there is one tricky case to
|
||||
consider. If a pattern contains \K within a lookbehind assertion at the
|
||||
start, the end of the matched string can be at the offset where the match
|
||||
started. Without special action, this leads to a loop that keeps on matching
|
||||
the same substring. We must detect this case and arrange to move the start on
|
||||
by one character. The pcre2_get_startchar() function returns the starting
|
||||
offset that was passed to pcre2_match(). */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
|
||||
if (start_offset <= startchar)
|
||||
{
|
||||
if (startchar >= subject_length) break; /* Reached end of subject. */
|
||||
start_offset = startchar + 1; /* Advance by one character. */
|
||||
if (utf8) /* If UTF-8, it may be more */
|
||||
{ /* than one code unit. */
|
||||
for (; start_offset < subject_length; start_offset++)
|
||||
if ((subject[start_offset] & 0xc0) != 0x80) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Run the next matching operation */
|
||||
|
||||
rc = pcre2_match(
|
||||
re, /* the compiled pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
start_offset, /* starting offset in the subject */
|
||||
options, /* options */
|
||||
match_data, /* block for storing the result */
|
||||
NULL); /* use default match context */
|
||||
|
||||
/* This time, a result of NOMATCH isn't an error. If the value in "options"
|
||||
is zero, it just means we have found all possible matches, so the loop ends.
|
||||
Otherwise, it means we have failed to find a non-empty-string match at a
|
||||
point where there was a previous empty-string match. In this case, we do what
|
||||
Perl does: advance the matching position by one character, and continue. We
|
||||
do this by setting the "end of previous match" offset, because that is picked
|
||||
up at the top of the loop as the point at which to start again.
|
||||
|
||||
There are two complications: (a) When CRLF is a valid newline sequence, and
|
||||
the current position is just before it, advance by an extra byte. (b)
|
||||
Otherwise we must ensure that we skip an entire UTF character if we are in
|
||||
UTF mode. */
|
||||
|
||||
if (rc == PCRE2_ERROR_NOMATCH)
|
||||
{
|
||||
if (options == 0) break; /* All matches found */
|
||||
ovector[1] = start_offset + 1; /* Advance one code unit */
|
||||
if (crlf_is_newline && /* If CRLF is a newline & */
|
||||
start_offset < subject_length - 1 && /* we are at CRLF, */
|
||||
subject[start_offset] == '\r' &&
|
||||
subject[start_offset + 1] == '\n')
|
||||
ovector[1] += 1; /* Advance by one more. */
|
||||
else if (utf8) /* Otherwise, ensure we */
|
||||
{ /* advance a whole UTF-8 */
|
||||
while (ovector[1] < subject_length) /* character. */
|
||||
{
|
||||
if ((subject[ovector[1]] & 0xc0) != 0x80) break;
|
||||
ovector[1] += 1;
|
||||
}
|
||||
}
|
||||
continue; /* Go round the loop again */
|
||||
}
|
||||
|
||||
/* Other matching errors are not recoverable. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
printf("Matching error %d\n", rc);
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
|
||||
printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
|
||||
|
||||
/* The match succeeded, but the output vector wasn't big enough. This
|
||||
should not happen. */
|
||||
|
||||
if (rc == 0)
|
||||
printf("ovector was not big enough for all the captured substrings\n");
|
||||
|
||||
/* We must guard against patterns such as /(?=.\K)/ that use \K in an
|
||||
assertion to set the start of a match later than its end. In this
|
||||
demonstration program, we just detect this case and give up. */
|
||||
|
||||
if (ovector[0] > ovector[1])
|
||||
{
|
||||
printf("\\K was used in an assertion to set the match start after its end.\n"
|
||||
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
|
||||
(char *)(subject + ovector[1]));
|
||||
printf("Run abandoned\n");
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* As before, show substrings stored in the output vector by number, and then
|
||||
also any named substrings. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
PCRE2_SPTR substring_start = subject + ovector[2*i];
|
||||
size_t substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
|
||||
}
|
||||
|
||||
if (namecount == 0) printf("No named substrings\n"); else
|
||||
{
|
||||
PCRE2_SPTR tabptr = name_table;
|
||||
printf("Named substrings\n");
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
} /* End of loop to find second and subsequent matches */
|
||||
|
||||
printf("\n");
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2demo.c */
|
423
pcre2/src/pcre2posix.c
Normal file
423
pcre2/src/pcre2posix.c
Normal file
@ -0,0 +1,423 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module is a wrapper that provides a POSIX API to the underlying PCRE2
|
||||
functions. The operative functions are called pcre2_regcomp(), etc., with
|
||||
wrappers that use the plain POSIX names. In addition, pcre2posix.h defines the
|
||||
POSIX names as macros for the pcre2_xxx functions, so any program that includes
|
||||
it and uses the POSIX names will call the base functions directly. This makes
|
||||
it easier for an application to be sure it gets the PCRE2 versions in the
|
||||
presence of other POSIX regex libraries. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
/* Ensure that the PCRE2POSIX_EXP_xxx macros are set appropriately for
|
||||
compiling these functions. This must come before including pcre2posix.h, where
|
||||
they are set for an application (using these functions) if they have not
|
||||
previously been set. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# define PCRE2POSIX_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE2POSIX_EXP_DEFN __declspec(dllexport)
|
||||
#endif
|
||||
|
||||
/* Older versions of MSVC lack snprintf(). This define allows for
|
||||
warning/error-free compilation and testing with MSVC compilers back to at least
|
||||
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
|
||||
/* Compile-time error numbers start at this value. It should probably never be
|
||||
changed. This #define is a copy of the one in pcre2_internal.h. */
|
||||
|
||||
#define COMPILE_ERROR_BASE 100
|
||||
|
||||
|
||||
/* Standard C headers */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* PCRE2 headers */
|
||||
|
||||
#include "pcre2.h"
|
||||
#include "pcre2posix.h"
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order to make this easy, all the exported functions have
|
||||
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE2_CALL_CONVENTION
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
|
||||
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
|
||||
codes: most go to REG_BADPAT. The second table lists, in pairs, those that
|
||||
don't. */
|
||||
|
||||
static const int eint1[] = {
|
||||
0, /* No error */
|
||||
REG_EESCAPE, /* \ at end of pattern */
|
||||
REG_EESCAPE, /* \c at end of pattern */
|
||||
REG_EESCAPE, /* unrecognized character follows \ */
|
||||
REG_BADBR, /* numbers out of order in {} quantifier */
|
||||
/* 5 */
|
||||
REG_BADBR, /* number too big in {} quantifier */
|
||||
REG_EBRACK, /* missing terminating ] for character class */
|
||||
REG_ECTYPE, /* invalid escape sequence in character class */
|
||||
REG_ERANGE, /* range out of order in character class */
|
||||
REG_BADRPT, /* nothing to repeat */
|
||||
/* 10 */
|
||||
REG_ASSERT, /* internal error: unexpected repeat */
|
||||
REG_BADPAT, /* unrecognized character after (? or (?- */
|
||||
REG_BADPAT, /* POSIX named classes are supported only within a class */
|
||||
REG_BADPAT, /* POSIX collating elements are not supported */
|
||||
REG_EPAREN, /* missing ) */
|
||||
/* 15 */
|
||||
REG_ESUBREG, /* reference to non-existent subpattern */
|
||||
REG_INVARG, /* pattern passed as NULL */
|
||||
REG_INVARG, /* unknown compile-time option bit(s) */
|
||||
REG_EPAREN, /* missing ) after (?# comment */
|
||||
REG_ESIZE, /* parentheses nested too deeply */
|
||||
/* 20 */
|
||||
REG_ESIZE, /* regular expression too large */
|
||||
REG_ESPACE, /* failed to get memory */
|
||||
REG_EPAREN, /* unmatched closing parenthesis */
|
||||
REG_ASSERT /* internal error: code overflow */
|
||||
};
|
||||
|
||||
static const int eint2[] = {
|
||||
30, REG_ECTYPE, /* unknown POSIX class name */
|
||||
32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */
|
||||
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
|
||||
56, REG_INVARG, /* internal error: unknown newline setting */
|
||||
92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
|
||||
static const char *const pstring[] = {
|
||||
"", /* Dummy for value 0 */
|
||||
"internal error", /* REG_ASSERT */
|
||||
"invalid repeat counts in {}", /* BADBR */
|
||||
"pattern error", /* BADPAT */
|
||||
"? * + invalid", /* BADRPT */
|
||||
"unbalanced {}", /* EBRACE */
|
||||
"unbalanced []", /* EBRACK */
|
||||
"collation error - not relevant", /* ECOLLATE */
|
||||
"bad class", /* ECTYPE */
|
||||
"bad escape sequence", /* EESCAPE */
|
||||
"empty expression", /* EMPTY */
|
||||
"unbalanced ()", /* EPAREN */
|
||||
"bad range inside []", /* ERANGE */
|
||||
"expression too big", /* ESIZE */
|
||||
"failed to get memory", /* ESPACE */
|
||||
"bad back reference", /* ESUBREG */
|
||||
"bad argument", /* INVARG */
|
||||
"match failed" /* NOMATCH */
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Wrappers with traditional POSIX names *
|
||||
*************************************************/
|
||||
|
||||
/* Keep defining them to preseve the ABI for applications linked to the pcre2
|
||||
POSIX library before these names were changed into macros in pcre2posix.h.
|
||||
This also ensures that the POSIX names are callable from languages that do not
|
||||
include pcre2posix.h. It is vital to #undef the macro definitions from
|
||||
pcre2posix.h! */
|
||||
|
||||
#undef regerror
|
||||
PCRE2POSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
|
||||
PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
|
||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
{
|
||||
return pcre2_regerror(errcode, preg, errbuf, errbuf_size);
|
||||
}
|
||||
|
||||
#undef regfree
|
||||
PCRE2POSIX_EXP_DECL void regfree(regex_t *);
|
||||
PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
regfree(regex_t *preg)
|
||||
{
|
||||
pcre2_regfree(preg);
|
||||
}
|
||||
|
||||
#undef regcomp
|
||||
PCRE2POSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
|
||||
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
return pcre2_regcomp(preg, pattern, cflags);
|
||||
}
|
||||
|
||||
#undef regexec
|
||||
PCRE2POSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
|
||||
regmatch_t *, int);
|
||||
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
return pcre2_regexec(preg, string, nmatch, pmatch, eflags);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Translate error code to string *
|
||||
*************************************************/
|
||||
|
||||
PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
|
||||
pcre2_regerror(int errcode, const regex_t *preg, char *errbuf,
|
||||
size_t errbuf_size)
|
||||
{
|
||||
int used;
|
||||
const char *message;
|
||||
|
||||
message = (errcode <= 0 || errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||
"unknown error code" : pstring[errcode];
|
||||
|
||||
if (preg != NULL && (int)preg->re_erroffset != -1)
|
||||
{
|
||||
used = snprintf(errbuf, errbuf_size, "%s at offset %-6d", message,
|
||||
(int)preg->re_erroffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
used = snprintf(errbuf, errbuf_size, "%s", message);
|
||||
}
|
||||
|
||||
return used + 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store held by a regex *
|
||||
*************************************************/
|
||||
|
||||
PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_regfree(regex_t *preg)
|
||||
{
|
||||
pcre2_match_data_free(preg->re_match_data);
|
||||
pcre2_code_free(preg->re_pcre2_code);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compile a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
preg points to a structure for recording the compiled expression
|
||||
pattern the pattern to compile
|
||||
cflags compilation flags
|
||||
|
||||
Returns: 0 on success
|
||||
various non-zero codes on failure
|
||||
*/
|
||||
|
||||
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
PCRE2_SIZE erroffset;
|
||||
PCRE2_SIZE patlen;
|
||||
int errorcode;
|
||||
int options = 0;
|
||||
int re_nsub = 0;
|
||||
|
||||
patlen = ((cflags & REG_PEND) != 0)? (PCRE2_SIZE)(preg->re_endp - pattern) :
|
||||
PCRE2_ZERO_TERMINATED;
|
||||
|
||||
if ((cflags & REG_ICASE) != 0) options |= PCRE2_CASELESS;
|
||||
if ((cflags & REG_NEWLINE) != 0) options |= PCRE2_MULTILINE;
|
||||
if ((cflags & REG_DOTALL) != 0) options |= PCRE2_DOTALL;
|
||||
if ((cflags & REG_NOSPEC) != 0) options |= PCRE2_LITERAL;
|
||||
if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF;
|
||||
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
|
||||
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
|
||||
|
||||
preg->re_cflags = cflags;
|
||||
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, patlen, options,
|
||||
&errorcode, &erroffset, NULL);
|
||||
preg->re_erroffset = erroffset;
|
||||
|
||||
if (preg->re_pcre2_code == NULL)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* A negative value is a UTF error; otherwise all error codes are greater
|
||||
than COMPILE_ERROR_BASE, but check, just in case. */
|
||||
|
||||
if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT;
|
||||
errorcode -= COMPILE_ERROR_BASE;
|
||||
|
||||
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
|
||||
return eint1[errorcode];
|
||||
for (i = 0; i < sizeof(eint2)/sizeof(const int); i += 2)
|
||||
if (errorcode == eint2[i]) return eint2[i+1];
|
||||
return REG_BADPAT;
|
||||
}
|
||||
|
||||
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
||||
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
||||
preg->re_nsub = (size_t)re_nsub;
|
||||
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
|
||||
preg->re_erroffset = (size_t)(-1); /* No meaning after successful compile */
|
||||
|
||||
if (preg->re_match_data == NULL)
|
||||
{
|
||||
pcre2_code_free(preg->re_pcre2_code);
|
||||
return REG_ESPACE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/* A suitable match_data block, large enough to hold all possible captures, was
|
||||
obtained when the pattern was compiled, to save having to allocate and free it
|
||||
for each match. If REG_NOSUB was specified at compile time, the nmatch and
|
||||
pmatch arguments are ignored, and the only result is yes/no/error. */
|
||||
|
||||
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
int rc, so, eo;
|
||||
int options = 0;
|
||||
pcre2_match_data *md = (pcre2_match_data *)preg->re_match_data;
|
||||
|
||||
if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
|
||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL;
|
||||
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY;
|
||||
|
||||
/* When REG_NOSUB was specified, or if no vector has been passed in which to
|
||||
put captured strings, ensure that nmatch is zero. This will stop any attempt to
|
||||
write to pmatch. */
|
||||
|
||||
if ((preg->re_cflags & REG_NOSUB) != 0 || pmatch == NULL) nmatch = 0;
|
||||
|
||||
/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings.
|
||||
The man page from OS X says "REG_STARTEND affects only the location of the
|
||||
string, not how it is matched". That is why the "so" value is used to bump the
|
||||
start location rather than being passed as a PCRE2 "starting offset". */
|
||||
|
||||
if ((eflags & REG_STARTEND) != 0)
|
||||
{
|
||||
if (pmatch == NULL) return REG_INVARG;
|
||||
so = pmatch[0].rm_so;
|
||||
eo = pmatch[0].rm_eo;
|
||||
}
|
||||
else
|
||||
{
|
||||
so = 0;
|
||||
eo = (int)strlen(string);
|
||||
}
|
||||
|
||||
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
|
||||
(PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
|
||||
|
||||
/* Successful match */
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
size_t i;
|
||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
|
||||
if ((size_t)rc > nmatch) rc = (int)nmatch;
|
||||
for (i = 0; i < (size_t)rc; i++)
|
||||
{
|
||||
pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 :
|
||||
(int)(ovector[i*2] + so);
|
||||
pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 :
|
||||
(int)(ovector[i*2+1] + so);
|
||||
}
|
||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unsuccessful match */
|
||||
|
||||
if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
|
||||
return REG_INVARG;
|
||||
|
||||
switch(rc)
|
||||
{
|
||||
default: return REG_ASSERT;
|
||||
case PCRE2_ERROR_BADMODE: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADMAGIC: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADOPTION: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADUTFOFFSET: return REG_INVARG;
|
||||
case PCRE2_ERROR_MATCHLIMIT: return REG_ESPACE;
|
||||
case PCRE2_ERROR_NOMATCH: return REG_NOMATCH;
|
||||
case PCRE2_ERROR_NOMEMORY: return REG_ESPACE;
|
||||
case PCRE2_ERROR_NULL: return REG_INVARG;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2posix.c */
|
170
pcre2/src/pcre2posix.h
Normal file
170
pcre2/src/pcre2posix.h
Normal file
@ -0,0 +1,170 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language. This is
|
||||
the public header file to be #included by applications that call PCRE2 via the
|
||||
POSIX wrapper interface.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options, mostly defined by POSIX, but with some extras. */
|
||||
|
||||
#define REG_ICASE 0x0001 /* Maps to PCRE2_CASELESS */
|
||||
#define REG_NEWLINE 0x0002 /* Maps to PCRE2_MULTILINE */
|
||||
#define REG_NOTBOL 0x0004 /* Maps to PCRE2_NOTBOL */
|
||||
#define REG_NOTEOL 0x0008 /* Maps to PCRE2_NOTEOL */
|
||||
#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE2_DOTALL */
|
||||
#define REG_NOSUB 0x0020 /* Do not report what was matched */
|
||||
#define REG_UTF 0x0040 /* NOT defined by POSIX; maps to PCRE2_UTF */
|
||||
#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */
|
||||
#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */
|
||||
#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */
|
||||
#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE2_UCP */
|
||||
#define REG_PEND 0x0800 /* GNU feature: pass end pattern by re_endp */
|
||||
#define REG_NOSPEC 0x1000 /* Maps to PCRE2_LITERAL */
|
||||
|
||||
/* This is not used by PCRE2, but by defining it we make it easier
|
||||
to slot PCRE2 into existing programs that make POSIX calls. */
|
||||
|
||||
#define REG_EXTENDED 0
|
||||
|
||||
/* Error values. Not all these are relevant or used by the wrapper. */
|
||||
|
||||
enum {
|
||||
REG_ASSERT = 1, /* internal error ? */
|
||||
REG_BADBR, /* invalid repeat counts in {} */
|
||||
REG_BADPAT, /* pattern error */
|
||||
REG_BADRPT, /* ? * + invalid */
|
||||
REG_EBRACE, /* unbalanced {} */
|
||||
REG_EBRACK, /* unbalanced [] */
|
||||
REG_ECOLLATE, /* collation error - not relevant */
|
||||
REG_ECTYPE, /* bad class */
|
||||
REG_EESCAPE, /* bad escape sequence */
|
||||
REG_EMPTY, /* empty expression */
|
||||
REG_EPAREN, /* unbalanced () */
|
||||
REG_ERANGE, /* bad range inside [] */
|
||||
REG_ESIZE, /* expression too big */
|
||||
REG_ESPACE, /* failed to get memory */
|
||||
REG_ESUBREG, /* bad back reference */
|
||||
REG_INVARG, /* bad argument */
|
||||
REG_NOMATCH /* match failed */
|
||||
};
|
||||
|
||||
|
||||
/* The structure representing a compiled regular expression. It is also used
|
||||
for passing the pattern end pointer when REG_PEND is set. */
|
||||
|
||||
typedef struct {
|
||||
void *re_pcre2_code;
|
||||
void *re_match_data;
|
||||
const char *re_endp;
|
||||
size_t re_nsub;
|
||||
size_t re_erroffset;
|
||||
int re_cflags;
|
||||
} regex_t;
|
||||
|
||||
/* The structure in which a captured offset is returned. */
|
||||
|
||||
typedef int regoff_t;
|
||||
|
||||
typedef struct {
|
||||
regoff_t rm_so;
|
||||
regoff_t rm_eo;
|
||||
} regmatch_t;
|
||||
|
||||
/* When an application links to a PCRE2 DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export settings are needed, and are set in pcre2posix.c before including this
|
||||
file. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC) && !defined(PCRE2POSIX_EXP_DECL)
|
||||
# define PCRE2POSIX_EXP_DECL extern __declspec(dllimport)
|
||||
# define PCRE2POSIX_EXP_DEFN __declspec(dllimport)
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE2POSIX_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2POSIX_EXP_DECL extern "C"
|
||||
# define PCRE2POSIX_EXP_DEFN extern "C"
|
||||
# else
|
||||
# define PCRE2POSIX_EXP_DECL extern
|
||||
# define PCRE2POSIX_EXP_DEFN extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* The functions. The actual code is in functions with pcre2_xxx names for
|
||||
uniqueness. POSIX names are provided as macros for API compatibility with POSIX
|
||||
regex functions. It's done this way to ensure to they are always linked from
|
||||
the PCRE2 library and not by accident from elsewhere (regex_t differs in size
|
||||
elsewhere). */
|
||||
|
||||
PCRE2POSIX_EXP_DECL int pcre2_regcomp(regex_t *, const char *, int);
|
||||
PCRE2POSIX_EXP_DECL int pcre2_regexec(const regex_t *, const char *, size_t,
|
||||
regmatch_t *, int);
|
||||
PCRE2POSIX_EXP_DECL size_t pcre2_regerror(int, const regex_t *, char *, size_t);
|
||||
PCRE2POSIX_EXP_DECL void pcre2_regfree(regex_t *);
|
||||
|
||||
#define regcomp pcre2_regcomp
|
||||
#define regexec pcre2_regexec
|
||||
#define regerror pcre2_regerror
|
||||
#define regfree pcre2_regfree
|
||||
|
||||
/* Debian had a patch that used different names. These are now here to save
|
||||
them having to maintain their own patch, but are not documented by PCRE2. */
|
||||
|
||||
#define PCRE2regcomp pcre2_regcomp
|
||||
#define PCRE2regexec pcre2_regexec
|
||||
#define PCRE2regerror pcre2_regerror
|
||||
#define PCRE2regfree pcre2_regfree
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
/* End of pcre2posix.h */
|
148
pcre2/test-driver
vendored
Executable file
148
pcre2/test-driver
vendored
Executable file
@ -0,0 +1,148 @@
|
||||
#! /bin/sh
|
||||
# test-driver - basic testsuite driver script.
|
||||
|
||||
scriptversion=2018-03-07.03; # UTC
|
||||
|
||||
# Copyright (C) 2011-2020 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
# Make unconditional expansion of undefined variables an error. This
|
||||
# helps a lot in preventing typo-related bugs.
|
||||
set -u
|
||||
|
||||
usage_error ()
|
||||
{
|
||||
echo "$0: $*" >&2
|
||||
print_usage >&2
|
||||
exit 2
|
||||
}
|
||||
|
||||
print_usage ()
|
||||
{
|
||||
cat <<END
|
||||
Usage:
|
||||
test-driver --test-name=NAME --log-file=PATH --trs-file=PATH
|
||||
[--expect-failure={yes|no}] [--color-tests={yes|no}]
|
||||
[--enable-hard-errors={yes|no}] [--]
|
||||
TEST-SCRIPT [TEST-SCRIPT-ARGUMENTS]
|
||||
The '--test-name', '--log-file' and '--trs-file' options are mandatory.
|
||||
END
|
||||
}
|
||||
|
||||
test_name= # Used for reporting.
|
||||
log_file= # Where to save the output of the test script.
|
||||
trs_file= # Where to save the metadata of the test run.
|
||||
expect_failure=no
|
||||
color_tests=no
|
||||
enable_hard_errors=yes
|
||||
while test $# -gt 0; do
|
||||
case $1 in
|
||||
--help) print_usage; exit $?;;
|
||||
--version) echo "test-driver $scriptversion"; exit $?;;
|
||||
--test-name) test_name=$2; shift;;
|
||||
--log-file) log_file=$2; shift;;
|
||||
--trs-file) trs_file=$2; shift;;
|
||||
--color-tests) color_tests=$2; shift;;
|
||||
--expect-failure) expect_failure=$2; shift;;
|
||||
--enable-hard-errors) enable_hard_errors=$2; shift;;
|
||||
--) shift; break;;
|
||||
-*) usage_error "invalid option: '$1'";;
|
||||
*) break;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
missing_opts=
|
||||
test x"$test_name" = x && missing_opts="$missing_opts --test-name"
|
||||
test x"$log_file" = x && missing_opts="$missing_opts --log-file"
|
||||
test x"$trs_file" = x && missing_opts="$missing_opts --trs-file"
|
||||
if test x"$missing_opts" != x; then
|
||||
usage_error "the following mandatory options are missing:$missing_opts"
|
||||
fi
|
||||
|
||||
if test $# -eq 0; then
|
||||
usage_error "missing argument"
|
||||
fi
|
||||
|
||||
if test $color_tests = yes; then
|
||||
# Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'.
|
||||
red='[0;31m' # Red.
|
||||
grn='[0;32m' # Green.
|
||||
lgn='[1;32m' # Light green.
|
||||
blu='[1;34m' # Blue.
|
||||
mgn='[0;35m' # Magenta.
|
||||
std='[m' # No color.
|
||||
else
|
||||
red= grn= lgn= blu= mgn= std=
|
||||
fi
|
||||
|
||||
do_exit='rm -f $log_file $trs_file; (exit $st); exit $st'
|
||||
trap "st=129; $do_exit" 1
|
||||
trap "st=130; $do_exit" 2
|
||||
trap "st=141; $do_exit" 13
|
||||
trap "st=143; $do_exit" 15
|
||||
|
||||
# Test script is run here.
|
||||
"$@" >$log_file 2>&1
|
||||
estatus=$?
|
||||
|
||||
if test $enable_hard_errors = no && test $estatus -eq 99; then
|
||||
tweaked_estatus=1
|
||||
else
|
||||
tweaked_estatus=$estatus
|
||||
fi
|
||||
|
||||
case $tweaked_estatus:$expect_failure in
|
||||
0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;
|
||||
0:*) col=$grn res=PASS recheck=no gcopy=no;;
|
||||
77:*) col=$blu res=SKIP recheck=no gcopy=yes;;
|
||||
99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;;
|
||||
*:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;;
|
||||
*:*) col=$red res=FAIL recheck=yes gcopy=yes;;
|
||||
esac
|
||||
|
||||
# Report the test outcome and exit status in the logs, so that one can
|
||||
# know whether the test passed or failed simply by looking at the '.log'
|
||||
# file, without the need of also peaking into the corresponding '.trs'
|
||||
# file (automake bug#11814).
|
||||
echo "$res $test_name (exit status: $estatus)" >>$log_file
|
||||
|
||||
# Report outcome to console.
|
||||
echo "${col}${res}${std}: $test_name"
|
||||
|
||||
# Register the test result, and other relevant metadata.
|
||||
echo ":test-result: $res" > $trs_file
|
||||
echo ":global-test-result: $res" >> $trs_file
|
||||
echo ":recheck: $recheck" >> $trs_file
|
||||
echo ":copy-in-global-log: $gcopy" >> $trs_file
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'before-save-hook 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC0"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
Loading…
x
Reference in New Issue
Block a user