Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@
U UNICODE For compatibility only. Ignored for string patterns (it
is the default), and forbidden for bytes patterns.

This module also defines an exception 'error'.
This module also defines the exception 'PatternError', aliased to 'error' for
backward compatibility.

"""

Expand All @@ -133,7 +134,7 @@
"findall", "finditer", "compile", "purge", "escape",
"error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
"ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
"UNICODE", "NOFLAG", "RegexFlag",
"UNICODE", "NOFLAG", "RegexFlag", "PatternError"
]

__version__ = "2.2.1"
Expand All @@ -155,7 +156,7 @@ class RegexFlag:
_numeric_repr_ = hex

# sre exception
error = _compiler.error
PatternError = error = _compiler.PatternError

# --------------------------------------------------------------------
# public interface
Expand Down
2 changes: 1 addition & 1 deletion Lib/re/_casefix.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Auto-generated by Tools/scripts/generate_re_casefix.py.
# Auto-generated by Tools/build/generate_re_casefix.py.

# Maps the code of lowercased character to codes of different lowercased
# characters which have the same uppercase.
Expand Down
32 changes: 17 additions & 15 deletions Lib/re/_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,6 @@ def _compile(code, pattern, flags):
else:
emit(ANY)
elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE:
raise error("internal: unsupported template operator %r" % (op,))
if _simple(av[2]):
emit(REPEATING_CODES[op][2])
skip = _len(code); emit(0)
Expand Down Expand Up @@ -152,7 +150,7 @@ def _compile(code, pattern, flags):
if lo > MAXCODE:
raise error("looks too much behind")
if lo != hi:
raise error("look-behind requires fixed-width pattern")
raise PatternError("look-behind requires fixed-width pattern")
emit(lo) # look behind
_compile(code, av[1], flags)
emit(SUCCESS)
Expand Down Expand Up @@ -211,7 +209,7 @@ def _compile(code, pattern, flags):
else:
code[skipyes] = _len(code) - skipyes + 1
else:
raise error("internal: unsupported operand type %r" % (op,))
raise PatternError(f"internal: unsupported operand type {op!r}")

def _compile_charset(charset, flags, code):
# compile charset subprogram
Expand All @@ -237,7 +235,7 @@ def _compile_charset(charset, flags, code):
else:
emit(av)
else:
raise error("internal: unsupported set operator %r" % (op,))
raise PatternError(f"internal: unsupported set operator {op!r}")
emit(FAILURE)

def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
Expand All @@ -250,19 +248,19 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
while True:
try:
if op is LITERAL:
if fixup:
lo = fixup(av)
charmap[lo] = 1
if fixes and lo in fixes:
for k in fixes[lo]:
if fixup: # IGNORECASE and not LOCALE
av = fixup(av)
charmap[av] = 1
if fixes and av in fixes:
for k in fixes[av]:
charmap[k] = 1
if not hascased and iscased(av):
hascased = True
else:
charmap[av] = 1
elif op is RANGE:
r = range(av[0], av[1]+1)
if fixup:
if fixup: # IGNORECASE and not LOCALE
if fixes:
for i in map(fixup, r):
charmap[i] = 1
Expand All @@ -289,8 +287,7 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
# Character set contains non-BMP character codes.
# For range, all BMP characters in the range are already
# processed.
if fixup:
hascased = True
if fixup: # IGNORECASE and not LOCALE
# For now, IN_UNI_IGNORE+LITERAL and
# IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP
# characters, because two characters (at least one of
Expand All @@ -301,7 +298,13 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
# Also, both c.lower() and c.lower().upper() are single
# characters for every non-BMP character.
if op is RANGE:
op = RANGE_UNI_IGNORE
if fixes: # not ASCII
op = RANGE_UNI_IGNORE
hascased = True
else:
assert op is LITERAL
if not hascased and iscased(av):
hascased = True
tail.append((op, av))
break

Expand Down Expand Up @@ -763,4 +766,3 @@ def compile(p, flags=0):
p.state.groups-1,
groupindex, tuple(indexgroup)
)

9 changes: 5 additions & 4 deletions Lib/re/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@

# update when constants are added or removed

MAGIC = 20221023
MAGIC = 20230612

from _sre import MAXREPEAT, MAXGROUPS

# SRE standard exception (access as sre.error)
# should this really be here?

class error(Exception):
class PatternError(Exception):
"""Exception raised for invalid regular expressions.

Attributes:
Expand Down Expand Up @@ -53,6 +53,9 @@ def __init__(self, msg, pattern=None, pos=None):
super().__init__(msg)


# Backward compatibility after renaming in 3.13
error = PatternError

class _NamedIntConstant(int):
def __new__(cls, value, name):
self = super(_NamedIntConstant, cls).__new__(cls, value)
Expand Down Expand Up @@ -204,7 +207,6 @@ def _makecodes(*names):
}

# flags
SRE_FLAG_TEMPLATE = 1 # template mode (unknown purpose, deprecated)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
Expand All @@ -218,4 +220,3 @@ def _makecodes(*names):
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set
RE_INFO_CHARSET = 4 # pattern starts with character from given set
7 changes: 4 additions & 3 deletions Lib/re/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,11 @@
"x": SRE_FLAG_VERBOSE,
# extensions
"a": SRE_FLAG_ASCII,
"t": SRE_FLAG_TEMPLATE,
"u": SRE_FLAG_UNICODE,
}

TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE
GLOBAL_FLAGS = SRE_FLAG_DEBUG | SRE_FLAG_TEMPLATE
GLOBAL_FLAGS = SRE_FLAG_DEBUG

# Maximal value returned by SubPattern.getwidth().
# Must be larger than MAXREPEAT, MAXCODE and sys.maxsize.
Expand Down Expand Up @@ -781,8 +780,10 @@ def _parse(source, state, verbose, nested, first=False):
source.tell() - start)
if char == "=":
subpatternappend((ASSERT, (dir, p)))
else:
elif p:
subpatternappend((ASSERT_NOT, (dir, p)))
else:
subpatternappend((FAILURE, ()))
continue

elif char == "(":
Expand Down
78 changes: 0 additions & 78 deletions Lib/sre_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,81 +5,3 @@

from re import _constants as _
globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'})

# Script entry point: when this module is run directly, write a Rust source
# file that mirrors the SRE constants (magic number, opcode/at-code/category
# enums, and flag bitsets).
# MAGIC, OPCODES, ATCODES, CHCODES and the SRE_FLAG_*/SRE_INFO_* values are not
# defined in this block; they are expected to be among the names copied into
# this module's globals from re._constants by the globals().update(...) above.
if __name__ == "__main__":
# Write one Rust enum named `typ`, annotated with #[repr(int_t)], containing
# one variant per entry of `d`.
def dump(f, d, typ, int_t, prefix):
# Entries are emitted in sorted order.
items = sorted(d)
f.write(f"""\
#[derive(num_enum::TryFromPrimitive, Debug)]
#[repr({int_t})]
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
pub enum {typ} {{
""")
for item in items:
# Variant name is str(item) with `prefix` stripped; its value is int(item).
name = str(item).removeprefix(prefix)
val = int(item)
f.write(f" {name} = {val},\n")
f.write("""\
}
""")
import sys
# Output path: the first command-line argument when given, otherwise a
# location computed relative to this file's directory.
if len(sys.argv) > 1:
constants_file = sys.argv[1]
else:
import os
constants_file = os.path.join(os.path.dirname(__file__), "../../sre-engine/src/constants.rs")
# Mode "w" truncates any existing file at that path before writing.
with open(constants_file, "w") as f:
f.write("""\
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* NOTE: This file is generated by sre_constants.py. If you need
* to change anything in here, edit sre_constants.py and run it.
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/

""")

f.write("use bitflags::bitflags;\n\n");

# Emit the magic number as a Rust constant.
f.write("pub const SRE_MAGIC: usize = %d;\n" % MAGIC)

# One enum per constant table; the last argument is the name prefix to strip.
dump(f, OPCODES, "SreOpcode", "u32", "")
dump(f, ATCODES, "SreAtCode", "u32", "AT_")
dump(f, CHCODES, "SreCatCode", "u32", "CATEGORY_")

# Write a Rust `bitflags!` struct named `typ` backed by `int_t`.  It writes to
# the enclosing `f` rather than taking the file as a parameter.
def bitflags(typ, int_t, prefix, flags):
f.write(f"""\
bitflags! {{
pub struct {typ}: {int_t} {{
""")
for name in flags:
# Each value is looked up by its full name (prefix + name) in this module's
# globals; a name that is not defined there raises KeyError.
val = globals()[prefix + name]
f.write(f" const {name} = {val};\n")
f.write("""\
}
}
""")

bitflags("SreFlag", "u16", "SRE_FLAG_", [
"TEMPLATE",
"IGNORECASE",
"LOCALE",
"MULTILINE",
"DOTALL",
"UNICODE",
"VERBOSE",
"DEBUG",
"ASCII",
])

bitflags("SreInfo", "u32", "SRE_INFO_", [
"PREFIX", "LITERAL", "CHARSET",
])

print("done")
Loading
Loading