#!/usr/bin/env python2.7 # -*- coding: UTF-8 -*- from __future__ import print_function from __future__ import unicode_literals import re import regex import sys import os if not (("PYTHONIOENCODING" in os.environ) and re.search("^utf-?8$", os.environ["PYTHONIOENCODING"], re.I)): sys.stderr.write(sys.argv[0] + ": Please set your PYTHONIOENCODING envariable to utf8\n") sys.exit(1) import unicodedata if unicodedata.unidata_version 65536) if not wide_enough: print("WARNING: Narrow build detected, your Python lacks full Unicode support!!") data_rows = [ [ "\N{COMBINING GREEK YPOGEGRAMMENI}", "\N{GREEK CAPITAL LETTER IOTA}" ], [ "\N{MICRO SIGN}", "\N{GREEK CAPITAL LETTER MU}" ], [ "\N{LATIN SMALL LETTER S}", "\N{LATIN SMALL LETTER LONG S}" ], [ "ÏÏιγμαÏ", "ΣΤÎÎÎÎΣ" ], [ "poÅ¿t", "POST" ], ] re_pass, re_fail = 0, 0 regex_pass, regex_fail = 0, 0 for lc, uc in data_rows: if re.match(uc, lc, re.IGNORECASE + re.UNICODE): print("PASS: re pattern", uc, "is indeed the same as string", lc) re_pass = re_pass + 1 else: print("FAIL: re pattern", uc, "is not the same as string", lc) re_fail = re_fail + 1 if regex.match(uc, lc, regex.IGNORECASE + regex.UNICODE): print("PASS: regex pattern", uc, "is indeed the same as string", lc) regex_pass = regex_pass + 1 else: print("FAIL: regex pattern", uc, "is not the same as string", lc) regex_fail = regex_fail + 1 re_total = re_pass + re_fail regex_total = regex_pass + regex_fail print("") print("re lib passed", re_pass, "of", re_total, "tests") print("regex lib passed", regex_pass, "of", regex_total, "tests") ########################################## # FAIL: re pattern Î is not the same as string Í # PASS: regex pattern Î is indeed the same as string Í # FAIL: re pattern Î is not the same as string µ # PASS: regex pattern Î is indeed the same as string µ # FAIL: re pattern Å¿ is not the same as string s # PASS: regex pattern Å¿ is indeed the same as string s # FAIL: re pattern ΣΤÎÎÎÎΣ is not the same as string ÏÏÎ¹Î³Î¼Î±Ï # PASS: regex pattern ΣΤÎÎÎÎΣ is indeed the same as string ÏÏÎ¹Î³Î¼Î±Ï # FAIL: re pattern POST is not the same as string poÅ¿t # PASS: regex pattern POST is indeed the same as string poÅ¿t # # re lib passed 0 of 5 tests # regex lib passed 5 of 5 tests