Skip to content

Commit

Permalink
split_regex: Disable remainder on Python 3.8
Browse files Browse the repository at this point in the history
Building the remainder sub-pattern isn't compatible with Python 3.8 and is unnecessary.

Fixes #15
  • Loading branch information
jayvdb committed Feb 26, 2020
1 parent 60550f1 commit ea0e168
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 27 deletions.
11 changes: 8 additions & 3 deletions https_everywhere/_unregex.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,15 @@ def expand_pattern(pattern, max_count=100):
return [i.replace("~~", "*.").replace("~", "*").replace(",,", ".*") for i in rv]


def split_regex(pattern, at):
def split_regex(pattern, at, remainer=False):
if not isinstance(pattern, sre_parse.SubPattern):
pattern = sre_parse.parse(pattern)
found = False
new = sre_parse.SubPattern(pattern.pattern)
if remainer:
# This doesn't work on Python 3.8
new = sre_parse.SubPattern(pattern.pattern)
else:
new = None
for i, (tok, val) in enumerate(pattern.data.copy()):
if not found and tok == sre_parse.LITERAL and val == ord(at):
found = True
Expand All @@ -141,7 +145,8 @@ def split_regex(pattern, at):
del pattern[i]
continue
if found:
new.append((tok, val))
if remainer:
new.append((tok, val))
del pattern[-1]
if not found:
return pattern, None
Expand Down
87 changes: 63 additions & 24 deletions tests/test_unregex.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import sre_compile
import sre_parse
import sys
import unittest

from sre_parse import LITERAL

from https_everywhere._unregex import expand_pattern, split_regex

PY38 = sys.version_info[:2] >= (3, 8)


class TestExpandRegex(unittest.TestCase):
def test_start(self):
Expand Down Expand Up @@ -39,100 +42,136 @@ def test_complex_nothing(self):
assert rv == ["www.*", "*"]

def test_split_none(self):
rv = split_regex(r"^ab", "/")
rv = split_regex(r"^ab", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert rv[1] is None

def test_split_one(self):
rv = split_regex(r"a/b", "/")
rv = split_regex(r"a/b", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert isinstance(rv[1], sre_parse.SubPattern), rv[1].__class__.__name__
assert rv[0].data == [(sre_parse.LITERAL, ord("a"))]
assert rv[1].data == [(sre_parse.LITERAL, ord("b"))]

assert rv[0].pattern is rv[1].pattern

c0 = sre_compile.compile(rv[0])
c1 = sre_compile.compile(rv[1])
assert c0.match("a")
assert not c0.match("b")
assert c0.pattern is None

if PY38:
return

assert isinstance(rv[1], sre_parse.SubPattern), rv[1].__class__.__name__
assert rv[1].data == [(sre_parse.LITERAL, ord("b"))]
assert rv[0].pattern is rv[1].pattern

c1 = sre_compile.compile(rv[1])
assert c1.match("b")
assert not c1.match("a")
assert c0.pattern is None
assert c1.pattern is None

def test_split_multiple(self):
rv = split_regex(r"a/b/c", "/")
rv = split_regex(r"a/b/c", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert isinstance(rv[1], sre_parse.SubPattern), rv[1].__class__.__name__
assert rv[0].data == [(LITERAL, ord("a"))]
assert rv[1].data == [(LITERAL, ord("b")), (LITERAL, 47), (LITERAL, 99)]

assert rv[0].pattern is rv[1].pattern

c0 = sre_compile.compile(rv[0])
c1 = sre_compile.compile(rv[1])
assert c0.match("a")
assert not c0.match("b")
assert c0.pattern is None

if PY38:
return

assert isinstance(rv[1], sre_parse.SubPattern), rv[1].__class__.__name__
assert rv[1].data == [(LITERAL, ord("b")), (LITERAL, 47), (LITERAL, 99)]
assert rv[0].pattern is rv[1].pattern

c1 = sre_compile.compile(rv[1])
assert not c1.match("b")
assert not c1.match("a")
assert c1.match("b/c")
assert c0.pattern is None
assert c1.pattern is None

def test_split_at(self):
rv = split_regex(r"^/b", "/")
rv = split_regex(r"^/b", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert isinstance(rv[1], sre_parse.SubPattern), rv[1].__class__.__name__
assert rv[0].data == [(sre_parse.AT, sre_parse.AT_BEGINNING)]

if PY38:
return

assert isinstance(rv[1], sre_parse.SubPattern), rv[1].__class__.__name__
assert rv[1].data == [(sre_parse.LITERAL, ord("b"))]

def test_split_skip_not(self):
rv = split_regex(r"[^/]a/b", "/")
rv = split_regex(r"[^/]a/b", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert isinstance(rv[1], sre_parse.SubPattern), rv[1].__class__.__name__
assert rv[0].data == [
(sre_parse.NOT_LITERAL, ord("/")),
(sre_parse.LITERAL, ord("a")),
]

if PY38:
return

assert isinstance(rv[1], sre_parse.SubPattern), rv[1].__class__.__name__
assert rv[1].data == [(sre_parse.LITERAL, ord("b"))]

def test_split_min_max(self):
rv = split_regex(r"a/{1,3}b", "/")
rv = split_regex(r"a/{1,3}b", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert rv[0].data == [(sre_parse.LITERAL, ord("a"))]

if PY38:
return

assert rv[1].data == [(sre_parse.LITERAL, ord("b"))]

def test_split_plus(self):
rv = split_regex(r"a/+b", "/")
rv = split_regex(r"a/+b", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert rv[0].data == [(sre_parse.LITERAL, ord("a"))]

if PY38:
return

assert rv[1].data == [(sre_parse.LITERAL, ord("b"))]

def test_split_star(self):
rv = split_regex(r"a/*b", "/")
rv = split_regex(r"a/*b", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert rv[0].data == [(sre_parse.LITERAL, ord("a"))]

if PY38:
return

assert rv[1].data == [(sre_parse.LITERAL, ord("b"))]

def test_split_class(self):
rv = split_regex(r"a[/]b", "/")
rv = split_regex(r"a[/]b", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert rv[0].data == [(sre_parse.LITERAL, ord("a"))]

if PY38:
return

assert rv[1].data == [(sre_parse.LITERAL, ord("b"))]

def test_split_class2(self):
rv = split_regex(r"a[(/]b", "/")
rv = split_regex(r"a[(/]b", "/", remainer=not PY38)
assert isinstance(rv, tuple), rv
assert isinstance(rv[0], sre_parse.SubPattern), rv
assert rv[0].data == [(sre_parse.LITERAL, ord("a"))]

if PY38:
return

assert rv[1].data == [(sre_parse.LITERAL, ord("b"))]

0 comments on commit ea0e168

Please sign in to comment.