124 lines
3.0 KiB
Python
124 lines
3.0 KiB
Python
class Literal:
|
|
def __init__(self, text):
|
|
self.text = text
|
|
self.single_char = len(text) == 1
|
|
|
|
def __repr__(self):
|
|
return 'Literal(%s)' % repr(self.text)
|
|
|
|
def __str__(self):
|
|
# ERE-style quotation rules
|
|
# A-Za-z0-9 and space are safe, as are non-ASCII
|
|
# Everything else is safe to quote with backslash
|
|
return ''.join(
|
|
char if ord('A') <= ord(char) <= ord('Z') else
|
|
char if ord('a') <= ord(char) <= ord('z') else
|
|
char if ord('0') <= ord(char) <= ord('9') else
|
|
char if char == ' ' else
|
|
char if ord(char) >= 128 else # Non-ASCII
|
|
'\\' + char # Quote
|
|
|
|
for char in self.text
|
|
)
|
|
|
|
class Concatenation:
|
|
def __init__(self, *elements):
|
|
self.elements = elements
|
|
|
|
def __repr__(self):
|
|
return 'Concatenation(%s)' % ', '.join(map(repr, self.elements))
|
|
|
|
def __str__(self):
|
|
return ''.join(
|
|
# Only alternation binds looser than concatenation,
|
|
# so we can pass everything else pass through with
|
|
# no parenthesizing
|
|
str(element) if type(element) != Alternation else
|
|
'(' + str(element) + ')'
|
|
|
|
for element in self.elements
|
|
)
|
|
|
|
class Alternation:
|
|
def __init__(self, *elements):
|
|
self.elements = elements
|
|
|
|
def __repr__(self):
|
|
return 'Alternation(%s)' % ', '.join(map(repr, self.elements))
|
|
|
|
def __str__(self):
|
|
# Nothing binds looser than alternation, so just pass
|
|
# everything through as-is
|
|
return '|'.join(map(str, self.elements))
|
|
|
|
class Star:
|
|
def __init__(self, element):
|
|
self.element = element
|
|
|
|
def __repr__(self):
|
|
return 'Star(%s)' % repr(self.element)
|
|
|
|
def __str__(self):
|
|
# * applies to the previous character or a parenthesized
|
|
# group. Therefore, we parentesize unless we havea Literal
|
|
# and it is one-char long
|
|
if type(self.element) == Literal and self.element.single_char:
|
|
return str(self.element) + '*'
|
|
else:
|
|
return '(%s)*' % str(self.element)
|
|
|
|
def lit(text):
|
|
return Literal(text)
|
|
|
|
def concat(*elements):
|
|
# TODO: drop literals that are just ''
|
|
flattened = []
|
|
for element in elements:
|
|
if type(element) == Concatenation:
|
|
flattened.extend(element.elements)
|
|
else:
|
|
flattened.append(element)
|
|
|
|
combined = []
|
|
for element in flattened:
|
|
if type(element) == Literal and element.text == '':
|
|
# Drop empty literals
|
|
continue
|
|
|
|
elif len(combined) > 0 and type(combined[-1]) == Literal and type(element) == Literal:
|
|
# Combine two literals next to each other
|
|
# into one literal
|
|
previous = combined.pop()
|
|
combined.append(Literal(previous.text + element.text))
|
|
|
|
else:
|
|
combined.append(element)
|
|
|
|
if len(combined) == 0:
|
|
# Empty regex, represent with empty literal
|
|
return lit('')
|
|
|
|
elif len(combined) == 1:
|
|
element, = combined
|
|
return element
|
|
else:
|
|
return Concatenation(*combined)
|
|
|
|
def bar(*elements):
|
|
# TODO: rewrite (foo|foo) → foo
|
|
flattened = []
|
|
for element in elements:
|
|
if type(element) == Alternation:
|
|
flattened.extend(element.elements)
|
|
else:
|
|
flattened.append(element)
|
|
|
|
if len(flattened) == 1:
|
|
element, = flattened
|
|
return element
|
|
else:
|
|
return Alternation(*flattened)
|
|
|
|
def star(element):
|
|
return Star(element)
|