109 lines
2.5 KiB
Python
109 lines
2.5 KiB
Python
import dataclasses
|
|
import enum
|
|
import re
|
|
import unittest
|
|
|
|
class kinds(enum.Enum):
    """Categories of token that lex() can produce."""
    unknown = 0     # any character no pattern in `table` matches
    eof = 1         # end-of-input sentinel, always emitted last
    number = 2      # integer or decimal literal
    plus = 3
    minus = 4
    multiply = 5
    divide = 6
    openparen = 7
    closeparen = 8
|
|
|
|
@dataclasses.dataclass
class Token:
    """A single lexeme produced by lex().

    Dataclass equality (used heavily by the tests below) compares all
    three fields.
    """
    index: int  # offset in the source string where the token starts
    text: str   # exact matched substring ('' for the eof token)
    kind: kinds  # category of this token, one of the kinds enum members
|
|
|
|
# Raw (regex, kind) specs, listed in the order lex() tries them.
_TOKEN_SPECS = (
    (r'\d+(\.\d+)?', kinds.number),
    (r'\+', kinds.plus),
    (r'-', kinds.minus),
    (r'\*', kinds.multiply),
    (r'/', kinds.divide),
    (r'\(', kinds.openparen),
    (r'\)', kinds.closeparen),
)

# Compile once at import time; lex() scans this list per token.
table = [(re.compile(pattern), kind) for pattern, kind in _TOKEN_SPECS]
|
|
|
|
def lex(string):
    """Tokenize *string*, yielding Token objects, ending with an eof token.

    Whitespace separates tokens and is skipped. The patterns in `table`
    are tried in order at the current position; if none matches, a
    single-character token of kind `kinds.unknown` is emitted so lexing
    always makes progress.
    """
    index = 0
    while index < len(string):
        # Skip a run of whitespace, then retry from the loop top (which
        # also handles trailing whitespace reaching end-of-input).
        if string[index].isspace():
            while index < len(string) and string[index].isspace():
                index += 1
            continue

        for regex, kind in table:
            # Match in place at `index` instead of re.match on
            # string[index:]: slicing copies the remaining tail for
            # every token, making the whole lex O(n^2).
            token = regex.match(string, index)
            if token is not None:
                length = token.end() - index
                break
        else:
            # No pattern matched: emit one unknown character.
            length = 1
            kind = kinds.unknown

        yield Token(
            index=index,
            text=string[index:index + length],
            kind=kind
        )
        index += length
    yield Token(
        index=index,
        text='',
        kind=kinds.eof
    )
|
|
|
|
class Tests(unittest.TestCase):
    """Unit tests for lex() and its token table."""

    def assertTokenization(self, string, expected):
        # Drain the generator and compare against the full expected list.
        self.assertEqual(list(lex(string)), expected)

    def test_space(self):
        # Whitespace-only input produces nothing but the eof token.
        self.assertTokenization(' \n\t', [
            Token(index=3, text='', kind=kinds.eof),
        ])

    def test_numbers(self):
        # Integers and decimals are numbers; a bare '.' is unknown.
        self.assertTokenization('1234567890 3.3 0 .2', [
            Token(index=0, text='1234567890', kind=kinds.number),
            Token(index=11, text='3.3', kind=kinds.number),
            Token(index=15, text='0', kind=kinds.number),
            Token(index=17, text='.', kind=kinds.unknown),
            Token(index=18, text='2', kind=kinds.number),
            Token(index=19, text='', kind=kinds.eof),
        ])

    def test_expression(self):
        # Mixed operators, with and without surrounding whitespace.
        self.assertTokenization('1+2 * 3', [
            Token(index=0, text='1', kind=kinds.number),
            Token(index=1, text='+', kind=kinds.plus),
            Token(index=2, text='2', kind=kinds.number),
            Token(index=4, text='*', kind=kinds.multiply),
            Token(index=6, text='3', kind=kinds.number),
            Token(index=7, text='', kind=kinds.eof),
        ])

    def test_parentheses(self):
        self.assertTokenization('(1+2) * 3', [
            Token(index=0, text='(', kind=kinds.openparen),
            Token(index=1, text='1', kind=kinds.number),
            Token(index=2, text='+', kind=kinds.plus),
            Token(index=3, text='2', kind=kinds.number),
            Token(index=4, text=')', kind=kinds.closeparen),
            Token(index=6, text='*', kind=kinds.multiply),
            Token(index=8, text='3', kind=kinds.number),
            Token(index=9, text='', kind=kinds.eof),
        ])

    def test_kinds(self):
        # One token of each kind, and a check that the expected list
        # covers every member of the kinds enum exactly once.
        tokenization = [
            Token(index=0, text='0', kind=kinds.number),
            Token(index=1, text='+', kind=kinds.plus),
            Token(index=2, text='-', kind=kinds.minus),
            Token(index=3, text='*', kind=kinds.multiply),
            Token(index=4, text='/', kind=kinds.divide),
            Token(index=5, text='(', kind=kinds.openparen),
            Token(index=6, text=')', kind=kinds.closeparen),
            Token(index=7, text='.', kind=kinds.unknown),
            Token(index=8, text='', kind=kinds.eof),
        ]
        self.assertTokenization('0+-*/().', tokenization)
        self.assertCountEqual(kinds, [token.kind for token in tokenization])
|