Give each operator its own token type

This commit is contained in:
Juhani Krekelä 2025-02-08 17:17:01 +02:00
parent 291e7dec9b
commit 608a9047d8
2 changed files with 38 additions and 21 deletions

View file

@@ -4,7 +4,7 @@ import re
import unittest
class kinds(enum.Enum):
unknown, eof, number, operator0, operator1, openparen, closeparen = range(7)
unknown, eof, number, plus, minus, multiply, divide, openparen, closeparen = range(9)
@dataclasses.dataclass
class Token:
@@ -14,8 +14,10 @@ class Token:
table = [(re.compile(ex), kind) for ex, kind in [
(r'\d+(\.\d+)?', kinds.number),
(r'[+-]', kinds.operator0),
(r'[*/]', kinds.operator1),
(r'\+', kinds.plus),
(r'-', kinds.minus),
(r'\*', kinds.multiply),
(r'/', kinds.divide),
(r'\(', kinds.openparen),
(r'\)', kinds.closeparen),
]]
@@ -72,9 +74,9 @@ class Tests(unittest.TestCase):
def test_expression(self):
self.assertTokenization('1+2 * 3', [
Token(0, '1', kinds.number),
Token(1, '+', kinds.operator0),
Token(1, '+', kinds.plus),
Token(2, '2', kinds.number),
Token(4, '*', kinds.operator1),
Token(4, '*', kinds.multiply),
Token(6, '3', kinds.number),
Token(7, '', kinds.eof)
])
@@ -83,10 +85,25 @@ class Tests(unittest.TestCase):
self.assertTokenization('(1+2) * 3', [
Token(0, '(', kinds.openparen),
Token(1, '1', kinds.number),
Token(2, '+', kinds.operator0),
Token(2, '+', kinds.plus),
Token(3, '2', kinds.number),
Token(4, ')', kinds.closeparen),
Token(6, '*', kinds.operator1),
Token(6, '*', kinds.multiply),
Token(8, '3', kinds.number),
Token(9, '', kinds.eof)
])
def test_kinds(self):
tokenization = [
Token(0, '0', kinds.number),
Token(1, '+', kinds.plus),
Token(2, '-', kinds.minus),
Token(3, '*', kinds.multiply),
Token(4, '/', kinds.divide),
Token(5, '(', kinds.openparen),
Token(6, ')', kinds.closeparen),
Token(7, '.', kinds.unknown),
Token(8, '', kinds.eof)
]
self.assertTokenization('0+-*/().', tokenization)
self.assertCountEqual(kinds, [i.kind for i in tokenization])

View file

@@ -26,7 +26,7 @@ def parse(string):
def expression0():
tree = expression1()
while peek().kind == kinds.operator0:
while peek().kind in (kinds.plus, kinds.minus):
operator = consume()
tree = BinOp(
operator=operator,
@@ -37,7 +37,7 @@ def parse(string):
def expression1():
tree = atom()
while peek().kind == kinds.operator1:
while peek().kind in (kinds.multiply, kinds.divide):
operator = consume()
tree = BinOp(
operator=operator,
@@ -65,10 +65,10 @@ def parse(string):
class Tests(unittest.TestCase):
def test_addSub(self):
self.assertEqual(parse('1+2-3-4+5'),
BinOp(Token(7, '+', kinds.operator0),
BinOp(Token(5, '-', kinds.operator0),
BinOp(Token(3, '-', kinds.operator0),
BinOp(Token(1, '+', kinds.operator0),
BinOp(Token(7, '+', kinds.plus),
BinOp(Token(5, '-', kinds.minus),
BinOp(Token(3, '-', kinds.minus),
BinOp(Token(1, '+', kinds.plus),
Atom(Token(0, '1', kinds.number)),
Atom(Token(2, '2', kinds.number))
),
@@ -82,8 +82,8 @@ class Tests(unittest.TestCase):
def test_precedence(self):
self.assertEqual(parse('1*2+3'),
BinOp(Token(3, '+', kinds.operator0),
BinOp(Token(1, '*', kinds.operator1),
BinOp(Token(3, '+', kinds.plus),
BinOp(Token(1, '*', kinds.multiply),
Atom(Token(0, '1', kinds.number)),
Atom(Token(2, '2', kinds.number))
),
@@ -91,9 +91,9 @@ class Tests(unittest.TestCase):
)
)
self.assertEqual(parse('1-2/3'),
BinOp(Token(1, '-', kinds.operator0),
BinOp(Token(1, '-', kinds.minus),
Atom(Token(0, '1', kinds.number)),
BinOp(Token(3, '/', kinds.operator1),
BinOp(Token(3, '/', kinds.divide),
Atom(Token(2, '2', kinds.number)),
Atom(Token(4, '3', kinds.number))
)
@@ -102,17 +102,17 @@ class Tests(unittest.TestCase):
def test_parentheses(self):
self.assertEqual(parse('1*(2+3)'),
BinOp(Token(1, '*', kinds.operator1),
BinOp(Token(1, '*', kinds.multiply),
Atom(Token(0, '1', kinds.number)),
BinOp(Token(4, '+', kinds.operator0),
BinOp(Token(4, '+', kinds.plus),
Atom(Token(3, '2', kinds.number)),
Atom(Token(5, '3', kinds.number))
)
)
)
self.assertEqual(parse('(1-2)/3'),
BinOp(Token(5, '/', kinds.operator1),
BinOp(Token(2, '-', kinds.operator0),
BinOp(Token(5, '/', kinds.divide),
BinOp(Token(2, '-', kinds.minus),
Atom(Token(1, '1', kinds.number)),
Atom(Token(3, '2', kinds.number))
),