-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.py
97 lines (84 loc) · 2.64 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from sly import Lexer
class TetroScriptLexer(Lexer):
    """Tokenizer for the TetroScript language.

    Scans the input string and produces the tokens that the parser consumes
    in the later stages of compilation.

    Token kinds produced:
        TOK_NUMBER       -- run of decimal digits; the value is an ``int``.
        TOK_ID           -- lowercase identifier that is not a reserved word.
        TOK_TYPE_OPTION  -- tetromino type option, ``T1`` through ``T7``.
        keyword tokens   -- one per entry in ``reserved_keywords``.
        literals         -- single punctuation characters (see ``literals``).
    """

    # Set of legal token names. Extended with the keyword tokens further down.
    tokens = {
        # Generic tokens
        TOK_NUMBER,
        TOK_ID,
        TOK_TYPE_OPTION,
    }

    # Characters skipped between tokens. "\r" is included so that sources
    # saved with Windows (CRLF) line endings do not trigger the
    # illegal-character handler at the end of every line.
    ignore = " \t\r"

    # Comments run from "#" to the end of the line and are discarded.
    ignore_comment = r'\#.*'

    # Punctuation literals become tokens whose type equals their value.
    # Example: "=" is converted to Token(type="=", value="=").
    literals = {
        "=",
        "{",
        "}",
        "[",
        "]",
        ",",
        ";",
    }

    # Reserved keywords: maps the keyword's source text to its token name.
    reserved_keywords = {
        "gameboard": "TOK_GAMEBOARD",
        "x_size": "TOK_X_SIZE",
        "y_size": "TOK_Y_SIZE",
        # NOTE(review): this is the only keyword token without the "TOK_"
        # prefix. Left unchanged because the parser (not visible here) may
        # reference the name as-is -- confirm before renaming.
        "falling_speed": "FALLING_SPEED",
        "tetromino": "TOK_TETROMINO",
        "type": "TOK_TYPE",
        "color": "TOK_COLOR",
        "control": "TOK_CONTROL",
        "key": "TOK_KEY",
        "action": "TOK_ACTION",
        "game": "TOK_GAME",
        "board": "TOK_BOARD",
        "blocks": "TOK_BLOCKS",
        "levels": "TOK_LEVELS",
        "controls": "TOK_CONTROLS",
        "main": "TOK_MAIN",
        "play": "TOK_PLAY",
    }

    # Add the keyword token names to the set of legal tokens.
    tokens = tokens.union(reserved_keywords.values())

    # Tetromino type option. The user has 7 options: T1, T2, ..., T7.
    TOK_TYPE_OPTION = r"T[1-7]"

    @_(r'[a-z][a-z0-9_]*')
    def TOK_ID(self, t):
        """Match an identifier and reclassify it if it is a reserved keyword.

        An identifier starts with a lowercase letter followed by any number
        of lowercase letters, digits or underscores.

        Returns the token with ``type`` set to the keyword's token name when
        the matched text is in ``reserved_keywords``, otherwise ``'TOK_ID'``.
        """
        t.type = self.reserved_keywords.get(t.value, 'TOK_ID')
        return t

    @_(r'\d+')
    def TOK_NUMBER(self, t):
        """Match a run of digits and convert the token value to ``int``."""
        t.value = int(t.value)
        return t

    @_(r'\n+')
    def ignore_newline(self, t):
        """Discard newlines while keeping ``self.lineno`` up to date."""
        # One match may cover several consecutive newlines.
        self.lineno += len(t.value)

    def error(self, t):
        """Report an illegal character and resume scanning after it.

        The first character of ``t.value`` is reported as the offending one.
        Nothing is returned, so no error token reaches the parser.
        """
        print(f"ERROR: An illegal character {t.value[0]} in line {self.lineno}.")
        self.index += 1  # Skip the bad character so lexing can continue.