-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.mll
118 lines (110 loc) · 3.19 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
{
open Lexing
open Parser
open Printf
let ignore_new_line lexbuf =
let lcp = lexbuf.lex_curr_p in
if lcp != dummy_pos then
lexbuf.lex_curr_p <-
{ lcp with
pos_lnum = lcp.pos_lnum + 1;
pos_bol = lcp.pos_cnum;
};
lexbuf.lex_start_p <- lexbuf.lex_curr_p
}
let dec_digit = ['0'-'9']
let signed_int = dec_digit+ | ('-' dec_digit+)
let ident = ['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '0'-'9' '_']*
let blank = [' ' '\t']+
let tyident = "'"['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '0'-'9' '_']*
let space = [' ' '\t' '\n']+
rule token = parse
| '#' [^ '\n']+ { token lexbuf }
| blank "(" { LPARENSPACE }
| '\n' "(" { ignore_new_line lexbuf; LPARENSPACE }
| blank "<=" { LESSEQ }
| '\n' "<=" { ignore_new_line lexbuf; LESSEQ }
| blank "<" { LESSSPACE }
| '\n' "<" { ignore_new_line lexbuf; LESSSPACE }
| blank { token lexbuf }
| '\n' { new_line lexbuf; token lexbuf }
| signed_int as x { NUM (Int64.of_string x) }
| '"' '"' '"' { parse_string (Buffer.create 100) true lexbuf}
| '"' { parse_string (Buffer.create 100) false lexbuf }
| "def" { DEF }
| "and" { ANDDEF }
| "print" { PRINT }
| "printStack" { PRINTSTACK }
| "nil" { NIL }
| "true" { TRUE }
| "false" { FALSE }
| "istuple" { ISTUPLE }
| "isbool" { ISBOOL }
| "isnum" { ISNUM }
| "isstr" { ISSTR }
| "tonum" { TONUM }
| "split" {SPLIT}
| "join" {JOIN}
| "tuple" { TUPLE }
| "tostr" { TOSTR }
| "tobool" { TOBOOL }
| "add1" { ADD1 }
| "sub1" { SUB1 }
| "lambda" { LAMBDA }
| "λ" { LAMBDA }
| "if" { IF }
| ":" { COLON }
| "else:" { ELSECOLON }
| "let" { LET }
| "in" { IN }
| "=" { EQUAL }
| "," { COMMA }
| "(" { LPARENNOSPACE }
| ")" { RPAREN }
| "[" { LBRACK }
| "]" { RBRACK }
| "+" { PLUS }
| "-" { MINUS }
| "*" { TIMES }
| ":=" { COLONEQ }
| "==" { EQEQ }
| ">" { GREATER }
| "<=" { LESSEQ }
| ">=" { GREATEREQ }
| "&&" { AND }
| "||" { OR }
| "!" { NOT }
| ";" { SEMI }
| "^" { CONCAT }
| "begin" { BEGIN }
| "end" { END }
| "rec" { REC }
| "shadow" { SHADOW }
| "." { DOT }
| ident as x { if x = "_" then UNDERSCORE else ID x }
| eof { EOF }
| _ as c { failwith (sprintf "Unrecognized character: %c" c) }
and parse_string str is_herestring =
parse
| '"' '"' '"' {
if is_herestring
then STR (Buffer.contents str)
else failwith "Herestring terminated in non-herestring literal"
}
| '"' {
if is_herestring
then (Buffer.add_char str '"'; parse_string str is_herestring lexbuf)
else (STR (Buffer.contents str))
}
| '\\' '"' { Buffer.add_char str '"'; parse_string str is_herestring lexbuf }
| '\\' 'r' { Buffer.add_char str '\r'; parse_string str is_herestring lexbuf }
| '\\' 'n' { Buffer.add_char str '\n'; parse_string str is_herestring lexbuf }
| '\\' 't' { Buffer.add_char str '\t'; parse_string str is_herestring lexbuf }
| '\\' '\\' { Buffer.add_char str '\\'; parse_string str is_herestring lexbuf }
| '\n' {
if is_herestring
then (Buffer.add_char str '\n'; parse_string str is_herestring lexbuf)
else (failwith "Unterminated string literal")
}
| _ as c { Buffer.add_char str c; parse_string str is_herestring lexbuf }
| eof { failwith "Unterminated string" }