-
-
Notifications
You must be signed in to change notification settings - Fork 163
/
Copy pathbuiltin_printf.py
executable file
·319 lines (264 loc) · 9.3 KB
/
builtin_printf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
#!/usr/bin/env python2
"""
builtin_printf
"""
from __future__ import print_function
from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.runtime_asdl import cmd_value__Argv, value_e, value__Str
from _devbuild.gen.syntax_asdl import (
printf_part, printf_part_t,
source
)
from _devbuild.gen.types_asdl import lex_mode_e, lex_mode_t
import sys
import time
import os
from asdl import runtime
from core import error
from core import state
from core.util import p_die, e_die
from frontend import args
from frontend import arg_def
from frontend import consts
from frontend import match
from frontend import reader
from mycpp import mylib
from osh import string_ops
from osh import word_compile
from typing import Dict, List, TYPE_CHECKING, cast
if TYPE_CHECKING:
from frontend.lexer import Lexer
from frontend.parse_lib import ParseContext
from core.state import Mem
from core.ui import ErrorFormatter
if mylib.PYTHON:
  # Flag spec for the printf builtin.  It declares a single short option, -v,
  # taking a string; Printf.Run reads it back as arg.v and uses it as the name
  # of the variable to assign the formatted result to.
  PRINTF_SPEC = arg_def.Register('printf')  # TODO: Don't need this?
  PRINTF_SPEC.ShortFlag('-v', args.Str)

# Wall-clock time captured once, when this module is first imported.
# Printf.Run substitutes it for a %(...)T argument of -2.
shell_start_time = time.time()
class _FormatStringParser(object):
  """Parse a printf format string into a list of printf_part_t nodes.

  Grammar:

    fmt           = Format_Percent Flag? Num? (Dot Num)? Type
    part          = Char_* | Format_EscapedPercent | fmt
    printf_format = part* Eof_Real   # we're using the main lexer

  Maybe: bash also supports %(strftime)T

  Errors are reported with p_die(), using the offending token as the location.
  """

  def __init__(self, lexer):
    # type: (Lexer) -> None
    self.lexer = lexer

  def _Next(self, lex_mode):
    # type: (lex_mode_t) -> None
    """Read the next token in the given lexer mode.

    The token is stored in self.cur_token, and its id and kind are cached in
    self.token_type and self.token_kind for the checks in the parse methods.
    """
    self.cur_token = self.lexer.Read(lex_mode)
    self.token_type = self.cur_token.id
    self.token_kind = consts.GetKind(self.token_type)

  def _ParseFormatStr(self):
    # type: () -> printf_part_t
    """Parse one conversion spec; the current token is the leading '%'.

    Returns:
      A printf_part.Percent with flag / width / precision / type tokens set for
      whichever components were present.  On return, the current token is
      still the Type token; the caller advances past it.
    """
    self._Next(lex_mode_e.PrintfPercent)  # move past %

    part = printf_part.Percent()
    if self.token_type == Id.Format_Flag:
      part.flag = self.cur_token
      self._Next(lex_mode_e.PrintfPercent)

      # space and + could be implemented
      flag = part.flag.val
      if flag in '# +':
        p_die("osh printf doesn't support the %r flag", flag, token=part.flag)

    if self.token_type == Id.Format_Num:
      part.width = self.cur_token
      self._Next(lex_mode_e.PrintfPercent)

    if self.token_type == Id.Format_Dot:
      self._Next(lex_mode_e.PrintfPercent)  # past dot
      # The grammar is (Dot Num).  Without this check, any token following the
      # dot would be stored as the precision, and a format such as '%.ss'
      # would later fail when the precision is converted with int().
      if self.token_type != Id.Format_Num:
        p_die("osh printf requires a precision number after '.'",
              token=self.cur_token)
      part.precision = self.cur_token
      self._Next(lex_mode_e.PrintfPercent)

    if self.token_type == Id.Format_Type:
      part.type = self.cur_token

      # ADDITIONAL VALIDATION outside the "grammar".
      if part.type.val in 'eEfFgG':
        p_die("osh printf doesn't support floating point", token=part.type)
      # These two could be implemented.  %c needs utf-8 decoding.
      if part.type.val == 'b':
        p_die("osh printf doesn't support backslash escaping (try $'\\n')", token=part.type)
      if part.type.val == 'c':
        p_die("osh printf doesn't support single characters (bytes)", token=part.type)

    else:
      if self.cur_token.val:
        msg = 'Invalid printf format character'
      else:  # for printf '%'
        msg = 'Expected a printf format character'
      p_die(msg, token=self.cur_token)

    # Do this check AFTER the floating point checks
    if part.precision and part.type.val not in 'fs':
      p_die("precision can't be specified when here",
            token=part.precision)

    return part

  def Parse(self):
    # type: () -> List[printf_part_t]
    """Parse the whole format string.

    Returns:
      The parts in order: printf_part.Literal for plain characters and for the
      escaped percent token, printf_part.Percent for each conversion spec.
    """
    self._Next(lex_mode_e.PrintfOuter)
    parts = []  # type: List[printf_part_t]
    while True:
      if (self.token_kind == Kind.Char or
          self.token_type == Id.Format_EscapedPercent):

        # TODO: Could handle Char_BadBackslash.
        # Maybe make that a different kind?

        parts.append(printf_part.Literal(self.cur_token))

      elif self.token_type == Id.Format_Percent:
        parts.append(self._ParseFormatStr())

      elif self.token_type == Id.Eof_Real:
        break

      else:
        # The message has a %r placeholder, so the token text must be passed
        # as the format argument.
        p_die('Invalid token %r', self.cur_token.val, token=self.cur_token)

      self._Next(lex_mode_e.PrintfOuter)

    return parts
class Printf(object):
  """The printf builtin: printf [-v var] format [argument ...]"""

  def __init__(self, mem, parse_ctx, errfmt):
    # type: (Mem, ParseContext, ErrorFormatter) -> None
    self.mem = mem
    self.parse_ctx = parse_ctx
    self.errfmt = errfmt
    # Format string -> parsed parts, so each distinct format is parsed once.
    self.parse_cache = {}  # type: Dict[str, List[printf_part_t]]

  def _PadToWidth(self, s, width, flag, typ):
    # type: (str, int, str, str) -> str
    """Pad an already-formatted value to a minimum field width.

    Args:
      s: the formatted value
      width: minimum field width; longer values are returned unchanged
      flag: the flag character from the format spec, or '' if there was none
      typ: the conversion type, e.g. 'd' or 's'

    Returns:
      s left-justified with spaces for '-', zero-filled on the left for '0',
      unchanged for any other flag, and right-justified with spaces when there
      is no flag.
    """
    if flag == '-':
      return s.ljust(width, ' ')

    if flag == '0':
      if typ in 'di' and s.startswith('-'):
        # Zeros go between the sign and the digits: %05d of -42 is -0042,
        # not 00-42.
        return '-' + s[1:].rjust(width - 1, '0')
      return s.rjust(width, '0')

    if flag:
      return s  # other flags don't affect padding

    return s.rjust(width, ' ')

  def Run(self, cmd_val):
    # type: (cmd_value__Argv) -> int
    """
    printf: printf [-v var] format [argument ...]

    The format is applied repeatedly until all arguments are consumed.  Missing
    arguments are treated as the empty string.

    Returns:
      0 on success, 1 if an argument for a numeric conversion isn't a valid
      number, 2 if the format string fails to parse.

    Raises:
      args.UsageError: if the -v argument is not a valid variable name.
    """
    arg_r = args.Reader(cmd_val.argv, spids=cmd_val.arg_spids)
    arg_r.Next()  # skip argv[0]
    arg, _ = PRINTF_SPEC.Parse(arg_r)

    fmt, fmt_spid = arg_r.ReadRequired2('requires a format string')
    varargs, spids = arg_r.Rest2()

    #log('fmt %s', fmt)
    #log('vals %s', vals)

    arena = self.parse_ctx.arena
    if fmt in self.parse_cache:
      parts = self.parse_cache[fmt]
    else:
      line_reader = reader.StringLineReader(fmt, arena)
      # TODO: Make public
      lexer = self.parse_ctx._MakeLexer(line_reader)
      p = _FormatStringParser(lexer)
      arena.PushSource(source.ArgvWord(fmt_spid))
      try:
        parts = p.Parse()
      except error.Parse as e:
        self.errfmt.PrettyPrintError(e)
        return 2  # parse error
      finally:
        arena.PopSource()

      self.parse_cache[fmt] = parts

    if 0:  # flip to dump the parsed format while debugging
      print()
      for part in parts:
        part.PrettyPrint()
        print()

    out = []  # type: List[str]
    arg_index = 0
    num_args = len(varargs)

    while True:
      pass_start = arg_index  # to detect a pass that consumes no arguments

      for part in parts:
        if isinstance(part, printf_part.Literal):
          token = part.token
          if token.id == Id.Format_EscapedPercent:
            s = '%'
          else:
            s = word_compile.EvalCStringToken(token.id, token.val)
          out.append(s)

        elif isinstance(part, printf_part.Percent):
          try:
            s = varargs[arg_index]
            word_spid = spids[arg_index]
          except IndexError:
            # Ran out of arguments: format the rest as if given ''
            s = ''
            word_spid = runtime.NO_SPID

          typ = part.type.val
          if typ == 's':
            if part.precision:
              precision = int(part.precision.val)
              s = s[:precision]  # truncate

          elif typ == 'q':
            s = string_ops.ShellQuoteOneLine(s)

          elif typ in 'diouxX' or typ.endswith('T'):
            try:
              d = int(s)
            except ValueError:
              if len(s) >= 2 and s[0] in '\'"':
                # A leading quote means "the code of the next character".
                # TODO: utf-8 decode s[1:] to be more correct.  Probably
                # depends on issue #366, a utf-8 library.
                d = ord(s[1])
              elif len(s) == 0 and typ.endswith('T'):
                d = -1  # no argument for %(...)T means "now"
              else:
                # This works around the fact that in the arg recycling case, you have no spid.
                if word_spid == runtime.NO_SPID:
                  self.errfmt.Print("printf got invalid number %r for this substitution", s,
                                    span_id=part.type.span_id)
                else:
                  self.errfmt.Print("printf got invalid number %r", s,
                                    span_id=word_spid)

                return 1

            if typ in 'di':
              s = str(d)
            elif typ in 'ouxX':
              if d < 0:
                e_die("Can't format negative number %d with %%%s",
                      d, typ, span_id=part.type.span_id)
              if typ == 'u':
                s = str(d)
              elif typ == 'o':
                s = '%o' % d
              elif typ == 'x':
                s = '%x' % d
              elif typ == 'X':
                s = '%X' % d
            elif typ.endswith('T'):
              # set timezone: copy the shell's exported string TZ into the
              # process environment (or remove it), then re-read it with
              # tzset() so localtime() below uses it.
              tzcell = self.mem.GetCell('TZ')
              if tzcell and tzcell.exported and tzcell.val.tag_() == value_e.Str:
                tzval = cast(value__Str, tzcell.val)
                os.environ['TZ'] = tzval.s
              elif 'TZ' in os.environ:
                del os.environ['TZ']
              time.tzset()

              if d == -1:  # now
                d = None
              elif d == -2:  # shell start time
                d = shell_start_time
              # typ[1:-2] drops the first character and the last two,
              # leaving the strftime format.
              s = time.strftime(typ[1:-2], time.localtime(d))
            else:
              raise AssertionError()

          else:
            raise AssertionError()

          if part.width:
            width = int(part.width.val)
            flag = part.flag.val if part.flag else ''
            s = self._PadToWidth(s, width, flag, typ)

          out.append(s)
          arg_index += 1

        else:
          raise AssertionError()

      if arg_index >= num_args:
        break
      if arg_index == pass_start:
        # The format has no conversions, so the remaining arguments can never
        # be consumed (e.g. printf 'hi' a b).  Stop instead of looping forever.
        break
      # Otherwise there are more args.  So cycle through the loop once more to
      # implement the 'arg recycling' behavior.

    result = ''.join(out)
    if arg.v:
      var_name = arg.v

      # Notes:
      # - bash allows a[i] here (as in unset and ${!x}), but we haven't
      #   implemented it.
      # - TODO: get the span_id for arg.v!
      if not match.IsValidVarName(var_name):
        raise args.UsageError('got invalid variable name %r' % var_name)
      state.SetStringDynamic(self.mem, var_name, result)
    else:
      sys.stdout.write(result)
    return 0