Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement printf %()T %.s %*s %.*s #668

Merged
merged 10 commits into from
Mar 19, 2020
2 changes: 1 addition & 1 deletion frontend/id_kind_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ def AddKinds(spec):
spec.AddKind('Format', [
'EscapedPercent',
'Percent', # starts another lexer mode
'Flag', 'Num', 'Dot', 'Type',
'Flag', 'Num', 'Dot', 'Type', 'Star', 'Time', 'Zero',
])

# For parsing prompt strings like PS1.
Expand Down
7 changes: 5 additions & 2 deletions frontend/lexer_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,13 +530,16 @@ def IsKeyword(name):
# Maybe: bash also supports %(strftime)T
LEXER_DEF[lex_mode_e.PrintfPercent] = [
# Flags
R('[-0 +#]', Id.Format_Flag),
R('[- +#]', Id.Format_Flag),
C('0', Id.Format_Zero),

R('[1-9][0-9]*', Id.Format_Num),
C('*', Id.Format_Star),
C('.', Id.Format_Dot),
# We support dsq. The others we parse to display an error message.
R('[disqbcouxXeEfFgG]', Id.Format_Type),
R(r'[^\0]', Id.Unknown_Tok), # any otehr char
R('\([^()]*\)T', Id.Format_Time),
R(r'[^\0]', Id.Unknown_Tok), # any other char
]

LEXER_DEF[lex_mode_e.VSub_1] = [
Expand Down
114 changes: 96 additions & 18 deletions osh/builtin_printf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@
from __future__ import print_function

from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.runtime_asdl import cmd_value__Argv
from _devbuild.gen.runtime_asdl import cmd_value__Argv, value_e, value__Str
from _devbuild.gen.syntax_asdl import (
printf_part, printf_part_t,
source
source,
)
from _devbuild.gen.types_asdl import lex_mode_e, lex_mode_t

import sys
import time
import os

from asdl import runtime
from core import error
Expand All @@ -27,7 +29,7 @@
from osh import string_ops
from osh import word_compile

from typing import Dict, List, TYPE_CHECKING
from typing import Dict, List, TYPE_CHECKING, cast

if TYPE_CHECKING:
from frontend.lexer import Lexer
Expand All @@ -40,12 +42,15 @@
PRINTF_SPEC = arg_def.Register('printf') # TODO: Don't need this?
PRINTF_SPEC.ShortFlag('-v', args.Str)

shell_start_time = time.time()

class _FormatStringParser(object):
"""
Grammar:

fmt = Format_Percent Flag? Num? (Dot Num)? Type
width = Num | Star
precision = Dot (Num | Star | Zero)?
fmt = Percent (Flag | Zero)* width? precision? (Type | Time)
part = Char_* | Format_EscapedPercent | fmt
printf_format = part* Eof_Real # we're using the main lexer

Expand All @@ -70,7 +75,7 @@ def _ParseFormatStr(self):
self._Next(lex_mode_e.PrintfPercent) # move past %

part = printf_part.Percent()
if self.token_type == Id.Format_Flag:
if self.token_type in (Id.Format_Flag, Id.Format_Zero):
part.flag = self.cur_token
self._Next(lex_mode_e.PrintfPercent)

Expand All @@ -79,16 +84,18 @@ def _ParseFormatStr(self):
if flag in '# +':
p_die("osh printf doesn't support the %r flag", flag, token=part.flag)

if self.token_type == Id.Format_Num:
if self.token_type in (Id.Format_Num, Id.Format_Star):
part.width = self.cur_token
self._Next(lex_mode_e.PrintfPercent)

if self.token_type == Id.Format_Dot:
self._Next(lex_mode_e.PrintfPercent) # past dot
part.precision = self.cur_token
self._Next(lex_mode_e.PrintfPercent)
self._Next(lex_mode_e.PrintfPercent) # past dot
if self.token_type in (Id.Format_Num, Id.Format_Star, Id.Format_Zero):
part.precision = self.cur_token
self._Next(lex_mode_e.PrintfPercent)

if self.token_type == Id.Format_Type:
if self.token_type in (Id.Format_Type, Id.Format_Time):
part.type = self.cur_token

# ADDITIONAL VALIDATION outside the "grammar".
Expand All @@ -108,7 +115,7 @@ def _ParseFormatStr(self):
p_die(msg, token=self.cur_token)

# Do this check AFTER the floating point checks
if part.precision and part.type.val not in 'fs':
if part.precision and part.type.val[-1] not in 'fsT':
p_die("precision can't be specified when here",
token=part.precision)

Expand Down Expand Up @@ -205,28 +212,83 @@ def Run(self, cmd_val):
out.append(s)

elif isinstance(part, printf_part.Percent):
try:
width = None
if part.width:
if part.width.id in (Id.Format_Num, Id.Format_Zero):
width = part.width.val
width_spid = part.width.span_id
elif part.width.id == Id.Format_Star:
if arg_index < num_args:
width = varargs[arg_index]
width_spid = spids[arg_index]
arg_index += 1
else:
width = ''
width_spid = runtime.NO_SPID
else:
raise AssertionError()

try:
width = int(width)
except ValueError:
if width_spid == runtime.NO_SPID:
width_spid = part.width.span_id
self.errfmt.Print("printf got invalid number %r for the width", s,
span_id = width_spid)
return 1

precision = None
if part.precision:
if part.precision.id == Id.Format_Dot:
precision = '0'
precision_spid = part.precision.span_id
elif part.precision.id in (Id.Format_Num, Id.Format_Zero):
precision = part.precision.val
precision_spid = part.precision.span_id
elif part.precision.id == Id.Format_Star:
if arg_index < num_args:
precision = varargs[arg_index]
precision_spid = spids[arg_index]
arg_index += 1
else:
precision = ''
precision_spid = runtime.NO_SPID
else:
raise AssertionError()

try:
precision = int(precision)
except ValueError:
if precision_spid == runtime.NO_SPID:
precision_spid = part.precision.span_id
self.errfmt.Print("printf got invalid number %r for the precision", s,
span_id = precision_spid)
return 1

if arg_index < num_args:
s = varargs[arg_index]
word_spid = spids[arg_index]
except IndexError:
arg_index += 1
else:
s = ''
word_spid = runtime.NO_SPID

typ = part.type.val
if typ == 's':
if part.precision:
precision = int(part.precision.val)
if precision is not None:
s = s[:precision] # truncate
elif typ == 'q':
s = string_ops.ShellQuoteOneLine(s)
elif typ in 'diouxX':
elif typ in 'diouxX' or part.type.id == Id.Format_Time:
try:
d = int(s)
except ValueError:
if len(s) >= 2 and s[0] in '\'"':
# TODO: utf-8 decode s[1:] to be more correct. Probably
# depends on issue #366, a utf-8 library.
d = ord(s[1])
elif len(s) == 0 and part.type.id == Id.Format_Time:
d = -1
else:
# This works around the fact that in the arg recycling case, you have no spid.
if word_spid == runtime.NO_SPID:
Expand All @@ -252,14 +314,31 @@ def Run(self, cmd_val):
s = '%x' % d
elif typ == 'X':
s = '%X' % d
elif part.type.id == Id.Format_Time:
# set timezone
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain this part in a high-level comment? I don't know these APIs very well, and eventually they will have to be translated to C.

(Actually if you prefer to write in C, then native/libc.c has related functions and is fairly easy to modify.)

Does strftime read os.environ['TZ']? I'm not sure how it works exactly.


OK, I looked over the docs a bit; I guess time.tzset() reads os.environ['TZ']. That's kind of confusing. Please add a comment about that.

Copy link
Collaborator Author

@akinomyoga akinomyoga Mar 19, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cb08494 Thank you! I added code comments. (edit: added a typo fix)

tzcell = self.mem.GetCell('TZ')
if tzcell and tzcell.exported and tzcell.val.tag_() == value_e.Str:
tzval = cast(value__Str, tzcell.val)
os.environ['TZ'] = tzval.s
elif 'TZ' in os.environ:
del os.environ['TZ']
time.tzset()

if d == -1: # now
d = None
elif d == -2: # shell start time
d = shell_start_time
s = time.strftime(typ[1:-2], time.localtime(d));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no semicolon needed

Also where does -2 come from? I'm having trouble following that. May need more comments.

Copy link
Collaborator Author

@akinomyoga akinomyoga Mar 19, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the semicolon and added comments in cb08494. I also changed the code to set d = time.time() explicitly, rather than relying on the default behavior of time.localtime(None). (edit: added a typo fix)

if precision is not None:
s = s[:precision] # truncate

else:
raise AssertionError()

else:
raise AssertionError()

if part.width:
width = int(part.width.val)
if width is not None:
if part.flag:
flag = part.flag.val
if flag == '-':
Expand All @@ -272,7 +351,6 @@ def Run(self, cmd_val):
s = s.rjust(width, ' ')

out.append(s)
arg_index += 1

else:
raise AssertionError()
Expand Down
69 changes: 58 additions & 11 deletions spec/builtin-printf.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,30 @@ printf '[%0.0s]\n' foo
## N-I mksh stdout-json: "[ ]\n["
## N-I mksh status: 1

#### printf %6.s and %0.s
printf '[%6.s]\n' foo
printf '[%0.s]\n' foo
## STDOUT:
[ ]
[]
## END
## N-I mksh stdout-json: "[ ]\n["
## N-I mksh status: 1

#### printf %*.*s (width/precision from args)
printf '[%*s]\n' 9 hello
printf '[%.*s]\n' 3 hello
printf '[%*.3s]\n' 9 hello
printf '[%9.*s]\n' 3 hello
printf '[%*.*s]\n' 9 3 hello
## STDOUT:
[ hello]
[hel]
[ hel]
[ hel]
[ hel]
## END

#### unsigned / octal / hex
printf '[%u]\n' 42
printf '[%o]\n' 42
Expand Down Expand Up @@ -491,17 +515,22 @@ printf '[% d]\n' -42

#### printf # flag
# I didn't know these existed -- I only knew about - and 0 !
printf '[%#o]\n' 42
printf '[%#x]\n' 42
printf '[%#X]\n' 42
# Note: '#' flag for integers outputs a prefix ONLY WHEN the value is non-zero
printf '[%#o][%#o]\n' 0 42
printf '[%#x][%#x]\n' 0 42
printf '[%#X][%#X]\n' 0 42
echo ---
printf '[%#f]\n' 3
## STDOUT:
[052]
[0x2a]
[0X2A]
# Note: '#' flag for %f, %g always outputs the decimal point.
printf '[%.0f][%#.0f]\n' 3 3
# Note: In addition, '#' flag for %g does not omit zeroes in fraction
printf '[%g][%#g]\n' 3 3
## STDOUT:
[0][052]
[0][0x2a]
[0][0X2A]
---
[3.000000]
[3][3.]
[3][3.00000]
## END
## N-I osh STDOUT:
---
Expand Down Expand Up @@ -541,15 +570,33 @@ status=1
## END

#### %(strftime format)T
# The result depends on timezone
export TZ=Asia/Tokyo
printf '%(%Y-%m-%d)T\n' 1557978599
export TZ=US/Eastern
printf '%(%Y-%m-%d)T\n' 1557978599
echo status=$?
## STDOUT:
2019-05-16
2019-05-15
status=0
## END
## N-I dash/mksh/zsh/ash STDOUT:
status=1
## END
## N-I osh STDOUT:
status=2

#### %10.5(strftime format)T
# The result depends on timezone
export TZ=Asia/Tokyo
printf '[%10.5(%Y-%m-%d)T]\n' 1557978599
export TZ=US/Eastern
printf '[%10.5(%Y-%m-%d)T]\n' 1557978599
echo status=$?
## STDOUT:
[ 2019-]
[ 2019-]
status=0
## END
## N-I dash/mksh/zsh/ash STDOUT:
[[status=1
## END