Skip to content

Commit

Permalink
Cast chars to unsigned for ctype functions
Browse files Browse the repository at this point in the history
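- Calling ctype functions (isspace, isalpha, tolower, ...) with an
  argument that is neither representable as unsigned char nor equal to
  EOF is undefined behavior. Plain char may be signed, so bytes >= 0x80
  (e.g. UTF-8 input) must be cast to unsigned char before the call.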
- This commit fixes issue Genivia#198.
  • Loading branch information
mdehling committed Mar 16, 2024
1 parent 3c2dd77 commit ef36c16
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 46 deletions.
28 changes: 14 additions & 14 deletions lib/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ static int convert_hex(const char *pattern, size_t len, size_t& pos, convert_fla
size_t n = pos + 3;
if (c == 'u')
n += 2;
while (k < n && k < len && std::isxdigit(c = pattern[k++]))
while (k < n && k < len && std::isxdigit(static_cast<unsigned char>(c = pattern[k++])))
*s++ = c;
*s = '\0';
--k;
Expand Down Expand Up @@ -337,7 +337,7 @@ static const std::string& expand(const std::map<std::string,std::string> *macros
{
// lookup {name} and expand without converting
size_t k = pos++;
while (pos < len && (std::isalnum(pattern[pos]) || pattern[pos] == '_' || (pattern[pos] & 0x80) == 0x80))
while (pos < len && (std::isalnum(static_cast<unsigned char>(pattern[pos])) || pattern[pos] == '_' || (pattern[pos] & 0x80) == 0x80))
++pos;
if (pos >= len || (pattern[pos] == '\\' ? pattern[pos + 1] != '}' : pattern[pos] != '}'))
throw regex_error(regex_error::undefined_name, pattern, pos);
Expand Down Expand Up @@ -501,7 +501,7 @@ static void expand_list(const char *pattern, size_t len, size_t& loc, size_t& po
else if ((c & 0xC0) == 0xC0 && is_modified(mod, 'u'))
{
// Unicode normalization may need to combine a previous ASCII character with a Unicode combining character
if (loc < pos && std::isalpha(pattern[pos - 1]))
if (loc < pos && std::isalpha(static_cast<unsigned char>(pattern[pos - 1])))
--pos;
regex.append(&pattern[loc], pos - loc);
const char *s = &pattern[pos];
Expand Down Expand Up @@ -551,7 +551,7 @@ static void insert_escape_class(const char *pattern, size_t pos, convert_flag_ty
wc = Posix::range(name);
if (wc == NULL)
throw regex_error(regex_error::invalid_class, pattern, pos);
if (std::islower(c))
if (std::islower(static_cast<unsigned char>(c)))
{
if (wc[0] <= '\n' && wc[1] >= '\n' && (flags & convert_flag::notnewline))
{
Expand Down Expand Up @@ -745,7 +745,7 @@ static int insert_escape(const char *pattern, size_t len, size_t& pos, convert_f
pos = k;
return -1;
}
else if (std::isalpha(c))
else if (std::isalpha(static_cast<unsigned char>(c)))
{
const char *s = std::strchr(regex_abtnvfr, c);
if (s == NULL)
Expand Down Expand Up @@ -1094,7 +1094,7 @@ static void convert_escape_char(const char *pattern, size_t len, size_t& loc, si
else if (std::strchr(regex_meta, c) == NULL)
{
char buf[3] = { '^', static_cast<char>(lowercase(c)), '\0' };
bool invert = std::isupper(c) != 0;
bool invert = std::isupper(static_cast<unsigned char>(c)) != 0;
if (c == 'n' || (invert && strchr("DHLUWX", c) != NULL))
nl = true;
const char *name = buf + !invert;
Expand Down Expand Up @@ -1286,7 +1286,7 @@ static void convert_escape(const char *pattern, size_t len, size_t& loc, size_t&
throw regex_error(regex_error::invalid_escape, pattern, pos);
if (wc == '\n')
nl = true;
if (std::isalpha(wc) && is_modified(mod, 'i'))
if (std::isalpha(static_cast<unsigned char>(wc)) && is_modified(mod, 'i'))
{
// anycase: translate A to [Aa]
regex.append(&pattern[loc], pos - loc - 1).push_back('[');
Expand Down Expand Up @@ -1335,7 +1335,7 @@ static void convert_escape(const char *pattern, size_t len, size_t& loc, size_t&
if (wc <= 0xFF)
{
// translate \u{X}, \u00XX (convert_flag::u4) and \x{X} to \xXX
if (std::isalpha(wc) && is_modified(mod, 'i'))
if (std::isalpha(static_cast<unsigned char>(wc)) && is_modified(mod, 'i'))
{
// anycase: translate A to [Aa]
regex.append(&pattern[loc], pos - loc - 1).push_back('[');
Expand Down Expand Up @@ -1498,7 +1498,7 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
mods.push_back('m');
size_t k = 2;
bool invert = false;
while (k < len && (pattern[k] == '-' || std::isalpha(pattern[k])))
while (k < len && (pattern[k] == '-' || std::isalpha(static_cast<unsigned char>(pattern[k]))))
{
if (pattern[k] == '-')
{
Expand Down Expand Up @@ -1711,7 +1711,7 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
std::string mods, unmods;
size_t k = pos;
bool invert = false;
while (k < len && (pattern[k] == '-' || std::isalnum(pattern[k])))
while (k < len && (pattern[k] == '-' || std::isalnum(static_cast<unsigned char>(pattern[k]))))
{
if (pattern[k] == '-')
{
Expand Down Expand Up @@ -2006,7 +2006,7 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
}
else
{
if (macros != NULL && pos + 1 < len && (std::isalpha(pattern[pos + 1]) || pattern[pos + 1] == '_' || pattern[pos + 1] == '$' || (pattern[pos + 1] & 0x80) == 0x80))
if (macros != NULL && pos + 1 < len && (std::isalpha(static_cast<unsigned char>(pattern[pos + 1])) || pattern[pos + 1] == '_' || pattern[pos + 1] == '$' || (pattern[pos + 1] & 0x80) == 0x80))
{
// if macros are provided: lookup {name} and expand without converting
regex.append(&pattern[loc], pos - loc);
Expand Down Expand Up @@ -2039,7 +2039,7 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
if (beg)
throw regex_error(regex_error::empty_expression, pattern, pos);
++pos;
if (pos >= len || !std::isdigit(pattern[pos]))
if (pos >= len || !std::isdigit(static_cast<unsigned char>(pattern[pos])))
throw regex_error(regex_error::invalid_repeat, pattern, pos);
char *s;
size_t n = static_cast<size_t>(std::strtoul(&pattern[pos], &s, 10));
Expand Down Expand Up @@ -2204,7 +2204,7 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
beg = false;
break;
default:
if (std::isalpha(pattern[pos]))
if (std::isalpha(static_cast<unsigned char>(pattern[pos])))
{
if (is_modified(mod, 'i'))
{
Expand All @@ -2219,7 +2219,7 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
else if ((c & 0xC0) == 0xC0 && is_modified(mod, 'u'))
{
// Unicode normalization may need to combine a previous ASCII character with a Unicode combining character
if (loc < pos && std::isalpha(pattern[pos - 1]))
if (loc < pos && std::isalpha(static_cast<unsigned char>(pattern[pos - 1])))
--pos;
regex.append(&pattern[loc], pos - loc);
const char *s = &pattern[pos];
Expand Down
2 changes: 1 addition & 1 deletion lib/pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ void Pattern::init_options(const char *options)
case 'z':
for (const char *t = s += (s[1] == '='); *s != ';' && *s != '\0'; ++t)
{
if (std::isspace(*t) || *t == ';' || *t == '\0')
if (std::isspace(static_cast<unsigned char>(*t)) || *t == ';' || *t == '\0')
{
if (t > s + 1)
opt_.z = std::string(s + 1, t - s - 1);
Expand Down
62 changes: 31 additions & 31 deletions src/reflex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,10 @@ static const Reflex::Library library_table[] = {
////////////////////////////////////////////////////////////////////////////////

/// Convert to lower case
inline int lower(int c)
inline char char_tolower(char c)
/// @returns lower case char
{
return std::isalpha(c) ? (c | 0x20) : c;
return static_cast<char>(std::isalpha(static_cast<unsigned char>(c)) ? (c | 0x20) : c);
}

/// Add file extension if not present, modifies the string argument and returns a copy
Expand Down Expand Up @@ -778,7 +778,7 @@ bool Reflex::get_line()
line.push_back(c);
}
linelen = line.length();
while (linelen > 0 && std::isspace(line.at(linelen - 1)))
while (linelen > 0 && std::isspace(static_cast<unsigned char>(line.at(linelen - 1))))
--linelen;
line.resize(linelen);
if (in.eof() && line.empty())
Expand Down Expand Up @@ -828,7 +828,7 @@ bool Reflex::skip_comment(size_t& pos)
/// Match case-insensitive string s while ignoring the rest of the line, return true if OK
bool Reflex::is(const char *s)
{
for (size_t pos = 0; pos < linelen && *s != '\0' && lower(line.at(pos)) == *s; ++pos, ++s)
for (size_t pos = 0; pos < linelen && *s != '\0' && char_tolower(line.at(pos)) == *s; ++pos, ++s)
continue;
return *s == '\0';
}
Expand All @@ -837,9 +837,9 @@ bool Reflex::is(const char *s)
bool Reflex::ins(const char *s)
{
size_t pos = 0;
while (pos < linelen && std::isspace(line.at(pos)))
while (pos < linelen && std::isspace(static_cast<unsigned char>(line.at(pos))))
++pos;
while (pos < linelen && *s != '\0' && lower(line.at(pos)) == *s)
while (pos < linelen && *s != '\0' && char_tolower(line.at(pos)) == *s)
{
++pos;
++s;
Expand All @@ -852,17 +852,17 @@ bool Reflex::br(size_t pos, const char *s)
{
if (s != NULL)
{
if (pos >= linelen || *s == '\0' || lower(line.at(pos)) != *s++)
if (pos >= linelen || *s == '\0' || char_tolower(line.at(pos)) != *s++)
return false;
while (++pos < linelen && *s != '\0' && lower(line.at(pos)) == *s++)
while (++pos < linelen && *s != '\0' && char_tolower(line.at(pos)) == *s++)
continue;
}
while (pos < linelen && std::isspace(line.at(pos)))
while (pos < linelen && std::isspace(static_cast<unsigned char>(line.at(pos))))
++pos;
if (pos >= linelen || line.at(pos) != '{')
return false;
++pos;
while (pos < linelen && std::isspace(line.at(pos)))
while (pos < linelen && std::isspace(static_cast<unsigned char>(line.at(pos))))
++pos;
if (pos >= linelen)
return true;
Expand All @@ -872,19 +872,19 @@ bool Reflex::br(size_t pos, const char *s)
/// Advance pos to match case-insensitive initial part of the string s followed by white space, return true if OK
bool Reflex::as(size_t& pos, const char *s)
{
if (pos >= linelen || *s == '\0' || lower(line.at(pos)) != *s++)
if (pos >= linelen || *s == '\0' || char_tolower(line.at(pos)) != *s++)
return false;
while (++pos < linelen && *s != '\0' && lower(line.at(pos)) == *s++)
while (++pos < linelen && *s != '\0' && char_tolower(line.at(pos)) == *s++)
continue;
return ws(pos);
}

/// Advance pos over whitespace, returns true if whitespace was found
bool Reflex::ws(size_t& pos)
{
if (pos >= linelen || (pos > 0 && !std::isspace(line.at(pos))))
if (pos >= linelen || (pos > 0 && !std::isspace(static_cast<unsigned char>(line.at(pos)))))
return false;
while (pos < linelen && std::isspace(line.at(pos)))
while (pos < linelen && std::isspace(static_cast<unsigned char>(line.at(pos))))
++pos;
return true;
}
Expand All @@ -903,15 +903,15 @@ bool Reflex::eq(size_t& pos)
/// Advance pos to end of line while skipping whitespace, return true if end of line
bool Reflex::nl(size_t& pos)
{
while (pos < linelen && std::isspace(line.at(pos)))
while (pos < linelen && std::isspace(static_cast<unsigned char>(line.at(pos))))
++pos;
return pos >= linelen;
}

/// Check if current line starts a block of code or a comment
bool Reflex::is_code()
{
return linelen > 0 && ((std::isspace(line.at(0)) && options["freespace"].empty()) || is("%{") || is("//") || is("/*"));
return linelen > 0 && ((std::isspace(static_cast<unsigned char>(line.at(0))) && options["freespace"].empty()) || is("%{") || is("//") || is("/*"));
}

/// Check if current line starts a block of %top code
Expand Down Expand Up @@ -941,12 +941,12 @@ bool Reflex::is_begin_code()
/// Advance pos over name (letters, digits, ., -, _ or any non-ASCII character > U+007F), return name
std::string Reflex::get_name(size_t& pos)
{
if (pos >= linelen || (!std::isalnum(line.at(pos)) && line.at(pos) != '_' && (line.at(pos) & 0x80) != 0x80))
if (pos >= linelen || (!std::isalnum(static_cast<unsigned char>(line.at(pos))) && line.at(pos) != '_' && (line.at(pos) & 0x80) != 0x80))
return "";
size_t loc = pos++;
while (pos < linelen)
{
if (!std::isalnum(line.at(pos)) && line.at(pos) != '_' && line.at(pos) != '-' && line.at(pos) != '.' && (line.at(pos) & 0x80) != 0x80)
if (!std::isalnum(static_cast<unsigned char>(line.at(pos))) && line.at(pos) != '_' && line.at(pos) != '-' && line.at(pos) != '.' && (line.at(pos) & 0x80) != 0x80)
break;
++pos;
}
Expand All @@ -961,7 +961,7 @@ std::string Reflex::get_namespace(size_t& pos)
{
if (line.at(pos) == ':' && pos + 1 < linelen && line.at(pos + 1) == ':') // parse ::
++pos;
else if (!std::isalnum(line.at(pos)) && line.at(pos) != '_' && line.at(pos) != '-' && line.at(pos) != '.' && (line.at(pos) & 0x80) != 0x80)
else if (!std::isalnum(static_cast<unsigned char>(line.at(pos))) && line.at(pos) != '_' && line.at(pos) != '-' && line.at(pos) != '.' && (line.at(pos) & 0x80) != 0x80)
break;
++pos;
}
Expand All @@ -971,14 +971,14 @@ std::string Reflex::get_namespace(size_t& pos)
/// Advance pos over option name (letters, digits, +/hyphen/underscore), return name
std::string Reflex::get_option(size_t& pos)
{
if (pos >= linelen || !std::isalnum(line.at(pos)))
if (pos >= linelen || !std::isalnum(static_cast<unsigned char>(line.at(pos))))
return "";
size_t loc = pos++;
while (pos < linelen)
{
if (line.at(pos) == '-' || line.at(pos) == '+') // normalize - and + to _
line[pos] = '_';
else if (!std::isalnum(line.at(pos)) && line.at(pos) != '_')
else if (!std::isalnum(static_cast<unsigned char>(line.at(pos))) && line.at(pos) != '_')
break;
++pos;
}
Expand All @@ -995,7 +995,7 @@ std::string Reflex::get_start(size_t& pos)
{
if (line.at(pos) == '-') // normalize - to _
line[pos] = '_';
else if (!std::isalnum(line.at(pos)) && line.at(pos) != '_' && (line.at(pos) & 0x80) != 0x80)
else if (!std::isalnum(static_cast<unsigned char>(line.at(pos))) && line.at(pos) != '_' && (line.at(pos) & 0x80) != 0x80)
break;
++pos;
}
Expand Down Expand Up @@ -1047,15 +1047,15 @@ bool Reflex::get_pattern(size_t& pos, std::string& pattern, std::string& regex)
if (fsp)
{
if (nsp < pos && (
(c == '{' && (pos + 1 == linelen || line.at(pos + 1) == '}' || std::isspace(line.at(pos + 1)))) ||
(c == '{' && (pos + 1 == linelen || line.at(pos + 1) == '}' || std::isspace(static_cast<unsigned char>(line.at(pos + 1))))) ||
(c == '|' && pos + 1 == linelen) ||
(c == '/' && pos + 1 < linelen && (line.at(pos + 1) == '/' || line.at(pos + 1) == '*'))))
{
pos = nsp;
break;
}
}
else if (std::isspace(c))
else if (std::isspace(static_cast<unsigned char>(c)))
{
break;
}
Expand Down Expand Up @@ -1134,7 +1134,7 @@ bool Reflex::get_pattern(size_t& pos, std::string& pattern, std::string& regex)
++pos;
}
}
if (fsp && !std::isspace(c))
if (fsp && !std::isspace(static_cast<unsigned char>(c)))
nsp = pos;
}
pattern.append(line.substr(loc, pos - loc));
Expand Down Expand Up @@ -1239,7 +1239,7 @@ std::string Reflex::get_code(size_t& pos)
}
else
{
if (blk == 0 && lev == 0 && linelen > 0 && (!std::isspace(line.at(0)) || !options["freespace"].empty()))
if (blk == 0 && lev == 0 && linelen > 0 && (!std::isspace(static_cast<unsigned char>(line.at(0))) || !options["freespace"].empty()))
return code;
code.append("\n").append(line);
}
Expand Down Expand Up @@ -1312,8 +1312,8 @@ std::string Reflex::upper_name(const std::string& s)
std::string t;
for (size_t i = 0; i < s.size(); ++i)
{
if (std::isalnum(s.at(i)))
t.push_back(std::toupper(s.at(i)));
if (std::isalnum(static_cast<unsigned char>(s.at(i))))
t.push_back(static_cast<char>(std::toupper(static_cast<unsigned char>(s.at(i)))));
else
t.push_back('_');
}
Expand All @@ -1337,11 +1337,11 @@ std::string Reflex::param_args(const std::string& params)
if (i <= from)
i = to;
while (--i > from)
if (!std::isspace(params.at(i)))
if (!std::isspace(static_cast<unsigned char>(params.at(i))))
break;
size_t j = i++;
while (--i > from)
if (!std::isalnum(params.at(i)) && params.at(i) != '_')
if (!std::isalnum(static_cast<unsigned char>(params.at(i))) && params.at(i) != '_')
break;
if (!args.empty())
args.append(", ");
Expand All @@ -1355,7 +1355,7 @@ std::string Reflex::param_args(const std::string& params)
bool Reflex::get_starts(size_t& pos, Starts& starts)
{
pos = 0;
if (linelen > 1 && line.at(0) == '<' && (std::isalpha(line.at(1)) || line.at(1) == '_' || line.at(1) == '*' || (line.at(1) & 0x80) == 0x80 || line.at(1) == '^') && line.find('>') != std::string::npos)
if (linelen > 1 && line.at(0) == '<' && (std::isalpha(static_cast<unsigned char>(line.at(1))) || line.at(1) == '_' || line.at(1) == '*' || (line.at(1) & 0x80) == 0x80 || line.at(1) == '^') && line.find('>') != std::string::npos)
{
do
{
Expand Down

0 comments on commit ef36c16

Please sign in to comment.