Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cct and cs2cs: make them robust against UTF-8 BOM at beginning of input files (fixes #3287) #3395

Merged
merged 1 commit into from
Oct 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 36 additions & 21 deletions src/apps/cct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ static void logger(void *data, int level, const char *msg);
static void print(PJ_LOG_LEVEL log_level, const char *fmt, ...);

/* Prototypes from functions in this file */
char *column (char *buf, int n);
PJ_COORD parse_input_line (char *buf, int *columns, double fixed_height, double fixed_time);
static const char *column (const char *buf, int n);
static char *column (char *buf, int n);
PJ_COORD parse_input_line (const char *buf, int *columns, double fixed_height, double fixed_time);


static const char usage[] = {
Expand Down Expand Up @@ -203,9 +204,6 @@ int main(int argc, char **argv) {
OPTARGS *o;
char blank_comment[] = "";
char whitespace[] = " ";
char *comment;
char *comment_delimiter;
char *buf;
int i, nfields = 4, skip_lines = 0, verbose;
double fixed_z = HUGE_VAL, fixed_time = HUGE_VAL;
int decimals_angles = 10;
Expand Down Expand Up @@ -406,7 +404,8 @@ int main(int argc, char **argv) {
direction = PJ_FWD;

/* Allocate input buffer */
buf = static_cast<char*>(calloc (1, 10000));
constexpr int BUFFER_SIZE = 10000;
char* buf = static_cast<char*>(calloc (1, BUFFER_SIZE));
if (nullptr==buf) {
print (PJ_LOG_ERROR, "%s: Out of memory", o->progname);
proj_destroy (P);
Expand All @@ -418,30 +417,43 @@ int main(int argc, char **argv) {


/* Loop over all records of all input files */
int previous_index = -1;
while (opt_input_loop (o, optargs_file_format_text)) {
int err;
void *ret = fgets (buf, 10000, o->input);
char *c = column (buf, 1);
char *bufptr = fgets (buf, BUFFER_SIZE - 1, o->input);
opt_eof_handler (o);
if (nullptr==ret) {
if (nullptr==bufptr) {
print (PJ_LOG_ERROR, "Read error in record %d", (int) o->record_index);
continue;
}
point = parse_input_line (buf, columns_xyzt, fixed_z, fixed_time);

const bool bFirstLine = o->input_index != previous_index;
previous_index = o->input_index;
if( bFirstLine &&
static_cast<uint8_t>(bufptr[0]) == 0xEF &&
static_cast<uint8_t>(bufptr[1]) == 0xBB &&
static_cast<uint8_t>(bufptr[2]) == 0xBF )
{
// Skip UTF-8 Byte Order Mark (BOM)
bufptr += 3;
}

point = parse_input_line (bufptr, columns_xyzt, fixed_z, fixed_time);
if (skip_lines > 0) {
skip_lines--;
continue;
}

/* if it's a comment or blank line, we reflect it */
const char *c = column (bufptr, 1);
if (c && ((*c=='\0') || (*c=='#'))) {
fprintf (fout, "%s", buf);
fprintf (fout, "%s", bufptr);
continue;
}

if (HUGE_VAL==point.xyzt.x) {
/* otherwise, it must be a syntax error */
print (PJ_LOG_NONE, "# Record %d UNREADABLE: %s", (int) o->record_index, buf);
print (PJ_LOG_NONE, "# Record %d UNREADABLE: %s", (int) o->record_index, bufptr);
print (PJ_LOG_ERROR, "%s: Could not parse file '%s' line %d", o->progname, opt_filename (o), opt_record (o));
continue;
}
Expand All @@ -457,27 +469,27 @@ int main(int argc, char **argv) {
if (HUGE_VAL==point.xyzt.x) {
/* transformation error */
print (PJ_LOG_NONE, "# Record %d TRANSFORMATION ERROR: %s (%s)",
(int) o->record_index, buf, proj_errno_string (proj_errno(P)));
(int) o->record_index, bufptr, proj_errno_string (proj_errno(P)));
proj_errno_restore (P, err);
continue;
}
proj_errno_restore (P, err);

/* handle comment string */
comment = column(buf, nfields+1);
char* comment = column(bufptr, nfields+1);
if (opt_given(o, "c")) {
/* what number is the last coordinate column in the input data? */
int colmax = 0;
for (i=0; i<4; i++)
colmax = MAX(colmax, columns_xyzt[i]);
comment = column(buf, colmax+1);
comment = column(bufptr, colmax+1);
}
/* remove the line feed from comment, as logger() above, invoked
by print() below (output), will add one */
size_t len = strlen(comment);
if (len >= 1)
comment[len - 1] = '\0';
comment_delimiter = *comment ? whitespace : blank_comment;
const char* comment_delimiter = *comment ? whitespace : blank_comment;

/* Time to print the result */
/* use same arguments to printf format string for both radians and
Expand Down Expand Up @@ -520,7 +532,7 @@ int main(int argc, char **argv) {


/* return a pointer to the n'th column of buf */
char *column (char *buf, int n) {
static const char *column (const char *buf, int n) {
int i;
if (n <= 0)
return buf;
Expand All @@ -535,19 +547,22 @@ char *column (char *buf, int n) {
return buf;
}

/* Mutable-buffer overload of column(): delegates the lookup to the
   const char* overload, then strips the const qualifier from the
   pointer it hands back before returning it to the caller. */
static char *column (char *buf, int n) {
    /* Bind to a const pointer first so the call below resolves to the
       const char* overload rather than recursing into this one. */
    const char *readonly_buf = buf;
    const char *found = column (readonly_buf, n);
    return const_cast<char*>(found);
}

/* column to double */
static double cold (char *args, int col) {
static double cold (const char *args, int col) {
char *endp;
char *target;
double d;
target = column (args, col);
const char* target = column (args, col);
d = proj_strtod (target, &endp);
if (endp==target)
return HUGE_VAL;
return d;
}

PJ_COORD parse_input_line (char *buf, int *columns, double fixed_height, double fixed_time) {
PJ_COORD parse_input_line (const char *buf, int *columns, double fixed_height, double fixed_time) {
PJ_COORD err = proj_coord (HUGE_VAL, HUGE_VAL, HUGE_VAL, HUGE_VAL);
PJ_COORD result = err;
int prev_errno = errno;
Expand Down
16 changes: 14 additions & 2 deletions src/apps/cs2cs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,25 @@ static void process(FILE *fid)
{
char line[MAX_LINE + 3], *s, pline[40];
PJ_UV data;
bool bFirstLine = true;

for (;;) {
for (;; bFirstLine = false ) {
double z;

++emess_dat.File_line;
if (!(s = fgets(line, MAX_LINE, fid)))
break;

if( bFirstLine &&
static_cast<uint8_t>(s[0]) == 0xEF &&
static_cast<uint8_t>(s[1]) == 0xBB &&
static_cast<uint8_t>(s[2]) == 0xBF )
{
// Skip UTF-8 Byte Order Mark (BOM)
s += 3;
}
const char* pszLineAfterBOM = s;

if (!strchr(s, '\n')) { /* overlong line */
int c;
(void)strcat(s, "\n");
Expand Down Expand Up @@ -151,7 +163,7 @@ static void process(FILE *fid)
char temp;
temp = *s;
*s = '\0';
(void)fputs(line, stdout);
(void)fputs(pszLineAfterBOM, stdout);
*s = temp;
putchar('\t');
}
Expand Down
1 change: 1 addition & 0 deletions test/cli/input_with_utf8_bom.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0 3 0
3 changes: 3 additions & 0 deletions test/cli/testcct
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ echo "Test cct with WKT in a file" >> ${OUT}
echo "3541657.3778 948984.2343 5201383.5231 2020.5" | $EXE @in.wkt >>${OUT}
rm in.wkt

echo "Test cct with input file with UTF-8 BOM marker" >> ${OUT}
$EXE +proj=noop ${TEST_CLI_DIR}/input_with_utf8_bom.txt >> ${OUT}

# do 'diff' with distribution results
echo "diff ${OUT} with testcct_out.dist"
diff -u ${OUT} ${TEST_CLI_DIR}/testcct_out.dist
Expand Down
2 changes: 2 additions & 0 deletions test/cli/testcct_out.dist
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ Test cct with object code initialization and file input
3541658.5334 948984.5160 5201383.7251 2020.5000
Test cct with WKT in a file
3541657.9112 948983.7503 5201383.2482 2020.5000
Test cct with input file with UTF-8 BOM marker
0.0000 3.0000 0.0000 inf
3 changes: 3 additions & 0 deletions test/cli/testvarious
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,9 @@ $EXE -d 3 -E "WGS 84 + EGM96 height" "WGS 84" >> ${OUT} <<EOF
49 2 0
EOF

echo "##############################################################" >> ${OUT}
echo "Test input file with UTF-8 BOM marker" >> ${OUT}
$EXE -d 3 -E EPSG:4326 EPSG:32631 ${TEST_CLI_DIR}/input_with_utf8_bom.txt >> ${OUT}

# Done!
# do 'diff' with distribution results
Expand Down
3 changes: 3 additions & 0 deletions test/cli/tv_out.dist
Original file line number Diff line number Diff line change
Expand Up @@ -504,3 +504,6 @@ Check that we select the operation that has the smallest area of use, when 2 hav
Check that we promote CRS specified by name to 3D when the other one is 3D
49 2 50 49.000 2.000 4.936
49 2 0 49.000 2.000 45.064
##############################################################
Test input file with UTF-8 BOM marker
0 3 0 500000.000 0.000 0.000