Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No panic on timestamp buffer overflow #3519

Merged
merged 2 commits into from
Jan 13, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions arrow-cast/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,13 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
// timezone offset, using ' ' as a separator
// Example: 2020-09-08 13:42:29.190855-05:00
if let Ok(ts) = DateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f%:z") {
return Ok(ts.timestamp_nanos());
return to_timestamp_nanos(ts.naive_utc());
}

// with an explicit Z, using ' ' as a separator
// Example: 2020-09-08 13:42:29Z
if let Ok(ts) = Utc.datetime_from_str(s, "%Y-%m-%d %H:%M:%S%.fZ") {
return Ok(ts.timestamp_nanos());
return to_timestamp_nanos(ts.naive_utc());
}

// Support timestamps without an explicit timezone offset, again
Expand All @@ -99,7 +99,7 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
// without a timezone specifier as a local time, using T as a separator
// Example: 2020-09-08T13:42:29.190855
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f") {
return Ok(ts.timestamp_nanos());
return to_timestamp_nanos(ts);
}

// without a timezone specifier as a local time, using T as a
Expand All @@ -112,7 +112,7 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
// without a timezone specifier as a local time, using ' ' as a separator
// Example: 2020-09-08 13:42:29.190855
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f") {
return Ok(ts.timestamp_nanos());
return to_timestamp_nanos(ts);
}

// without a timezone specifier as a local time, using ' ' as a
Expand Down Expand Up @@ -141,6 +141,18 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
)))
}

/// Converts a `NaiveDateTime` to nanoseconds since the Unix epoch without panicking.
///
/// This is a defensive wrapper around the nanosecond conversion: chrono-rs can panic
/// when the conversion happens on dates outside the range an `i64` nanosecond count
/// can hold, so the value is computed here with checked arithmetic instead.
///
/// # Errors
///
/// Returns `ArrowError::ParseError` carrying `ERR_NANOSECONDS_NOT_SUPPORTED` when the
/// nanosecond count does not fit in an `i64`.
#[inline]
fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
    // Checking only the seconds -> nanoseconds multiplication is not sufficient: in the
    // last representable second the product still fits, yet adding the sub-second part
    // can overflow. Both steps are therefore checked.
    dt.timestamp()
        .checked_mul(1_000_000_000)
        .and_then(|nanos| nanos.checked_add(i64::from(dt.timestamp_subsec_nanos())))
        .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
}

/// Accepts a string in ISO8601 standard format and some
/// variants and converts it to nanoseconds since midnight.
///
Expand Down Expand Up @@ -373,6 +385,9 @@ impl Parser for Time32SecondType {
/// Number of days between 0001-01-01 (the start of the Common Era) and 1970-01-01
/// (the Unix epoch).
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message returned when a timestamp cannot be converted to nanoseconds
/// because it lies outside the supported interval.
// NOTE(review): `i64::MAX` nanoseconds past the epoch appears to correspond to
// 2262-04-11T23:47:16.854775807, so the trailing `...804` looks slightly off —
// confirm before changing, since tests match on this exact text.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️


impl Parser for Date32Type {
fn parse(string: &str) -> Option<i32> {
let date = string.parse::<chrono::NaiveDate>().ok()?;
Expand Down Expand Up @@ -845,4 +860,11 @@ mod tests {
Some(7_801)
);
}

/// A date earlier than the supported nanosecond range must produce the dedicated
/// "nanoseconds not supported" parse error.
#[test]
fn string_to_timestamp_old() {
    let err = parse_timestamp("1677-06-14T07:29:01.256").unwrap_err();
    assert!(err.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED));
}
}