diff --git a/src/header/common/content_disposition.rs b/src/header/common/content_disposition.rs index cefb62757b..edb27fecd2 100644 --- a/src/header/common/content_disposition.rs +++ b/src/header/common/content_disposition.rs @@ -8,11 +8,11 @@ use language_tags::LanguageTag; use std::fmt; -use std::str::FromStr; use unicase::UniCase; use url::percent_encoding; use header::{Header, HeaderFormat, parsing}; +use header::parsing::parse_extended_value; use header::shared::Charset; /// The implied disposition of the content of the HTTP body @@ -133,8 +133,8 @@ impl Header for ContentDisposition { Charset::Ext("UTF-8".to_owned()), None, val.trim_matches('"').as_bytes().to_owned()) } else if UniCase(&*key) == UniCase("filename*") { - let (charset, opt_language, value) = try!(parse_ext_value(val)); - DispositionParam::Filename(charset, opt_language, value) + let extended_value = try!(parse_extended_value(val)); + DispositionParam::Filename(extended_value.charset, extended_value.language_tag, extended_value.value) } else { DispositionParam::Ext(key.to_owned(), val.trim_matches('"').to_owned()) } @@ -195,68 +195,6 @@ impl fmt::Display for ContentDisposition { } } -/// Parsing of `ext-value` -/// https://tools.ietf.org/html/rfc5987#section-3.2 -/// -/// # ABNF -/// ```plain -/// ext-value = charset "'" [ language ] "'" value-chars -/// ; like RFC 2231's -/// ; (see [RFC2231], Section 7) -/// -/// charset = "UTF-8" / "ISO-8859-1" / mime-charset -/// -/// mime-charset = 1*mime-charsetc -/// mime-charsetc = ALPHA / DIGIT -/// / "!" / "#" / "$" / "%" / "&" -/// / "+" / "-" / "^" / "_" / "`" -/// / "{" / "}" / "~" -/// ; as in Section 2.3 of [RFC2978] -/// ; except that the single quote is not included -/// ; SHOULD be registered in the IANA charset registry -/// -/// language = -/// -/// value-chars = *( pct-encoded / attr-char ) -/// -/// pct-encoded = "%" HEXDIG HEXDIG -/// ; see [RFC3986], Section 2.1 -/// -/// attr-char = ALPHA / DIGIT -/// / "!" / "#" / "$" / "&" / "+" / "-" / "." -/// / "^" / "_" / "`" / "|" / "~" -/// ; token except ( "*" / "'" / "%" ) -/// ``` -fn parse_ext_value(val: &str) -> ::Result<(Charset, Option, Vec)> { - - // Break into three pieces separated by the single-quote character - let mut parts = val.splitn(3,'\''); - - // Interpret the first piece as a Charset - let charset: Charset = match parts.next() { - None => return Err(::Error::Header), - Some(n) => try!(FromStr::from_str(n)), - }; - - // Interpret the second piece as a language tag - let lang: Option = match parts.next() { - None => return Err(::Error::Header), - Some("") => None, - Some(s) => match s.parse() { - Ok(lt) => Some(lt), - Err(_) => return Err(::Error::Header), - } - }; - - // Interpret the third piece as a sequence of value characters - let value: Vec = match parts.next() { - None => return Err(::Error::Header), - Some(v) => percent_encoding::percent_decode(v.as_bytes()), - }; - - Ok( (charset, lang, value) ) -} - #[cfg(test)] mod tests { use super::{ContentDisposition,DispositionType,DispositionParam}; diff --git a/src/header/parsing.rs b/src/header/parsing.rs index 15ce8ecc9d..80e1999546 100644 --- a/src/header/parsing.rs +++ b/src/header/parsing.rs @@ -1,7 +1,12 @@ //! Utility functions for Header implementations. +use language_tags::LanguageTag; use std::str; +use std::str::FromStr; use std::fmt::{self, Display}; +use url::percent_encoding; + +use header::shared::Charset; /// Reads a single raw string when parsing a header. pub fn from_one_raw_str(raw: &[Vec]) -> ::Result { @@ -48,3 +53,131 @@ pub fn fmt_comma_delimited(f: &mut fmt::Formatter, parts: &[T]) -> f } Ok(()) } + +/// An extended header parameter value (i.e., tagged with a character set and optionally, +/// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). +pub struct ExtendedValue { + pub charset: Charset, + pub language_tag: Option, + pub value: Vec, +} + +/// Parses extended header parameter values (`ext-value`), as defined in +/// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). +/// +/// Extended values are denoted by parameter names that end with `*`. +/// +/// ## ABNF +/// ```plain +/// ext-value = charset "'" [ language ] "'" value-chars +/// ; like RFC 2231's +/// ; (see [RFC2231], Section 7) +/// +/// charset = "UTF-8" / "ISO-8859-1" / mime-charset +/// +/// mime-charset = 1*mime-charsetc +/// mime-charsetc = ALPHA / DIGIT +/// / "!" / "#" / "$" / "%" / "&" +/// / "+" / "-" / "^" / "_" / "`" +/// / "{" / "}" / "~" +/// ; as in Section 2.3 of [RFC2978] +/// ; except that the single quote is not included +/// ; SHOULD be registered in the IANA charset registry +/// +/// language = +/// +/// value-chars = *( pct-encoded / attr-char ) +/// +/// pct-encoded = "%" HEXDIG HEXDIG +/// ; see [RFC3986], Section 2.1 +/// +/// attr-char = ALPHA / DIGIT +/// / "!" / "#" / "$" / "&" / "+" / "-" / "." +/// / "^" / "_" / "`" / "|" / "~" +/// ; token except ( "*" / "'" / "%" ) +/// ``` +pub fn parse_extended_value(val: &str) -> ::Result { + + // Break into three pieces separated by the single-quote character + let mut parts = val.splitn(3,'\''); + + // Interpret the first piece as a Charset + let charset: Charset = match parts.next() { + None => return Err(::Error::Header), + Some(n) => try!(FromStr::from_str(n)), + }; + + // Interpret the second piece as a language tag + let lang: Option = match parts.next() { + None => return Err(::Error::Header), + Some("") => None, + Some(s) => match s.parse() { + Ok(lt) => Some(lt), + Err(_) => return Err(::Error::Header), + } + }; + + // Interpret the third piece as a sequence of value characters + let value: Vec = match parts.next() { + None => return Err(::Error::Header), + Some(v) => percent_encoding::percent_decode(v.as_bytes()), + }; + + Ok(ExtendedValue { + charset: charset, + language_tag: lang, + value: value, + }) +} + +#[cfg(test)] +mod tests { + use header::shared::Charset; + use super::parse_extended_value; + + #[test] + fn test_parse_extended_value_with_encoding_and_language_tag() { + let expected_language_tag = langtag!(en); + // RFC 5987, Section 3.2.2 + // Extended notation, using the Unicode character U+00A3 (POUND SIGN) + let result = parse_extended_value("iso-8859-1'en'%A3%20rates"); + assert!(result.is_ok()); + let extended_value = result.unwrap(); + assert_eq!(Charset::Iso_8859_1, extended_value.charset); + assert!(extended_value.language_tag.is_some()); + assert_eq!(expected_language_tag, extended_value.language_tag.unwrap()); + assert_eq!(vec![163, b' ', b'r', b'a', b't', b'e', b's'], extended_value.value); + } + + #[test] + fn test_parse_extended_value_with_encoding() { + // RFC 5987, Section 3.2.2 + // Extended notation, using the Unicode characters U+00A3 (POUND SIGN) + // and U+20AC (EURO SIGN) + let result = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates"); + assert!(result.is_ok()); + let extended_value = result.unwrap(); + assert_eq!(Charset::Ext("UTF-8".to_string()), extended_value.charset); + assert!(extended_value.language_tag.is_none()); + assert_eq!(vec![194, 163, b' ', b'a', b'n', b'd', b' ', 226, 130, 172, b' ', b'r', b'a', b't', b'e', b's'], extended_value.value); + } + + #[test] + fn test_parse_extended_value_missing_language_tag_and_encoding() { + // From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2 + let result = parse_extended_value("foo%20bar.html"); + assert!(result.is_err()); + } + + #[test] + fn test_parse_extended_value_partially_formatted() { + let result = parse_extended_value("UTF-8'missing third part"); + assert!(result.is_err()); + } + + #[test] + fn test_parse_extended_value_partially_formatted_blank() { + let result = parse_extended_value("blank second part'"); + assert!(result.is_err()); + } +}