diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..0a80641 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +* text eol=lf + +*.rs text diff=rust +*.toml text diff=toml +Cargo.lock text \ No newline at end of file diff --git a/examples/merge.rs b/examples/merge.rs index afeb1ee..977dc17 100644 --- a/examples/merge.rs +++ b/examples/merge.rs @@ -1,297 +1,297 @@ -// if you use nightly then you can enable this feature to gain a boost in read speed of PDF's" -//#![feature(extend_one)] - -#[macro_use] -extern crate lopdf; - -use std::collections::BTreeMap; - -use lopdf::content::{Content, Operation}; -use lopdf::{Bookmark, Document, Object, ObjectId, Stream}; - -pub fn generate_fake_document() -> Document { - let mut doc = Document::with_version("1.5"); - let pages_id = doc.new_object_id(); - let font_id = doc.add_object(dictionary! { - "Type" => "Font", - "Subtype" => "Type1", - "BaseFont" => "Courier", - }); - let resources_id = doc.add_object(dictionary! { - "Font" => dictionary! { - "F1" => font_id, - }, - }); - let content = Content { - operations: vec![ - Operation::new("BT", vec![]), - Operation::new("Tf", vec!["F1".into(), 48.into()]), - Operation::new("Td", vec![100.into(), 600.into()]), - Operation::new("Tj", vec![Object::string_literal("Hello World!")]), - Operation::new("ET", vec![]), - ], - }; - let content_id = doc.add_object(Stream::new(dictionary! {}, content.encode().unwrap())); - let page_id = doc.add_object(dictionary! { - "Type" => "Page", - "Parent" => pages_id, - "Contents" => content_id, - "Resources" => resources_id, - "MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()], - }); - let pages = dictionary! { - "Type" => "Pages", - "Kids" => vec![page_id.into()], - "Count" => 1, - }; - doc.objects.insert(pages_id, Object::Dictionary(pages)); - let catalog_id = doc.add_object(dictionary! 
{ - "Type" => "Catalog", - "Pages" => pages_id, - }); - doc.trailer.set("Root", catalog_id); - - doc -} - -fn main() { - // Generate a stack of Documents to merge - // (The Bookmark layer, Document to merge) - let documents = vec![ - (1u32, generate_fake_document()), - (2u32, generate_fake_document()), - (2u32, generate_fake_document()), - (3u32, generate_fake_document()), - ]; - - // We use this to keep track of the last Parent per layer depth. - let mut layer_parent: [Option; 4] = [None; 4]; - - // This is the last layer ran. - let mut last_layer = 0; - - // Define a starting max_id (will be used as start index for object_ids) - let mut max_id = 1; - let mut pagenum = 1; - // Collect all Documents Objects grouped by a map - let mut documents_pages = BTreeMap::new(); - let mut documents_objects = BTreeMap::new(); - let mut document = Document::with_version("1.5"); - - // Lets try to set these to be bigger to avoid multi allocations for faster handling of files. - // We are just saying each Document it about 1000 objects in size. can be adjusted for better speeds. - // This can only be used if you use nightly or the #![feature(extend_one)] is stablized. - // documents_pages.extend_reserve(documents.len() * 1000); - // documents_objects.extend_reserve(documents.len() * 1000); - - // Add a Table of Contents - // We set the object page to (0,0) which means it will point to the first object after it. - layer_parent[0] = Some(document.add_bookmark( - Bookmark::new("Table of Contents".to_string(), [0.0, 0.0, 0.0], 0, (0, 0)), - None, - )); - - // Can set bookmark formatting and color per report bookmark added. 
- // Formating is 1 for italic 2 for bold 3 for bold and italic - // Color is RGB 0.0..255.0 - for (layer, mut doc) in documents { - let color = [0.0, 0.0, 0.0]; - let format = 0; - let mut display = String::new(); - - doc.renumber_objects_with(max_id); - - max_id = doc.max_id + 1; - - let mut first_object = None; - - let pages = doc.get_pages(); - - // This is actually better than extend as we use less allocations and cloning then. - pages - .into_iter() - .map(|(_, object_id)| { - // We use this as the return object for Bookmarking to deturmine what it points too. - // We only want to do this for the first page though. - if first_object.is_none() { - first_object = Some(object_id); - display = format!("Page {}", pagenum); - pagenum += 1; - } - - (object_id, doc.get_object(object_id).unwrap().to_owned()) - }) - .for_each(|(key, value)| { - documents_pages.insert(key, value); - }); - - documents_objects.extend(doc.objects); - - // Lets shadow our pointer back if nothing then set to (0,0) tto point to the next page - let object = first_object.unwrap_or((0, 0)); - - // This will use the layering to implement children under Parents in the bookmarks - // Example as we are generating it here. 
- // Table of Contents - // - Page 1 - // -- Page 2 - // -- Page 3 - // --- Page 4 - - if layer == 0 { - layer_parent[0] = Some(document.add_bookmark(Bookmark::new(display, color, format, object), None)); - last_layer = 0; - } else if layer == 1 { - layer_parent[1] = - Some(document.add_bookmark(Bookmark::new(display, color, format, object), layer_parent[0])); - last_layer = 1; - } else if last_layer >= layer || last_layer == layer - 1 { - layer_parent[layer as usize] = Some(document.add_bookmark( - Bookmark::new(display, color, format, object), - layer_parent[(layer - 1) as usize], - )); - last_layer = layer; - } else if last_layer > 0 { - layer_parent[last_layer as usize] = Some(document.add_bookmark( - Bookmark::new(display, color, format, object), - layer_parent[(last_layer - 1) as usize], - )); - } else { - layer_parent[1] = - Some(document.add_bookmark(Bookmark::new(display, color, format, object), layer_parent[0])); - last_layer = 1; - } - } - - // Catalog and Pages are mandatory - let mut catalog_object: Option<(ObjectId, Object)> = None; - let mut pages_object: Option<(ObjectId, Object)> = None; - - // Process all objects except "Page" type - for (object_id, object) in documents_objects.into_iter() { - // We have to ignore "Page" (as are processed later), "Outlines" and "Outline" objects - // All other objects should be collected and inserted into the main Document - match object.type_name().unwrap_or("") { - "Catalog" => { - // Collect a first "Catalog" object and use it for the future "Pages" - catalog_object = Some(( - if let Some((id, _)) = catalog_object { - id - } else { - object_id - }, - object, - )); - } - "Pages" => { - // Collect and update a first "Pages" object and use it for the future "Catalog" - // We have also to merge all dictionaries of the old and the new "Pages" object - if let Ok(dictionary) = object.as_dict() { - let mut dictionary = dictionary.clone(); - if let Some((_, ref object)) = pages_object { - if let Ok(old_dictionary) = 
object.as_dict() { - dictionary.extend(old_dictionary); - } - } - - pages_object = Some(( - if let Some((id, _)) = pages_object { - id - } else { - object_id - }, - Object::Dictionary(dictionary), - )); - } - } - "Page" => {} // Ignored, processed later and separately - "Outlines" => {} // Ignored, not supported yet - "Outline" => {} // Ignored, not supported yet - _ => { - document.objects.insert(object_id, object); - } - } - } - - // If no "Pages" found abort - if pages_object.is_none() { - println!("Pages root not found."); - - return; - } - - // Iter over all "Page" and collect with the parent "Pages" created before - for (object_id, object) in documents_pages.iter() { - if let Ok(dictionary) = object.as_dict() { - let mut dictionary = dictionary.clone(); - dictionary.set("Parent", pages_object.as_ref().unwrap().0); - - document.objects.insert(*object_id, Object::Dictionary(dictionary)); - } - } - - // If no "Catalog" found abort - if catalog_object.is_none() { - println!("Catalog root not found."); - - return; - } - - let (catalog_id, catalog_object) = catalog_object.unwrap(); - let (page_id, page_object) = pages_object.unwrap(); - - // Build a new "Pages" with updated fields - if let Ok(dictionary) = page_object.as_dict() { - let mut dictionary = dictionary.clone(); - - // Set new pages count - dictionary.set("Count", documents_pages.len() as u32); - - // Set new "Kids" list (collected from documents pages) for "Pages" - dictionary.set( - "Kids", - documents_pages - .into_iter() - .map(|(object_id, _)| Object::Reference(object_id)) - .collect::>(), - ); - - document.objects.insert(page_id, Object::Dictionary(dictionary)); - } - - // Build a new "Catalog" with updated fields - if let Ok(dictionary) = catalog_object.as_dict() { - let mut dictionary = dictionary.clone(); - dictionary.set("Pages", page_id); - dictionary.set("PageMode", "UseOutlines"); - dictionary.remove(b"Outlines"); // Outlines not supported in merged PDFs - - 
document.objects.insert(catalog_id, Object::Dictionary(dictionary)); - } - - document.trailer.set("Root", catalog_id); - - // Update the max internal ID as wasn't updated before due to direct objects insertion - document.max_id = document.objects.len() as u32; - - // Reorder all new Document objects - document.renumber_objects(); - - //Set any Bookmarks to the First child if they are not set to a page - document.adjust_zero_pages(); - - //Set all bookmarks to the PDF Object tree then set the Outlines to the Bookmark content map. - if let Some(n) = document.build_outline() { - if let Ok(Object::Dictionary(ref mut dict)) = document.get_object_mut(catalog_id) { - dict.set("Outlines", Object::Reference(n)); - } - } - - // Most of the time this does nothing unless there are a lot of streams - // Can be disabled to speed up the process. - // document.compress(); - - // Save the merged PDF - // Store file in current working directory. - document.save("merged.pdf").unwrap(); -} +// if you use nightly then you can enable this feature to gain a boost in read speed of PDF's" +//#![feature(extend_one)] + +#[macro_use] +extern crate lopdf; + +use std::collections::BTreeMap; + +use lopdf::content::{Content, Operation}; +use lopdf::{Bookmark, Document, Object, ObjectId, Stream}; + +pub fn generate_fake_document() -> Document { + let mut doc = Document::with_version("1.5"); + let pages_id = doc.new_object_id(); + let font_id = doc.add_object(dictionary! { + "Type" => "Font", + "Subtype" => "Type1", + "BaseFont" => "Courier", + }); + let resources_id = doc.add_object(dictionary! { + "Font" => dictionary! 
{ + "F1" => font_id, + }, + }); + let content = Content { + operations: vec![ + Operation::new("BT", vec![]), + Operation::new("Tf", vec!["F1".into(), 48.into()]), + Operation::new("Td", vec![100.into(), 600.into()]), + Operation::new("Tj", vec![Object::string_literal("Hello World!")]), + Operation::new("ET", vec![]), + ], + }; + let content_id = doc.add_object(Stream::new(dictionary! {}, content.encode().unwrap())); + let page_id = doc.add_object(dictionary! { + "Type" => "Page", + "Parent" => pages_id, + "Contents" => content_id, + "Resources" => resources_id, + "MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()], + }); + let pages = dictionary! { + "Type" => "Pages", + "Kids" => vec![page_id.into()], + "Count" => 1, + }; + doc.objects.insert(pages_id, Object::Dictionary(pages)); + let catalog_id = doc.add_object(dictionary! { + "Type" => "Catalog", + "Pages" => pages_id, + }); + doc.trailer.set("Root", catalog_id); + + doc +} + +fn main() { + // Generate a stack of Documents to merge + // (The Bookmark layer, Document to merge) + let documents = vec![ + (1u32, generate_fake_document()), + (2u32, generate_fake_document()), + (2u32, generate_fake_document()), + (3u32, generate_fake_document()), + ]; + + // We use this to keep track of the last Parent per layer depth. + let mut layer_parent: [Option; 4] = [None; 4]; + + // This is the last layer ran. + let mut last_layer = 0; + + // Define a starting max_id (will be used as start index for object_ids) + let mut max_id = 1; + let mut pagenum = 1; + // Collect all Documents Objects grouped by a map + let mut documents_pages = BTreeMap::new(); + let mut documents_objects = BTreeMap::new(); + let mut document = Document::with_version("1.5"); + + // Lets try to set these to be bigger to avoid multi allocations for faster handling of files. + // We are just saying each Document it about 1000 objects in size. can be adjusted for better speeds. 
+ // This can only be used if you use nightly or the #![feature(extend_one)] is stablized. + // documents_pages.extend_reserve(documents.len() * 1000); + // documents_objects.extend_reserve(documents.len() * 1000); + + // Add a Table of Contents + // We set the object page to (0,0) which means it will point to the first object after it. + layer_parent[0] = Some(document.add_bookmark( + Bookmark::new("Table of Contents".to_string(), [0.0, 0.0, 0.0], 0, (0, 0)), + None, + )); + + // Can set bookmark formatting and color per report bookmark added. + // Formating is 1 for italic 2 for bold 3 for bold and italic + // Color is RGB 0.0..255.0 + for (layer, mut doc) in documents { + let color = [0.0, 0.0, 0.0]; + let format = 0; + let mut display = String::new(); + + doc.renumber_objects_with(max_id); + + max_id = doc.max_id + 1; + + let mut first_object = None; + + let pages = doc.get_pages(); + + // This is actually better than extend as we use less allocations and cloning then. + pages + .into_iter() + .map(|(_, object_id)| { + // We use this as the return object for Bookmarking to deturmine what it points too. + // We only want to do this for the first page though. + if first_object.is_none() { + first_object = Some(object_id); + display = format!("Page {}", pagenum); + pagenum += 1; + } + + (object_id, doc.get_object(object_id).unwrap().to_owned()) + }) + .for_each(|(key, value)| { + documents_pages.insert(key, value); + }); + + documents_objects.extend(doc.objects); + + // Lets shadow our pointer back if nothing then set to (0,0) tto point to the next page + let object = first_object.unwrap_or((0, 0)); + + // This will use the layering to implement children under Parents in the bookmarks + // Example as we are generating it here. 
+ // Table of Contents + // - Page 1 + // -- Page 2 + // -- Page 3 + // --- Page 4 + + if layer == 0 { + layer_parent[0] = Some(document.add_bookmark(Bookmark::new(display, color, format, object), None)); + last_layer = 0; + } else if layer == 1 { + layer_parent[1] = + Some(document.add_bookmark(Bookmark::new(display, color, format, object), layer_parent[0])); + last_layer = 1; + } else if last_layer >= layer || last_layer == layer - 1 { + layer_parent[layer as usize] = Some(document.add_bookmark( + Bookmark::new(display, color, format, object), + layer_parent[(layer - 1) as usize], + )); + last_layer = layer; + } else if last_layer > 0 { + layer_parent[last_layer as usize] = Some(document.add_bookmark( + Bookmark::new(display, color, format, object), + layer_parent[(last_layer - 1) as usize], + )); + } else { + layer_parent[1] = + Some(document.add_bookmark(Bookmark::new(display, color, format, object), layer_parent[0])); + last_layer = 1; + } + } + + // Catalog and Pages are mandatory + let mut catalog_object: Option<(ObjectId, Object)> = None; + let mut pages_object: Option<(ObjectId, Object)> = None; + + // Process all objects except "Page" type + for (object_id, object) in documents_objects.into_iter() { + // We have to ignore "Page" (as are processed later), "Outlines" and "Outline" objects + // All other objects should be collected and inserted into the main Document + match object.type_name().unwrap_or("") { + "Catalog" => { + // Collect a first "Catalog" object and use it for the future "Pages" + catalog_object = Some(( + if let Some((id, _)) = catalog_object { + id + } else { + object_id + }, + object, + )); + } + "Pages" => { + // Collect and update a first "Pages" object and use it for the future "Catalog" + // We have also to merge all dictionaries of the old and the new "Pages" object + if let Ok(dictionary) = object.as_dict() { + let mut dictionary = dictionary.clone(); + if let Some((_, ref object)) = pages_object { + if let Ok(old_dictionary) = 
object.as_dict() { + dictionary.extend(old_dictionary); + } + } + + pages_object = Some(( + if let Some((id, _)) = pages_object { + id + } else { + object_id + }, + Object::Dictionary(dictionary), + )); + } + } + "Page" => {} // Ignored, processed later and separately + "Outlines" => {} // Ignored, not supported yet + "Outline" => {} // Ignored, not supported yet + _ => { + document.objects.insert(object_id, object); + } + } + } + + // If no "Pages" found abort + if pages_object.is_none() { + println!("Pages root not found."); + + return; + } + + // Iter over all "Page" and collect with the parent "Pages" created before + for (object_id, object) in documents_pages.iter() { + if let Ok(dictionary) = object.as_dict() { + let mut dictionary = dictionary.clone(); + dictionary.set("Parent", pages_object.as_ref().unwrap().0); + + document.objects.insert(*object_id, Object::Dictionary(dictionary)); + } + } + + // If no "Catalog" found abort + if catalog_object.is_none() { + println!("Catalog root not found."); + + return; + } + + let (catalog_id, catalog_object) = catalog_object.unwrap(); + let (page_id, page_object) = pages_object.unwrap(); + + // Build a new "Pages" with updated fields + if let Ok(dictionary) = page_object.as_dict() { + let mut dictionary = dictionary.clone(); + + // Set new pages count + dictionary.set("Count", documents_pages.len() as u32); + + // Set new "Kids" list (collected from documents pages) for "Pages" + dictionary.set( + "Kids", + documents_pages + .into_iter() + .map(|(object_id, _)| Object::Reference(object_id)) + .collect::>(), + ); + + document.objects.insert(page_id, Object::Dictionary(dictionary)); + } + + // Build a new "Catalog" with updated fields + if let Ok(dictionary) = catalog_object.as_dict() { + let mut dictionary = dictionary.clone(); + dictionary.set("Pages", page_id); + dictionary.set("PageMode", "UseOutlines"); + dictionary.remove(b"Outlines"); // Outlines not supported in merged PDFs + + 
document.objects.insert(catalog_id, Object::Dictionary(dictionary)); + } + + document.trailer.set("Root", catalog_id); + + // Update the max internal ID as wasn't updated before due to direct objects insertion + document.max_id = document.objects.len() as u32; + + // Reorder all new Document objects + document.renumber_objects(); + + //Set any Bookmarks to the First child if they are not set to a page + document.adjust_zero_pages(); + + //Set all bookmarks to the PDF Object tree then set the Outlines to the Bookmark content map. + if let Some(n) = document.build_outline() { + if let Ok(Object::Dictionary(ref mut dict)) = document.get_object_mut(catalog_id) { + dict.set("Outlines", Object::Reference(n)); + } + } + + // Most of the time this does nothing unless there are a lot of streams + // Can be disabled to speed up the process. + // document.compress(); + + // Save the merged PDF + // Store file in current working directory. + document.save("merged.pdf").unwrap(); +} diff --git a/src/lib.rs b/src/lib.rs index bded8c1..959de6f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ pub use crate::object::{Dictionary, Object, ObjectId, Stream, StringFormat}; mod document; mod incremental_document; mod object_stream; -mod xref; +pub mod xref; pub use crate::document::Document; pub use crate::incremental_document::IncrementalDocument; diff --git a/src/object.rs b/src/object.rs index f135725..439fd05 100644 --- a/src/object.rs +++ b/src/object.rs @@ -1,651 +1,651 @@ -use crate::{Document, Error, Result}; -use linked_hash_map::{self, Iter, IterMut, LinkedHashMap}; -use log::warn; -use std::fmt; -use std::str; - -/// Object identifier consists of two parts: object number and generation number. -pub type ObjectId = (u32, u16); - -/// Dictionary object. 
-#[derive(Clone, Default)] -pub struct Dictionary(LinkedHashMap, Object>); - -/// Stream object -/// Warning - all streams must be indirect objects, while -/// the stream dictionary may be a direct object -#[derive(Debug, Clone)] -pub struct Stream { - /// Associated stream dictionary - pub dict: Dictionary, - /// Contents of the stream in bytes - pub content: Vec, - /// Can the stream be compressed by the `Document::compress()` function? - /// Font streams may not be compressed, for example - pub allows_compression: bool, - /// Stream data's position in PDF file. - pub start_position: Option, -} - -/// Basic PDF object types defined in an enum. -#[derive(Clone)] -pub enum Object { - Null, - Boolean(bool), - Integer(i64), - Real(f64), - Name(Vec), - String(Vec, StringFormat), - Array(Vec), - Dictionary(Dictionary), - Stream(Stream), - Reference(ObjectId), -} - -/// String objects can be written in two formats. -#[derive(Debug, Clone)] -pub enum StringFormat { - Literal, - Hexadecimal, -} - -impl Default for StringFormat { - fn default() -> StringFormat { - StringFormat::Literal - } -} - -impl From for Object { - fn from(value: bool) -> Self { - Object::Boolean(value) - } -} - -impl From for Object { - fn from(number: i64) -> Self { - Object::Integer(number) - } -} - -macro_rules! from_smaller_ints { - ($( $Int: ty )+) => { - $( - impl From<$Int> for Object { - fn from(number: $Int) -> Self { - Object::Integer(i64::from(number)) - } - } - )+ - } -} - -from_smaller_ints! 
{ - i8 i16 i32 - u8 u16 u32 -} - -impl From for Object { - fn from(number: f64) -> Self { - Object::Real(number) - } -} - -impl From for Object { - fn from(number: f32) -> Self { - Object::Real(f64::from(number)) - } -} - -impl From for Object { - fn from(name: String) -> Self { - Object::Name(name.into_bytes()) - } -} - -impl<'a> From<&'a str> for Object { - fn from(name: &'a str) -> Self { - Object::Name(name.as_bytes().to_vec()) - } -} - -impl From> for Object { - fn from(array: Vec) -> Self { - Object::Array(array) - } -} - -impl From for Object { - fn from(dcit: Dictionary) -> Self { - Object::Dictionary(dcit) - } -} - -impl From for Object { - fn from(stream: Stream) -> Self { - Object::Stream(stream) - } -} - -impl From for Object { - fn from(id: ObjectId) -> Self { - Object::Reference(id) - } -} - -impl Object { - pub fn string_literal>>(s: S) -> Self { - Object::String(s.into(), StringFormat::Literal) - } - - pub fn is_null(&self) -> bool { - matches!(*self, Object::Null) - } - - pub fn as_bool(&self) -> Result { - match *self { - Object::Boolean(ref value) => Ok(*value), - _ => Err(Error::Type), - } - } - - pub fn as_i64(&self) -> Result { - match *self { - Object::Integer(ref value) => Ok(*value), - _ => Err(Error::Type), - } - } - - pub fn as_f64(&self) -> Result { - match *self { - Object::Real(ref value) => Ok(*value), - _ => Err(Error::Type), - } - } - - /// Get the object value as a float. - /// Unlike as_f64() this will also cast an Integer to a Real. - pub fn as_float(&self) -> Result { - match *self { - Object::Integer(ref value) => Ok(*value as f64), - Object::Real(ref value) => Ok(*value), - _ => Err(Error::Type), - } - } - - pub fn as_name(&self) -> Result<&[u8]> { - match *self { - Object::Name(ref name) => Ok(name), - _ => Err(Error::Type), - } - } - - pub fn as_name_str(&self) -> Result<&str> { - Ok(str::from_utf8(self.as_name()?)?) 
- } - - pub fn as_str(&self) -> Result<&[u8]> { - match self { - Object::String(string, _) => Ok(string), - _ => Err(Error::Type), - } - } - - pub fn as_str_mut(&mut self) -> Result<&mut Vec> { - match self { - Object::String(string, _) => Ok(string), - _ => Err(Error::Type), - } - } - - pub fn as_reference(&self) -> Result { - match *self { - Object::Reference(ref id) => Ok(*id), - _ => Err(Error::Type), - } - } - - pub fn as_array(&self) -> Result<&Vec> { - match *self { - Object::Array(ref arr) => Ok(arr), - _ => Err(Error::Type), - } - } - - pub fn as_array_mut(&mut self) -> Result<&mut Vec> { - match *self { - Object::Array(ref mut arr) => Ok(arr), - _ => Err(Error::Type), - } - } - - pub fn as_dict(&self) -> Result<&Dictionary> { - match *self { - Object::Dictionary(ref dict) => Ok(dict), - _ => Err(Error::Type), - } - } - - pub fn as_dict_mut(&mut self) -> Result<&mut Dictionary> { - match *self { - Object::Dictionary(ref mut dict) => Ok(dict), - _ => Err(Error::Type), - } - } - - pub fn as_stream(&self) -> Result<&Stream> { - match *self { - Object::Stream(ref stream) => Ok(stream), - _ => Err(Error::Type), - } - } - - pub fn as_stream_mut(&mut self) -> Result<&mut Stream> { - match *self { - Object::Stream(ref mut stream) => Ok(stream), - _ => Err(Error::Type), - } - } - - pub fn type_name(&self) -> Result<&str> { - match *self { - Object::Dictionary(ref dict) => dict.type_name(), - Object::Stream(ref stream) => stream.dict.type_name(), - _ => Err(Error::Type), - } - } -} - -impl fmt::Debug for Object { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - Object::Null => f.write_str("null"), - Object::Boolean(ref value) => { - if *value { - f.write_str("true") - } else { - f.write_str("false") - } - } - Object::Integer(ref value) => write!(f, "{}", *value), - Object::Real(ref value) => write!(f, "{}", *value), - Object::Name(ref name) => write!(f, "/{}", String::from_utf8_lossy(name)), - Object::String(ref text, _) => write!(f, 
"({})", String::from_utf8_lossy(text)), - Object::Array(ref array) => { - let items = array.iter().map(|item| format!("{:?}", item)).collect::>(); - write!(f, "[{}]", items.join(" ")) - } - Object::Dictionary(ref dict) => write!(f, "{:?}", dict), - Object::Stream(ref stream) => write!(f, "{:?}stream...endstream", stream.dict), - Object::Reference(ref id) => write!(f, "{} {} R", id.0, id.1), - } - } -} - -impl Dictionary { - pub fn new() -> Dictionary { - Dictionary(LinkedHashMap::new()) - } - - pub fn has(&self, key: &[u8]) -> bool { - self.0.contains_key(key) - } - - pub fn get(&self, key: &[u8]) -> Result<&Object> { - self.0.get(key).ok_or(Error::DictKey) - } - - /// Extract object from dictionary, dereferencing the object if it - /// is a reference. - pub fn get_deref<'a>(&'a self, key: &[u8], doc: &'a Document) -> Result<&'a Object> { - doc.dereference(self.get(key)?).map(|(_, object)| object) - } - - pub fn get_mut(&mut self, key: &[u8]) -> Result<&mut Object> { - self.0.get_mut(key).ok_or(Error::DictKey) - } - - pub fn set(&mut self, key: K, value: V) - where - K: Into>, - V: Into, - { - self.0.insert(key.into(), value.into()); - } - - pub fn len(&self) -> usize { - self.0.len() - } - - pub fn is_empty(&self) -> bool { - self.0.len() == 0 - } - - pub fn remove(&mut self, key: &[u8]) -> Option { - self.0.remove(key) - } - - pub fn type_name(&self) -> Result<&str> { - self.get(b"Type") - .and_then(Object::as_name_str) - .or_else(|_| self.get(b"Linearized").and(Ok("Linearized"))) - } - - pub fn type_is(&self, type_name: &[u8]) -> bool { - self.get(b"Type").and_then(Object::as_name).ok() == Some(type_name) - } - - pub fn iter(&self) -> Iter<'_, Vec, Object> { - self.0.iter() - } - - pub fn iter_mut(&mut self) -> IterMut<'_, Vec, Object> { - self.0.iter_mut() - } - - pub fn get_font_encoding(&self) -> &str { - self.get(b"Encoding") - .and_then(Object::as_name_str) - .unwrap_or("StandardEncoding") - } - - pub fn extend(&mut self, other: &Dictionary) { - let 
keep_both_objects = - |new_dict: &mut LinkedHashMap, Object>, key: &Vec, value: &Object, old_value: &Object| { - let mut final_array; - - match value { - Object::Array(array) => { - final_array = Vec::with_capacity(array.len() + 1); - final_array.push(old_value.to_owned()); - final_array.extend(array.to_owned()); - } - _ => { - final_array = vec![value.to_owned(), old_value.to_owned()]; - } - } - - new_dict.insert(key.to_owned(), Object::Array(final_array)); - }; - - let mut new_dict = LinkedHashMap::with_capacity(other.0.len() + 1); - - for (key, value) in other.0.iter() { - if let Some(old_value) = self.0.get(key) { - match (old_value, value) { - (Object::Dictionary(old_dict), Object::Dictionary(dict)) => { - let mut replaced_dict = old_dict.to_owned(); - replaced_dict.extend(dict); - - new_dict.insert(key.to_owned(), Object::Dictionary(replaced_dict)); - } - (Object::Array(old_array), Object::Array(array)) => { - let mut replaced_array = old_array.to_owned(); - replaced_array.extend(array.to_owned()); - - new_dict.insert(key.to_owned(), Object::Array(replaced_array)); - } - (Object::Integer(old_id), Object::Integer(id)) => { - let array = vec![Object::Integer(*old_id), Object::Integer(*id)]; - - new_dict.insert(key.to_owned(), Object::Array(array)); - } - (Object::Real(old_id), Object::Real(id)) => { - let array = vec![Object::Real(*old_id), Object::Real(*id)]; - - new_dict.insert(key.to_owned(), Object::Array(array)); - } - (Object::String(old_ids, old_format), Object::String(ids, format)) => { - let array = vec![ - Object::String(old_ids.to_owned(), old_format.to_owned()), - Object::String(ids.to_owned(), format.to_owned()), - ]; - - new_dict.insert(key.to_owned(), Object::Array(array)); - } - (Object::Reference(old_object_id), Object::Reference(object_id)) => { - let array = vec![Object::Reference(*old_object_id), Object::Reference(*object_id)]; - - new_dict.insert(key.to_owned(), Object::Array(array)); - } - (Object::Null, _) | (Object::Boolean(_), _) | 
(Object::Name(_), _) | (Object::Stream(_), _) => { - new_dict.insert(key.to_owned(), old_value.to_owned()); - } - (_, _) => keep_both_objects(&mut new_dict, key, value, old_value), - } - } else { - new_dict.insert(key.to_owned(), value.to_owned()); - } - } - - self.0 = new_dict; - } -} - -#[macro_export] -macro_rules! dictionary { - () => { - $crate::Dictionary::new() - }; - ($( $key: expr => $value: expr ),+ ,) => { - dictionary!( $($key => $value),+ ) - }; - ($( $key: expr => $value: expr ),*) => {{ - let mut dict = $crate::Dictionary::new(); - $( - dict.set($key, $value); - )* - dict - }} -} - -impl fmt::Debug for Dictionary { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let entries = self - .into_iter() - .map(|(key, value)| format!("/{} {:?}", String::from_utf8_lossy(key), value)) - .collect::>(); - write!(f, "<<{}>>", entries.concat()) - } -} - -impl<'a> IntoIterator for &'a Dictionary { - type Item = (&'a Vec, &'a Object); - type IntoIter = linked_hash_map::Iter<'a, Vec, Object>; - - fn into_iter(self) -> Self::IntoIter { - self.0.iter() - } -} - -use std::iter::FromIterator; -impl>> FromIterator<(K, Object)> for Dictionary { - fn from_iter>(iter: I) -> Self { - let mut dict = Dictionary::new(); - for (k, v) in iter { - dict.set(k, v); - } - dict - } -} - -impl Stream { - pub fn new(mut dict: Dictionary, content: Vec) -> Stream { - dict.set("Length", content.len() as i64); - Stream { - dict, - content, - allows_compression: true, - start_position: None, - } - } - - pub fn with_position(dict: Dictionary, position: usize) -> Stream { - Stream { - dict, - content: vec![], - allows_compression: true, - start_position: Some(position), - } - } - - /// Default is that the stream may be compressed. 
On font streams, - /// set this to false, otherwise the font will be corrupt - #[inline] - pub fn with_compression(mut self, allows_compression: bool) -> Stream { - self.allows_compression = allows_compression; - self - } - - // Return first filter - pub fn filter(&self) -> Result { - self.filters() - .and_then(|f| f.into_iter().next().ok_or(Error::ObjectNotFound)) - } - - pub fn filters(&self) -> Result> { - let filter = self.dict.get(b"Filter")?; - - if let Ok(name) = filter.as_name_str() { - Ok(vec![name.into()]) - } else if let Ok(names) = filter.as_array() { - // It is an error if a single conversion fails. - names - .iter() - .map(|n| match Object::as_name_str(n) { - Ok(n) => Ok(String::from(n)), - Err(_) => Err(Error::Type), - }) - .collect() - } else { - Err(Error::Type) - } - } - - pub fn set_content(&mut self, content: Vec) { - self.content = content; - self.dict.set("Length", self.content.len() as i64); - } - - pub fn set_plain_content(&mut self, content: Vec) { - self.dict.remove(b"DecodeParms"); - self.dict.remove(b"Filter"); - self.dict.set("Length", content.len() as i64); - self.content = content; - } - - pub fn compress(&mut self) -> Result<()> { - use flate2::write::ZlibEncoder; - use flate2::Compression; - use std::io::prelude::*; - - if self.dict.get(b"Filter").is_err() { - let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best()); - encoder.write_all(self.content.as_slice())?; - let compressed = encoder.finish()?; - if compressed.len() + 19 < self.content.len() { - self.dict.set("Filter", "FlateDecode"); - self.set_content(compressed); - } - } - Ok(()) - } - - pub fn decompressed_content(&self) -> Result> { - let params = self.dict.get(b"DecodeParms").and_then(Object::as_dict).ok(); - let filters = self.filters()?; - - if self.dict.get(b"Subtype").and_then(Object::as_name_str).ok() == Some("Image") { - return Err(Error::Type); - } - - let mut input = self.content.as_slice(); - let mut output = None; - - // Filters are in decoding 
order. - for filter in filters { - output = Some(match filter.as_str() { - "FlateDecode" => Self::decompress_zlib(input, params)?, - "LZWDecode" => Self::decompress_lzw(input, params)?, - _ => { - return Err(Error::Type); - } - }); - input = output.as_ref().unwrap(); - } - - output.ok_or(Error::Type) - } - - fn decompress_lzw(input: &[u8], params: Option<&Dictionary>) -> Result> { - use weezl::{decode::Decoder, BitOrder}; - const MIN_BITS: u8 = 9; - - let early_change = params - .and_then(|p| p.get(b"EarlyChange").ok()) - .and_then(|p| Object::as_i64(p).ok()) - .map(|v| v != 0) - .unwrap_or(true); - - let mut decoder = if early_change { - Decoder::with_tiff_size_switch(BitOrder::Msb, MIN_BITS - 1) - } else { - Decoder::new(BitOrder::Msb, MIN_BITS - 1) - }; - - let output = Self::decompress_lzw_loop(input, &mut decoder); - Self::decompress_predictor(output, params) - } - - fn decompress_lzw_loop(input: &[u8], decoder: &mut weezl::decode::Decoder) -> Vec { - let mut output = vec![]; - - let result = decoder.into_stream(&mut output).decode_all(input); - if let Err(err) = result.status { - warn!("{}", err); - } - - output - } - - fn decompress_zlib(input: &[u8], params: Option<&Dictionary>) -> Result> { - use flate2::read::ZlibDecoder; - use std::io::prelude::*; - - let mut output = Vec::with_capacity(input.len() * 2); - let mut decoder = ZlibDecoder::new(input); - - if !input.is_empty() { - decoder.read_to_end(&mut output).unwrap_or_else(|err| { - warn!("{}", err); - 0 - }); - } - Self::decompress_predictor(output, params) - } - - fn decompress_predictor(mut data: Vec, params: Option<&Dictionary>) -> Result> { - use crate::filters::png; - - if let Some(params) = params { - let predictor = params.get(b"Predictor").and_then(Object::as_i64).unwrap_or(1); - if (10..=15).contains(&predictor) { - let pixels_per_row = params.get(b"Columns").and_then(Object::as_i64).unwrap_or(1) as usize; - let colors = params.get(b"Colors").and_then(Object::as_i64).unwrap_or(1) as usize; - 
let bits = params.get(b"BitsPerComponent").and_then(Object::as_i64).unwrap_or(8) as usize;
-                let bytes_per_pixel = colors * bits / 8;
-                data = png::decode_frame(data.as_slice(), bytes_per_pixel, pixels_per_row)?;
-            }
-            Ok(data)
-        } else {
-            Ok(data)
-        }
-    }
-
-    pub fn decompress(&mut self) {
-        if let Ok(data) = self.decompressed_content() {
-            self.dict.remove(b"DecodeParms");
-            self.dict.remove(b"Filter");
-            self.set_content(data);
-        }
-    }
-}
+use crate::{Document, Error, Result};
+use linked_hash_map::{self, Iter, IterMut, LinkedHashMap};
+use log::warn;
+use std::fmt;
+use std::str;
+
+/// Object identifier consists of two parts: object number and generation number.
+pub type ObjectId = (u32, u16);
+
+/// Dictionary object.
+#[derive(Clone, Default)]
+pub struct Dictionary(LinkedHashMap<Vec<u8>, Object>);
+
+/// Stream object
+/// Warning - all streams must be indirect objects, while
+/// the stream dictionary may be a direct object
+#[derive(Debug, Clone)]
+pub struct Stream {
+    /// Associated stream dictionary
+    pub dict: Dictionary,
+    /// Contents of the stream in bytes
+    pub content: Vec<u8>,
+    /// Can the stream be compressed by the `Document::compress()` function?
+    /// Font streams may not be compressed, for example
+    pub allows_compression: bool,
+    /// Stream data's position in PDF file.
+    pub start_position: Option<usize>,
+}
+
+/// Basic PDF object types defined in an enum.
+#[derive(Clone)]
+pub enum Object {
+    Null,
+    Boolean(bool),
+    Integer(i64),
+    Real(f64),
+    Name(Vec<u8>),
+    String(Vec<u8>, StringFormat),
+    Array(Vec<Object>),
+    Dictionary(Dictionary),
+    Stream(Stream),
+    Reference(ObjectId),
+}
+
+/// String objects can be written in two formats.
+#[derive(Debug, Clone)]
+pub enum StringFormat {
+    Literal,
+    Hexadecimal,
+}
+
+impl Default for StringFormat {
+    fn default() -> StringFormat {
+        StringFormat::Literal
+    }
+}
+
+impl From<bool> for Object {
+    fn from(value: bool) -> Self {
+        Object::Boolean(value)
+    }
+}
+
+impl From<i64> for Object {
+    fn from(number: i64) -> Self {
+        Object::Integer(number)
+    }
+}
+
+macro_rules! from_smaller_ints {
+    ($( $Int: ty )+) => {
+        $(
+            impl From<$Int> for Object {
+                fn from(number: $Int) -> Self {
+                    Object::Integer(i64::from(number))
+                }
+            }
+        )+
+    }
+}
+
+from_smaller_ints! {
+    i8 i16 i32
+    u8 u16 u32
+}
+
+impl From<f64> for Object {
+    fn from(number: f64) -> Self {
+        Object::Real(number)
+    }
+}
+
+impl From<f32> for Object {
+    fn from(number: f32) -> Self {
+        Object::Real(f64::from(number))
+    }
+}
+
+impl From<String> for Object {
+    fn from(name: String) -> Self {
+        Object::Name(name.into_bytes())
+    }
+}
+
+impl<'a> From<&'a str> for Object {
+    fn from(name: &'a str) -> Self {
+        Object::Name(name.as_bytes().to_vec())
+    }
+}
+
+impl From<Vec<Object>> for Object {
+    fn from(array: Vec<Object>) -> Self {
+        Object::Array(array)
+    }
+}
+
+impl From<Dictionary> for Object {
+    fn from(dcit: Dictionary) -> Self {
+        Object::Dictionary(dcit)
+    }
+}
+
+impl From<Stream> for Object {
+    fn from(stream: Stream) -> Self {
+        Object::Stream(stream)
+    }
+}
+
+impl From<ObjectId> for Object {
+    fn from(id: ObjectId) -> Self {
+        Object::Reference(id)
+    }
+}
+
+impl Object {
+    pub fn string_literal<S: Into<Vec<u8>>>(s: S) -> Self {
+        Object::String(s.into(), StringFormat::Literal)
+    }
+
+    pub fn is_null(&self) -> bool {
+        matches!(*self, Object::Null)
+    }
+
+    pub fn as_bool(&self) -> Result<bool> {
+        match *self {
+            Object::Boolean(ref value) => Ok(*value),
+            _ => Err(Error::Type),
+        }
+    }
+
+    pub fn as_i64(&self) -> Result<i64> {
+        match *self {
+            Object::Integer(ref value) => Ok(*value),
+            _ => Err(Error::Type),
+        }
+    }
+
+    pub fn as_f64(&self) -> Result<f64> {
+        match *self {
+            Object::Real(ref value) => Ok(*value),
+            _ => Err(Error::Type),
+        }
+    }
+
+    /// Get the object value as a float.
+ /// Unlike as_f64() this will also cast an Integer to a Real. + pub fn as_float(&self) -> Result { + match *self { + Object::Integer(ref value) => Ok(*value as f64), + Object::Real(ref value) => Ok(*value), + _ => Err(Error::Type), + } + } + + pub fn as_name(&self) -> Result<&[u8]> { + match *self { + Object::Name(ref name) => Ok(name), + _ => Err(Error::Type), + } + } + + pub fn as_name_str(&self) -> Result<&str> { + Ok(str::from_utf8(self.as_name()?)?) + } + + pub fn as_str(&self) -> Result<&[u8]> { + match self { + Object::String(string, _) => Ok(string), + _ => Err(Error::Type), + } + } + + pub fn as_str_mut(&mut self) -> Result<&mut Vec> { + match self { + Object::String(string, _) => Ok(string), + _ => Err(Error::Type), + } + } + + pub fn as_reference(&self) -> Result { + match *self { + Object::Reference(ref id) => Ok(*id), + _ => Err(Error::Type), + } + } + + pub fn as_array(&self) -> Result<&Vec> { + match *self { + Object::Array(ref arr) => Ok(arr), + _ => Err(Error::Type), + } + } + + pub fn as_array_mut(&mut self) -> Result<&mut Vec> { + match *self { + Object::Array(ref mut arr) => Ok(arr), + _ => Err(Error::Type), + } + } + + pub fn as_dict(&self) -> Result<&Dictionary> { + match *self { + Object::Dictionary(ref dict) => Ok(dict), + _ => Err(Error::Type), + } + } + + pub fn as_dict_mut(&mut self) -> Result<&mut Dictionary> { + match *self { + Object::Dictionary(ref mut dict) => Ok(dict), + _ => Err(Error::Type), + } + } + + pub fn as_stream(&self) -> Result<&Stream> { + match *self { + Object::Stream(ref stream) => Ok(stream), + _ => Err(Error::Type), + } + } + + pub fn as_stream_mut(&mut self) -> Result<&mut Stream> { + match *self { + Object::Stream(ref mut stream) => Ok(stream), + _ => Err(Error::Type), + } + } + + pub fn type_name(&self) -> Result<&str> { + match *self { + Object::Dictionary(ref dict) => dict.type_name(), + Object::Stream(ref stream) => stream.dict.type_name(), + _ => Err(Error::Type), + } + } +} + +impl fmt::Debug for Object { 
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Object::Null => f.write_str("null"), + Object::Boolean(ref value) => { + if *value { + f.write_str("true") + } else { + f.write_str("false") + } + } + Object::Integer(ref value) => write!(f, "{}", *value), + Object::Real(ref value) => write!(f, "{}", *value), + Object::Name(ref name) => write!(f, "/{}", String::from_utf8_lossy(name)), + Object::String(ref text, _) => write!(f, "({})", String::from_utf8_lossy(text)), + Object::Array(ref array) => { + let items = array.iter().map(|item| format!("{:?}", item)).collect::>(); + write!(f, "[{}]", items.join(" ")) + } + Object::Dictionary(ref dict) => write!(f, "{:?}", dict), + Object::Stream(ref stream) => write!(f, "{:?}stream...endstream", stream.dict), + Object::Reference(ref id) => write!(f, "{} {} R", id.0, id.1), + } + } +} + +impl Dictionary { + pub fn new() -> Dictionary { + Dictionary(LinkedHashMap::new()) + } + + pub fn has(&self, key: &[u8]) -> bool { + self.0.contains_key(key) + } + + pub fn get(&self, key: &[u8]) -> Result<&Object> { + self.0.get(key).ok_or(Error::DictKey) + } + + /// Extract object from dictionary, dereferencing the object if it + /// is a reference. 
+ pub fn get_deref<'a>(&'a self, key: &[u8], doc: &'a Document) -> Result<&'a Object> { + doc.dereference(self.get(key)?).map(|(_, object)| object) + } + + pub fn get_mut(&mut self, key: &[u8]) -> Result<&mut Object> { + self.0.get_mut(key).ok_or(Error::DictKey) + } + + pub fn set(&mut self, key: K, value: V) + where + K: Into>, + V: Into, + { + self.0.insert(key.into(), value.into()); + } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn is_empty(&self) -> bool { + self.0.len() == 0 + } + + pub fn remove(&mut self, key: &[u8]) -> Option { + self.0.remove(key) + } + + pub fn type_name(&self) -> Result<&str> { + self.get(b"Type") + .and_then(Object::as_name_str) + .or_else(|_| self.get(b"Linearized").and(Ok("Linearized"))) + } + + pub fn type_is(&self, type_name: &[u8]) -> bool { + self.get(b"Type").and_then(Object::as_name).ok() == Some(type_name) + } + + pub fn iter(&self) -> Iter<'_, Vec, Object> { + self.0.iter() + } + + pub fn iter_mut(&mut self) -> IterMut<'_, Vec, Object> { + self.0.iter_mut() + } + + pub fn get_font_encoding(&self) -> &str { + self.get(b"Encoding") + .and_then(Object::as_name_str) + .unwrap_or("StandardEncoding") + } + + pub fn extend(&mut self, other: &Dictionary) { + let keep_both_objects = + |new_dict: &mut LinkedHashMap, Object>, key: &Vec, value: &Object, old_value: &Object| { + let mut final_array; + + match value { + Object::Array(array) => { + final_array = Vec::with_capacity(array.len() + 1); + final_array.push(old_value.to_owned()); + final_array.extend(array.to_owned()); + } + _ => { + final_array = vec![value.to_owned(), old_value.to_owned()]; + } + } + + new_dict.insert(key.to_owned(), Object::Array(final_array)); + }; + + let mut new_dict = LinkedHashMap::with_capacity(other.0.len() + 1); + + for (key, value) in other.0.iter() { + if let Some(old_value) = self.0.get(key) { + match (old_value, value) { + (Object::Dictionary(old_dict), Object::Dictionary(dict)) => { + let mut replaced_dict = old_dict.to_owned(); + 
replaced_dict.extend(dict); + + new_dict.insert(key.to_owned(), Object::Dictionary(replaced_dict)); + } + (Object::Array(old_array), Object::Array(array)) => { + let mut replaced_array = old_array.to_owned(); + replaced_array.extend(array.to_owned()); + + new_dict.insert(key.to_owned(), Object::Array(replaced_array)); + } + (Object::Integer(old_id), Object::Integer(id)) => { + let array = vec![Object::Integer(*old_id), Object::Integer(*id)]; + + new_dict.insert(key.to_owned(), Object::Array(array)); + } + (Object::Real(old_id), Object::Real(id)) => { + let array = vec![Object::Real(*old_id), Object::Real(*id)]; + + new_dict.insert(key.to_owned(), Object::Array(array)); + } + (Object::String(old_ids, old_format), Object::String(ids, format)) => { + let array = vec![ + Object::String(old_ids.to_owned(), old_format.to_owned()), + Object::String(ids.to_owned(), format.to_owned()), + ]; + + new_dict.insert(key.to_owned(), Object::Array(array)); + } + (Object::Reference(old_object_id), Object::Reference(object_id)) => { + let array = vec![Object::Reference(*old_object_id), Object::Reference(*object_id)]; + + new_dict.insert(key.to_owned(), Object::Array(array)); + } + (Object::Null, _) | (Object::Boolean(_), _) | (Object::Name(_), _) | (Object::Stream(_), _) => { + new_dict.insert(key.to_owned(), old_value.to_owned()); + } + (_, _) => keep_both_objects(&mut new_dict, key, value, old_value), + } + } else { + new_dict.insert(key.to_owned(), value.to_owned()); + } + } + + self.0 = new_dict; + } +} + +#[macro_export] +macro_rules! 
dictionary { + () => { + $crate::Dictionary::new() + }; + ($( $key: expr => $value: expr ),+ ,) => { + dictionary!( $($key => $value),+ ) + }; + ($( $key: expr => $value: expr ),*) => {{ + let mut dict = $crate::Dictionary::new(); + $( + dict.set($key, $value); + )* + dict + }} +} + +impl fmt::Debug for Dictionary { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let entries = self + .into_iter() + .map(|(key, value)| format!("/{} {:?}", String::from_utf8_lossy(key), value)) + .collect::>(); + write!(f, "<<{}>>", entries.concat()) + } +} + +impl<'a> IntoIterator for &'a Dictionary { + type Item = (&'a Vec, &'a Object); + type IntoIter = linked_hash_map::Iter<'a, Vec, Object>; + + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + +use std::iter::FromIterator; +impl>> FromIterator<(K, Object)> for Dictionary { + fn from_iter>(iter: I) -> Self { + let mut dict = Dictionary::new(); + for (k, v) in iter { + dict.set(k, v); + } + dict + } +} + +impl Stream { + pub fn new(mut dict: Dictionary, content: Vec) -> Stream { + dict.set("Length", content.len() as i64); + Stream { + dict, + content, + allows_compression: true, + start_position: None, + } + } + + pub fn with_position(dict: Dictionary, position: usize) -> Stream { + Stream { + dict, + content: vec![], + allows_compression: true, + start_position: Some(position), + } + } + + /// Default is that the stream may be compressed. 
On font streams, + /// set this to false, otherwise the font will be corrupt + #[inline] + pub fn with_compression(mut self, allows_compression: bool) -> Stream { + self.allows_compression = allows_compression; + self + } + + // Return first filter + pub fn filter(&self) -> Result { + self.filters() + .and_then(|f| f.into_iter().next().ok_or(Error::ObjectNotFound)) + } + + pub fn filters(&self) -> Result> { + let filter = self.dict.get(b"Filter")?; + + if let Ok(name) = filter.as_name_str() { + Ok(vec![name.into()]) + } else if let Ok(names) = filter.as_array() { + // It is an error if a single conversion fails. + names + .iter() + .map(|n| match Object::as_name_str(n) { + Ok(n) => Ok(String::from(n)), + Err(_) => Err(Error::Type), + }) + .collect() + } else { + Err(Error::Type) + } + } + + pub fn set_content(&mut self, content: Vec) { + self.content = content; + self.dict.set("Length", self.content.len() as i64); + } + + pub fn set_plain_content(&mut self, content: Vec) { + self.dict.remove(b"DecodeParms"); + self.dict.remove(b"Filter"); + self.dict.set("Length", content.len() as i64); + self.content = content; + } + + pub fn compress(&mut self) -> Result<()> { + use flate2::write::ZlibEncoder; + use flate2::Compression; + use std::io::prelude::*; + + if self.dict.get(b"Filter").is_err() { + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best()); + encoder.write_all(self.content.as_slice())?; + let compressed = encoder.finish()?; + if compressed.len() + 19 < self.content.len() { + self.dict.set("Filter", "FlateDecode"); + self.set_content(compressed); + } + } + Ok(()) + } + + pub fn decompressed_content(&self) -> Result> { + let params = self.dict.get(b"DecodeParms").and_then(Object::as_dict).ok(); + let filters = self.filters()?; + + if self.dict.get(b"Subtype").and_then(Object::as_name_str).ok() == Some("Image") { + return Err(Error::Type); + } + + let mut input = self.content.as_slice(); + let mut output = None; + + // Filters are in decoding 
order. + for filter in filters { + output = Some(match filter.as_str() { + "FlateDecode" => Self::decompress_zlib(input, params)?, + "LZWDecode" => Self::decompress_lzw(input, params)?, + _ => { + return Err(Error::Type); + } + }); + input = output.as_ref().unwrap(); + } + + output.ok_or(Error::Type) + } + + fn decompress_lzw(input: &[u8], params: Option<&Dictionary>) -> Result> { + use weezl::{decode::Decoder, BitOrder}; + const MIN_BITS: u8 = 9; + + let early_change = params + .and_then(|p| p.get(b"EarlyChange").ok()) + .and_then(|p| Object::as_i64(p).ok()) + .map(|v| v != 0) + .unwrap_or(true); + + let mut decoder = if early_change { + Decoder::with_tiff_size_switch(BitOrder::Msb, MIN_BITS - 1) + } else { + Decoder::new(BitOrder::Msb, MIN_BITS - 1) + }; + + let output = Self::decompress_lzw_loop(input, &mut decoder); + Self::decompress_predictor(output, params) + } + + fn decompress_lzw_loop(input: &[u8], decoder: &mut weezl::decode::Decoder) -> Vec { + let mut output = vec![]; + + let result = decoder.into_stream(&mut output).decode_all(input); + if let Err(err) = result.status { + warn!("{}", err); + } + + output + } + + fn decompress_zlib(input: &[u8], params: Option<&Dictionary>) -> Result> { + use flate2::read::ZlibDecoder; + use std::io::prelude::*; + + let mut output = Vec::with_capacity(input.len() * 2); + let mut decoder = ZlibDecoder::new(input); + + if !input.is_empty() { + decoder.read_to_end(&mut output).unwrap_or_else(|err| { + warn!("{}", err); + 0 + }); + } + Self::decompress_predictor(output, params) + } + + fn decompress_predictor(mut data: Vec, params: Option<&Dictionary>) -> Result> { + use crate::filters::png; + + if let Some(params) = params { + let predictor = params.get(b"Predictor").and_then(Object::as_i64).unwrap_or(1); + if (10..=15).contains(&predictor) { + let pixels_per_row = params.get(b"Columns").and_then(Object::as_i64).unwrap_or(1) as usize; + let colors = params.get(b"Colors").and_then(Object::as_i64).unwrap_or(1) as usize; + 
let bits = params.get(b"BitsPerComponent").and_then(Object::as_i64).unwrap_or(8) as usize; + let bytes_per_pixel = colors * bits / 8; + data = png::decode_frame(data.as_slice(), bytes_per_pixel, pixels_per_row)?; + } + Ok(data) + } else { + Ok(data) + } + } + + pub fn decompress(&mut self) { + if let Ok(data) = self.decompressed_content() { + self.dict.remove(b"DecodeParms"); + self.dict.remove(b"Filter"); + self.set_content(data); + } + } +} diff --git a/src/writer.rs b/src/writer.rs index 3e9fce1..279277e 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -78,6 +78,7 @@ impl Document { generation: 0, }, ); + self.trailer.set("Type", Name(b"XRef".to_vec())); // Update `max_id` in trailer self.trailer.set("Size", i64::from(self.max_id + 1)); // Set the size of each entry in bytes (default for PDFs is `[1 2 1]`) @@ -89,9 +90,13 @@ impl Document { let filter = XRefStreamFilter::None; let (stream, stream_length, indexes) = Writer::create_xref_steam(xref, filter)?; self.trailer.set("Index", indexes); + if filter == XRefStreamFilter::ASCIIHexDecode { self.trailer.set("Filter", Name(b"ASCIIHexDecode".to_vec())); + } else { + self.trailer.remove(b"Filter"); } + self.trailer.set("Length", stream_length as i64); let trailer = &self.trailer; @@ -193,6 +198,7 @@ pub struct Writer; #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum XRefStreamFilter { ASCIIHexDecode, + _FlateDecode, //this is generally a Zlib compressed Stream. None, }