Skip to content

Commit

Permalink
Add browser-native encoding / decoding for images
Browse files Browse the repository at this point in the history
  • Loading branch information
fschutt committed Feb 28, 2025
1 parent f67d6e9 commit 78d9de2
Show file tree
Hide file tree
Showing 12 changed files with 727 additions and 323 deletions.
5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ azulc = { git = "https://github.com/fschutt/azul", rev = "a09ccbd5be6429f8c40c1
rust-fontconfig = { version = "0.1.13", default-features = false, optional = true }
xmlparser = { version = "0.13.6", default-features = false, optional = true }
serde_json = { version = "1", optional = true }
web-sys = { version = "0.3.77", optional = true, default-features = false, features = ["ImageData", "Window", "Document", "Blob", "CanvasRenderingContext2d", "HtmlCanvasElement", "HtmlImageElement", "ImageBitmap", "BlobPropertyBag"]}
wasm-bindgen-futures = { version = "0.4.50", optional = true, default-features = false }

[target.'cfg(target_family = "wasm")'.dependencies]
wasm-bindgen = { version = "0.2" }
Expand All @@ -49,7 +51,7 @@ serde_json = { version = "1" }
[features]
default = ["html"]
html = ["azul-css", "azul-css-parser", "azul-core", "azul-layout", "azul-text-layout", "azulc", "rust-fontconfig", "xmlparser", "serde_json"]
js-sys = ["dep:js-sys"] # enables js-sys features on wasm
js-sys = ["dep:js-sys", "web-sys", "wasm-bindgen-futures"] # wasm + browser context
gif = ["image/gif"]
jpeg = ["image/jpeg"]
png = ["image/png"]
Expand All @@ -62,6 +64,7 @@ hdr = ["image/hdr"]
dds = ["image/dds"]
webp = ["image/webp"]
rayon = ["image/rayon"] # enables multithreading for decoding images
wasm-bindgen-futures = ["dep:wasm-bindgen-futures"]

[package.metadata.docs.rs]
all-features = true
Expand Down
2 changes: 1 addition & 1 deletion examples/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ fn main() {
let font = ParsedFont::from_bytes(ROBOTO_TTF, 0).unwrap();
let fid = doc.add_font(&font);
let ops = vec![Op::WriteText {
text: "Hello World!".to_string(),
items: vec!["Hello World!".into()],
size: Pt(20.0),
font: fid,
}];
Expand Down
205 changes: 132 additions & 73 deletions src/deserialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ use crate::{
BuiltinFont, Color, DictItem, ExtendedGraphicsStateId, ExtendedGraphicsStateMap, FontId,
LineDashPattern, LinePoint, Op, PageAnnotMap, ParsedFont, PdfDocument, PdfDocumentInfo,
PdfFontMap, PdfLayerMap, PdfMetadata, PdfPage, PdfResources, PolygonRing, RawImage,
RenderingIntent, TextMatrix, TextRenderingMode, XObject, XObjectId, XObjectMap,
RenderingIntent, TextItem, TextMatrix, TextRenderingMode, XObject, XObjectId, XObjectMap,
cmap::ToUnicodeCMap,
conformance::PdfConformance,
date::{OffsetDateTime, UtcOffset},
};
Expand Down Expand Up @@ -804,42 +805,38 @@ pub fn parse_op(

// --- Text showing with spacing ---
"TJ" => {
// 'TJ' shows text with individual spacing adjustments.
if !state.in_text_mode {
warnings.push(PdfWarnMsg::error(
page,
op_id,
"Warning: 'TJ' outside of text mode!".to_string(),
));
}

if op.operands.is_empty() {
warnings.push(PdfWarnMsg::error(
page,
op_id,
"Warning: 'TJ' with no operands".to_string(),
));
} else if let Some(arr) = op.operands.get(0).and_then(|o| o.as_array().ok()) {
let mut text_str = String::new();
for item in arr {
match item {
// When a string is encountered, append it.
Object::String(bytes, _) => {
text_str.push_str(&String::from_utf8_lossy(bytes));
}
// Numeric values indicate spacing adjustments (kerning).
// You could choose to handle these specially.
Object::Integer(_i) => {
// For simplicity, ignore spacing adjustments.
}
_ => {
warnings.push(PdfWarnMsg::error(
page,
op_id,
"Warning: unexpected element in TJ array".to_string(),
));
// Get the font for CMap lookup
let to_unicode_cmap =
if let (Some(fid), _) = (&state.current_font, state.current_font_size) {
match fonts.get(fid) {
Some(ParsedOrBuiltinFont::P(font)) => {
// Try to get the CMap from the parsed font
find_to_unicode_cmap(font)
}
_ => None,
}
}
}
} else {
None
};

// Decode the TJ array into TextItems
let text_items = crate::text::decode_tj_operands(arr, to_unicode_cmap.as_ref());

if let (Some(fid), Some(sz)) = (&state.current_font, state.current_font_size) {
let f = match fonts.get(fid) {
Some(s) => s,
Expand All @@ -849,14 +846,14 @@ pub fn parse_op(
match f {
ParsedOrBuiltinFont::B(b) => {
out_ops.push(Op::WriteTextBuiltinFont {
text: text_str,
items: text_items,
font: b.clone(),
size: sz,
});
}
ParsedOrBuiltinFont::P(_) => {
out_ops.push(Op::WriteText {
text: text_str,
items: text_items,
font: fid.clone(),
size: sz,
});
Expand All @@ -878,6 +875,77 @@ pub fn parse_op(
}
}

"Tj" => {
if !state.in_text_mode {
warnings.push(PdfWarnMsg::error(
page,
op_id,
"Warning: 'Tj' outside of text mode!".to_string(),
));
}

if op.operands.is_empty() {
warnings.push(PdfWarnMsg::error(
page,
op_id,
"Warning: 'Tj' with no operands".to_string(),
));
} else if let lopdf::Object::String(bytes, format) = &op.operands[0] {
// Get the font for CMap lookup
let to_unicode_cmap =
if let (Some(fid), _) = (&state.current_font, state.current_font_size) {
match fonts.get(fid) {
Some(ParsedOrBuiltinFont::P(font)) => {
// Try to get the CMap from the parsed font
find_to_unicode_cmap_from_font(font)
}
_ => None,
}
} else {
None
};

// Create a temporary lopdf::Object for decoding
let string_obj = lopdf::Object::String(bytes.clone(), *format);

// Decode the PDF string using the CMap if available
let text_str =
crate::text::decode_pdf_string(&string_obj, to_unicode_cmap.as_ref());

// Create a single TextItem with no kerning
let text_items = vec![TextItem::Text(text_str)];

if let (Some(fid), Some(sz)) = (&state.current_font, state.current_font_size) {
let f = match fonts.get(fid) {
Some(s) => s,
None => &ParsedOrBuiltinFont::B(BuiltinFont::TimesRoman),
};

match f {
ParsedOrBuiltinFont::B(b) => {
out_ops.push(Op::WriteTextBuiltinFont {
items: text_items,
font: b.clone(),
size: sz,
});
}
ParsedOrBuiltinFont::P(_) => {
out_ops.push(Op::WriteText {
items: text_items,
font: fid.clone(),
size: sz,
});
}
}
}
} else {
warnings.push(PdfWarnMsg::error(
page,
op_id,
"Warning: 'Tj' operand is not string".to_string(),
));
}
}
"T*" => {
out_ops.push(Op::AddLineBreak);
}
Expand Down Expand Up @@ -1068,55 +1136,6 @@ pub fn parse_op(
}
}

// --- Show text (Tj) single string example ---
"Tj" => {
if !state.in_text_mode {
warnings.push(PdfWarnMsg::error(
page,
op_id,
format!("Warning: 'Tj' outside of text mode!"),
));
}
if op.operands.is_empty() {
warnings.push(PdfWarnMsg::error(
page,
op_id,
format!("Warning: 'Tj' with no operands"),
));
} else if let lopdf::Object::String(bytes, _) = &op.operands[0] {
let text_str = String::from_utf8_lossy(bytes).to_string();
if let (Some(fid), Some(sz)) = (&state.current_font, state.current_font_size) {
let f = match fonts.get(fid) {
Some(s) => s,
None => &ParsedOrBuiltinFont::B(BuiltinFont::TimesRoman),
};

match f {
ParsedOrBuiltinFont::B(b) => {
out_ops.push(Op::WriteTextBuiltinFont {
text: text_str,
font: b.clone(),
size: sz,
});
}
ParsedOrBuiltinFont::P(_) => {
out_ops.push(Op::WriteText {
text: text_str,
font: fid.clone(),
size: sz,
});
}
}
}
} else {
warnings.push(PdfWarnMsg::error(
page,
op_id,
format!("Warning: 'Tj' operand is not string"),
));
}
}

// --- Move text cursor (Td) ---
"Td" => {
if op.operands.len() == 2 {
Expand Down Expand Up @@ -1717,6 +1736,46 @@ pub fn parse_op(
Ok(out_ops)
}

/// Builds a reverse lookup table (glyph ID -> Unicode codepoint) from the
/// font's `cmap` subtable and wraps it in a [`ToUnicodeCMap`].
///
/// Returns `None` when `font.cmap_subtable` is absent.
///
/// Only codepoints in `0..65535` are probed, so glyphs that are reachable
/// solely through codepoints above that range never show up in the result.
///
/// When several codepoints resolve to the same glyph, the lowest codepoint is
/// kept. Overwriting on every hit would leave the highest one instead, which
/// turns e.g. a plain space (U+0020) into a no-break space (U+00A0) for fonts
/// that reuse one glyph for both.
fn find_to_unicode_cmap_from_font(font: &ParsedFont) -> Option<ToUnicodeCMap> {
    let cmap_subtable = font.cmap_subtable.as_ref()?;

    let mut mappings = BTreeMap::new();

    // NOTE(review): this is a full scan of the probed range on every call;
    // if callers invoke it once per text operator, caching the result per
    // font may be worthwhile.
    for c in 0..65535u32 {
        // Lookup errors and unmapped codepoints are both skipped.
        if let Ok(Some(gid)) = cmap_subtable.map_glyph(c) {
            // Codepoints are visited in ascending order, so only inserting
            // into vacant slots keeps the lowest codepoint per glyph.
            mappings.entry(gid as u32).or_insert_with(|| vec![c]);
        }
    }

    Some(ToUnicodeCMap { mappings })
}

/// Derives a [`ToUnicodeCMap`] (glyph ID -> Unicode codepoint) from the
/// font's own `cmap` subtable.
///
/// This function does not decode any text itself; it only produces the lookup
/// table. Returns `None` when `font.cmap_subtable` is absent.
///
/// Codepoints in `0..65535` are probed in ascending order. If more than one
/// codepoint maps to the same glyph, the first (lowest) codepoint is retained
/// rather than being overwritten by later ones, so a glyph shared between
/// e.g. U+002D (hyphen-minus) and U+00AD (soft hyphen) decodes as U+002D.
///
/// NOTE(review): this performs the same cmap scan as
/// `find_to_unicode_cmap_from_font`; the two are candidates for merging into
/// a single helper.
fn find_to_unicode_cmap(font: &ParsedFont) -> Option<ToUnicodeCMap> {
    let subtable = font.cmap_subtable.as_ref()?;

    let mut mappings = BTreeMap::new();
    for unicode in 0..65535u32 {
        // Unmapped codepoints and lookup errors contribute nothing.
        let gid = match subtable.map_glyph(unicode) {
            Ok(Some(gid)) => gid as u32,
            _ => continue,
        };
        // Keep the existing (lower) codepoint if this glyph was already seen.
        mappings.entry(gid).or_insert_with(|| vec![unicode]);
    }

    Some(ToUnicodeCMap { mappings })
}

/// Returns a default date (Unix epoch)
fn default_date() -> OffsetDateTime {
OffsetDateTime::from_unix_timestamp(0).unwrap()
Expand Down
14 changes: 11 additions & 3 deletions src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use lopdf::Object::{Array, Integer};
use serde_derive::{Deserialize, Serialize};
use time::error::Parse;

use crate::{FontId, Op, PdfPage};
use crate::{FontId, Op, PdfPage, TextItem};

/// Builtin or external font
#[derive(Debug, Clone, PartialEq)]
Expand Down Expand Up @@ -361,9 +361,17 @@ impl ParsedFont {
.iter()
.flat_map(|p| {
p.ops.iter().filter_map(|s| match s {
Op::WriteText { font, text, .. } => {
Op::WriteText { font, items, .. } => {
if font_id == font {
Some(CharsOrCodepoint::Chars(text.clone()))
Some(CharsOrCodepoint::Chars(
items
.iter()
.filter_map(|s| match s {
TextItem::Text(t) => Some(t.clone()),
TextItem::Offset(_) => None,
})
.collect(),
))
} else {
None
}
Expand Down
Loading

0 comments on commit 78d9de2

Please sign in to comment.