From 4be0451a8a9e2d37f2b704ba78fdc91e28e28324 Mon Sep 17 00:00:00 2001
From: Wonwoo Choi <chwo9843@gmail.com>
Date: Sun, 5 Jan 2025 17:34:09 +0900
Subject: [PATCH] Implement HLG transfer functions

---
 jxl/src/color/tf.rs       | 301 +++++++++++++++++++++++++++++++++-----
 jxl/src/util/fast_math.rs |   8 +-
 2 files changed, 267 insertions(+), 42 deletions(-)

diff --git a/jxl/src/color/tf.rs b/jxl/src/color/tf.rs
index e158c92..96d425e 100644
--- a/jxl/src/color/tf.rs
+++ b/jxl/src/color/tf.rs
@@ -13,39 +13,6 @@ const SRGB_POWTABLE_LOWER: [u8; 16] = [
     0x00, 0xb7, 0x04, 0x0d, 0xcb, 0xe7, 0x41, 0x68, 0x51, 0xd1, 0xeb, 0xf2, 0x00, 0xb7, 0x04, 0x0d,
 ];
 
-const PQ_M1: f64 = 2610.0 / 16384.0;
-const PQ_M2: f64 = (2523.0 / 4096.0) * 128.0;
-const PQ_C1: f64 = 3424.0 / 4096.0;
-const PQ_C2: f64 = (2413.0 / 4096.0) * 32.0;
-const PQ_C3: f64 = (2392.0 / 4096.0) * 32.0;
-
-const PQ_EOTF_P: [f32; 5] = [
-    2.6297566e-4,
-    -6.235531e-3,
-    7.386023e-1,
-    2.6455317,
-    5.500349e-1,
-];
-const PQ_EOTF_Q: [f32; 5] = [
-    4.213501e2,
-    -4.2873682e2,
-    1.7436467e2,
-    -3.3907887e1,
-    2.6771877,
-];
-
-const PQ_INV_EOTF_P: [f32; 5] = [1.351392e-2, -1.095778, 5.522776e1, 1.492516e2, 4.838434e1];
-const PQ_INV_EOTF_Q: [f32; 5] = [1.012416, 2.016708e1, 9.26371e1, 1.120607e2, 2.590418e1];
-const PQ_INV_EOTF_P_SMALL: [f32; 5] = [
-    9.863406e-6,
-    3.881234e-1,
-    1.352821e2,
-    6.889862e4,
-    -2.864824e5,
-];
-const PQ_INV_EOTF_Q_SMALL: [f32; 5] =
-    [3.371868e1, 1.477719e3, 1.608477e4, -4.389884e4, -2.072546e5];
-
 /// Converts the linear samples with the sRGB transfer curve.
 // Fast linear to sRGB conversion, ported from libjxl. Max error ~1.7e-4
 pub fn linear_to_srgb_fast(samples: &mut [f32]) {
@@ -160,6 +127,12 @@ pub fn bt709_to_linear(samples: &mut [f32]) {
     }
 }
 
+const PQ_M1: f64 = 2610.0 / 16384.0;
+const PQ_M2: f64 = (2523.0 / 4096.0) * 128.0;
+const PQ_C1: f64 = 3424.0 / 4096.0;
+const PQ_C2: f64 = (2413.0 / 4096.0) * 32.0;
+const PQ_C3: f64 = (2392.0 / 4096.0) * 32.0;
+
 /// Converts linear sample to PQ signal using PQ inverse EOTF, where linear sample value of 1.0
 /// represents `intensity_target` display nits.
 ///
@@ -202,6 +175,33 @@ pub fn pq_to_linear_precise(intensity_target: f32, samples: &mut [f32]) {
     }
 }
 
+const PQ_EOTF_P: [f32; 5] = [
+    2.6297566e-4,
+    -6.235531e-3,
+    7.386023e-1,
+    2.6455317,
+    5.500349e-1,
+];
+const PQ_EOTF_Q: [f32; 5] = [
+    4.213501e2,
+    -4.2873682e2,
+    1.7436467e2,
+    -3.3907887e1,
+    2.6771877,
+];
+
+const PQ_INV_EOTF_P: [f32; 5] = [1.351392e-2, -1.095778, 5.522776e1, 1.492516e2, 4.838434e1];
+const PQ_INV_EOTF_Q: [f32; 5] = [1.012416, 2.016708e1, 9.26371e1, 1.120607e2, 2.590418e1];
+const PQ_INV_EOTF_P_SMALL: [f32; 5] = [
+    9.863406e-6,
+    3.881234e-1,
+    1.352821e2,
+    6.889862e4,
+    -2.864824e5,
+];
+const PQ_INV_EOTF_Q_SMALL: [f32; 5] =
+    [3.371868e1, 1.477719e3, 1.608477e4, -4.389884e4, -2.072546e5];
+
 /// Converts linear sample to PQ signal using PQ inverse EOTF, where linear sample value of 1.0
 /// represents `intensity_target` display nits.
 ///
@@ -242,6 +242,148 @@ pub fn pq_to_linear(intensity_target: f32, samples: &mut [f32]) {
     }
 }
 
+const HLG_A: f64 = 0.17883277;
+const HLG_B: f64 = 1.0 - 4.0 * HLG_A;
+const HLG_C: f64 = 0.5599107295;
+
+fn hlg_ootf_inner_precise(exp: f64, [lr, lg, lb]: [f32; 3], [sr, sg, sb]: [&mut [f32]; 3]) {
+    if exp.abs() < 0.1 {
+        return;
+    }
+
+    let lr = lr as f64;
+    let lg = lg as f64;
+    let lb = lb as f64;
+    for ((r, g), b) in std::iter::zip(sr, sg).zip(sb) {
+        let dr = *r as f64;
+        let dg = *g as f64;
+        let db = *b as f64;
+        let mixed = dr.mul_add(lr, dg.mul_add(lg, db * lb));
+        let mult = mixed.powf(exp);
+        *r = (dr * mult) as f32;
+        *g = (dg * mult) as f32;
+        *b = (db * mult) as f32;
+    }
+}
+
+fn hlg_ootf_inner(exp: f32, [lr, lg, lb]: [f32; 3], [sr, sg, sb]: [&mut [f32]; 3]) {
+    if exp.abs() < 0.1 {
+        return;
+    }
+
+    for ((r, g), b) in std::iter::zip(sr, sg).zip(sb) {
+        let mixed = r.mul_add(lr, g.mul_add(lg, *b * lb));
+        let mult = crate::util::fast_powf(mixed, exp);
+        *r *= mult;
+        *g *= mult;
+        *b *= mult;
+    }
+}
+
+/// Converts scene-referred linear samples to display-referred linear samples using HLG OOTF.
+///
+/// This version uses double precision arithmetic internally.
+pub fn hlg_scene_to_display_precise(
+    intensity_display: f32,
+    luminance_rgb: [f32; 3],
+    samples_rgb: [&mut [f32]; 3],
+) {
+    let system_gamma = 1.2f64 * 1.111f64.powf((intensity_display as f64 / 1e3).log2());
+    let gamma_sub_one = system_gamma - 1.0;
+    hlg_ootf_inner_precise(gamma_sub_one, luminance_rgb, samples_rgb);
+}
+
+/// Converts display-referred linear samples to scene-referred linear samples using HLG inverse
+/// OOTF.
+///
+/// This version uses double precision arithmetic internally.
+pub fn hlg_display_to_scene_precise(
+    intensity_display: f32,
+    luminance_rgb: [f32; 3],
+    samples_rgb: [&mut [f32]; 3],
+) {
+    let system_gamma = 1.2f64 * 1.111f64.powf((intensity_display as f64 / 1e3).log2());
+    let one_sub_gamma = 1.0 - system_gamma;
+    hlg_ootf_inner_precise(one_sub_gamma / system_gamma, luminance_rgb, samples_rgb);
+}
+
+/// Converts scene-referred linear samples to display-referred linear samples using HLG OOTF.
+///
+/// This version uses `fast_powf` to compute power function.
+pub fn hlg_scene_to_display(
+    intensity_display: f32,
+    luminance_rgb: [f32; 3],
+    samples_rgb: [&mut [f32]; 3],
+) {
+    let system_gamma = 1.2f32 * 1.111f32.powf((intensity_display / 1e3).log2());
+    let gamma_sub_one = system_gamma - 1.0;
+    hlg_ootf_inner(gamma_sub_one, luminance_rgb, samples_rgb);
+}
+
+/// Converts display-referred linear samples to scene-referred linear samples using HLG inverse
+/// OOTF.
+///
+/// This version uses `fast_powf` to compute power function.
+pub fn hlg_display_to_scene(
+    intensity_display: f32,
+    luminance_rgb: [f32; 3],
+    samples_rgb: [&mut [f32]; 3],
+) {
+    let system_gamma = 1.2f32 * 1.111f32.powf((intensity_display / 1e3).log2());
+    let one_sub_gamma = 1.0 - system_gamma;
+    hlg_ootf_inner(one_sub_gamma / system_gamma, luminance_rgb, samples_rgb);
+}
+
+/// Converts scene-referred linear sample to HLG signal.
+///
+/// This version uses double precision arithmetic internally.
+pub fn scene_to_hlg_precise(samples: &mut [f32]) {
+    for s in samples {
+        let a = s.abs() as f64;
+        let y = if a <= 1.0 / 12.0 {
+            (3.0 * a).sqrt()
+        } else {
+            // TODO(tirr-c): maybe use mul_add?
+            HLG_A * (12.0 * a - HLG_B).ln() + HLG_C
+        };
+        *s = (y as f32).copysign(*s);
+    }
+}
+
+/// Converts HLG signal to scene-referred linear sample.
+///
+/// This version uses double precision arithmetic internally.
+pub fn hlg_to_scene_precise(samples: &mut [f32]) {
+    for s in samples {
+        let a = s.abs() as f64;
+        let y = if a <= 0.5 {
+            a * a * 3.0
+        } else {
+            (((a - HLG_C) / HLG_A).exp() + HLG_B) / 12.0
+        };
+        *s = (y as f32).copysign(*s);
+    }
+}
+
+/// Converts scene-referred linear sample to HLG signal.
+///
+/// This version uses `fast_log2f` to apply logarithmic function.
+// Max error: ~5e-7
+pub fn scene_to_hlg(samples: &mut [f32]) {
+    for s in samples {
+        let a = s.abs();
+        let y = if a <= 1.0 / 12.0 {
+            (3.0 * a).sqrt()
+        } else {
+            // TODO(tirr-c): maybe use mul_add?
+            let log = crate::util::fast_log2f(12.0 * a - HLG_B as f32);
+            // log2 x = ln x / ln 2, therefore ln x = (ln 2)(log2 x)
+            (HLG_A * std::f64::consts::LN_2) as f32 * log + HLG_C as f32
+        };
+        *s = y.copysign(*s);
+    }
+}
+
 #[cfg(test)]
 mod test {
     use test_log::test;
@@ -270,7 +412,7 @@ mod test {
     #[test]
     fn srgb_roundtrip_arb() {
         arbtest::arbtest(|u| {
-            let samples: Vec<f32> = arb_samples(u)?;
+            let samples = arb_samples(u)?;
             let mut output = samples.clone();
 
             linear_to_srgb(&mut output);
@@ -283,7 +425,7 @@ mod test {
     #[test]
     fn bt709_roundtrip_arb() {
         arbtest::arbtest(|u| {
-            let samples: Vec<f32> = arb_samples(u)?;
+            let samples = arb_samples(u)?;
             let mut output = samples.clone();
 
             linear_to_bt709(&mut output);
@@ -296,7 +438,7 @@ mod test {
     #[test]
     fn linear_to_srgb_fast_arb() {
         arbtest::arbtest(|u| {
-            let mut samples: Vec<f32> = arb_samples(u)?;
+            let mut samples = arb_samples(u)?;
             let mut fast = samples.clone();
 
             linear_to_srgb(&mut samples);
@@ -310,7 +452,7 @@ mod test {
     fn linear_to_pq_arb() {
         arbtest::arbtest(|u| {
             let intensity_target = u.int_in_range(9900..=10100)? as f32;
-            let mut samples: Vec<f32> = arb_samples(u)?;
+            let mut samples = arb_samples(u)?;
             let mut precise = samples.clone();
 
             linear_to_pq(intensity_target, &mut samples);
@@ -325,7 +467,7 @@ mod test {
     fn pq_to_linear_arb() {
         arbtest::arbtest(|u| {
             let intensity_target = u.int_in_range(9900..=10100)? as f32;
-            let mut samples: Vec<f32> = arb_samples(u)?;
+            let mut samples = arb_samples(u)?;
             let mut precise = samples.clone();
 
             pq_to_linear(intensity_target, &mut samples);
@@ -334,4 +476,87 @@ mod test {
             Ok(())
         });
     }
+
+    #[test]
+    fn hlg_ootf_arb() {
+        arbtest::arbtest(|u| {
+            let intensity_target = u.int_in_range(900..=1100)? as f32;
+
+            let lr = 0.2 + u.int_in_range(0..=255)? as f32 / 255.0;
+            let lb = 0.2 + u.int_in_range(0..=255)? as f32 / 255.0;
+            let lg = 1.0 - lr - lb;
+            let luminance_rgb = [lr, lg, lb];
+
+            let r = u.int_in_range(0u32..=(1 << 24))? as f32 / (1 << 24) as f32;
+            let g = u.int_in_range(0u32..=(1 << 24))? as f32 / (1 << 24) as f32;
+            let b = u.int_in_range(0u32..=(1 << 24))? as f32 / (1 << 24) as f32;
+
+            let mut fast_r = r;
+            let mut fast_g = g;
+            let mut fast_b = b;
+            let fast = [
+                std::slice::from_mut(&mut fast_r),
+                std::slice::from_mut(&mut fast_g),
+                std::slice::from_mut(&mut fast_b),
+            ];
+            hlg_display_to_scene(intensity_target, luminance_rgb, fast);
+
+            let mut precise_r = r;
+            let mut precise_g = g;
+            let mut precise_b = b;
+            let precise = [
+                std::slice::from_mut(&mut precise_r),
+                std::slice::from_mut(&mut precise_g),
+                std::slice::from_mut(&mut precise_b),
+            ];
+            hlg_display_to_scene(intensity_target, luminance_rgb, precise);
+
+            assert_all_almost_eq!(
+                &[fast_r, fast_g, fast_b],
+                &[precise_r, precise_g, precise_b],
+                7.2e-7
+            );
+
+            let mut fast_r = r;
+            let mut fast_g = g;
+            let mut fast_b = b;
+            let fast = [
+                std::slice::from_mut(&mut fast_r),
+                std::slice::from_mut(&mut fast_g),
+                std::slice::from_mut(&mut fast_b),
+            ];
+            hlg_scene_to_display(intensity_target, luminance_rgb, fast);
+
+            let mut precise_r = r;
+            let mut precise_g = g;
+            let mut precise_b = b;
+            let precise = [
+                std::slice::from_mut(&mut precise_r),
+                std::slice::from_mut(&mut precise_g),
+                std::slice::from_mut(&mut precise_b),
+            ];
+            hlg_scene_to_display(intensity_target, luminance_rgb, precise);
+
+            assert_all_almost_eq!(
+                &[fast_r, fast_g, fast_b],
+                &[precise_r, precise_g, precise_b],
+                7.2e-7
+            );
+
+            Ok(())
+        });
+    }
+
+    #[test]
+    fn scene_to_hlg_arb() {
+        arbtest::arbtest(|u| {
+            let mut samples = arb_samples(u)?;
+            let mut precise = samples.clone();
+
+            scene_to_hlg(&mut samples);
+            scene_to_hlg_precise(&mut precise);
+            assert_all_almost_eq!(&samples, &precise, 5e-7);
+            Ok(())
+        });
+    }
 }
diff --git a/jxl/src/util/fast_math.rs b/jxl/src/util/fast_math.rs
index f964b96..1ea5751 100644
--- a/jxl/src/util/fast_math.rs
+++ b/jxl/src/util/fast_math.rs
@@ -11,7 +11,7 @@ const POW2F_NUMER_COEFFS: [f32; 3] = [1.01749063e1, 4.88687798e1, 9.85506591e1];
 const POW2F_DENOM_COEFFS: [f32; 4] = [2.10242958e-1, -2.22328856e-2, -1.94414990e1, 9.85506633e1];
 
 #[inline]
-fn fast_pow2f(x: f32) -> f32 {
+pub fn fast_pow2f(x: f32) -> f32 {
     let x_floor = x.floor();
     let exp = f32::from_bits(((x_floor as i32 + 127) as u32) << 23);
     let frac = x - x_floor;
@@ -40,11 +40,11 @@ const LOG2F_Q: [f32; 3] = [
 ];
 
 #[inline]
-fn fast_log2f(x: f32) -> f32 {
+pub fn fast_log2f(x: f32) -> f32 {
     let x_bits = x.to_bits() as i32;
-    let exp_bits = x_bits - 0x3f2aaaab;
+    let exp_bits = x_bits.wrapping_sub(0x3f2aaaab);
     let exp_shifted = exp_bits >> 23;
-    let mantissa = f32::from_bits((x_bits - (exp_shifted << 23)) as u32);
+    let mantissa = f32::from_bits((x_bits.wrapping_sub(exp_shifted << 23)) as u32);
     let exp_val = exp_shifted as f32;
 
     let x = mantissa - 1.0;