diff --git a/jxl/src/color/tf.rs b/jxl/src/color/tf.rs
index ef9baef..4d5b789 100644
--- a/jxl/src/color/tf.rs
+++ b/jxl/src/color/tf.rs
@@ -99,6 +99,32 @@ pub fn srgb_to_linear(samples: &mut [f32]) {
     }
 }
 
+/// Converts the linear samples with the BT.709 transfer curve.
+pub fn linear_to_bt709(samples: &mut [f32]) {
+    for s in samples {
+        let a = s.abs();
+        *s = if a <= 0.018 {
+            a * 4.5
+        } else {
+            crate::util::fast_powf(a, 0.45).mul_add(1.099, -0.099)
+        }
+        .copysign(*s);
+    }
+}
+
+/// Converts samples in BT.709 transfer curve to linear. Inverse of `linear_to_bt709`.
+pub fn bt709_to_linear(samples: &mut [f32]) {
+    for s in samples {
+        let a = s.abs();
+        *s = if a <= 0.081 {
+            a / 4.5
+        } else {
+            crate::util::fast_powf(a.mul_add(1.0 / 1.099, 0.099 / 1.099), 1.0 / 0.45)
+        }
+        .copysign(*s);
+    }
+}
+
 #[cfg(test)]
 mod test {
     use test_log::test;
@@ -138,6 +164,19 @@ mod test {
         });
     }
 
+    #[test]
+    fn bt709_roundtrip_arb() {
+        arbtest::arbtest(|u| {
+            let samples: Vec<f32> = arb_samples(u)?;
+            let mut output = samples.clone();
+
+            linear_to_bt709(&mut output);
+            bt709_to_linear(&mut output);
+            assert_all_almost_eq!(&output, &samples, 5e-6);
+            Ok(())
+        });
+    }
+
     #[test]
     fn linear_to_srgb_fast_arb() {
         arbtest::arbtest(|u| {
diff --git a/jxl/src/util.rs b/jxl/src/util.rs
index 6dc7889..f0f220a 100644
--- a/jxl/src/util.rs
+++ b/jxl/src/util.rs
@@ -8,6 +8,7 @@ pub mod test;
 
 mod bits;
 mod concat_slice;
+mod fast_math;
 mod linalg;
 mod log2;
 mod rational_poly;
@@ -17,6 +18,7 @@ mod vec_helpers;
 
 pub use bits::*;
 pub use concat_slice::*;
+pub use fast_math::*;
 pub use linalg::*;
 pub use log2::*;
 pub use rational_poly::*;
diff --git a/jxl/src/util/fast_math.rs b/jxl/src/util/fast_math.rs
new file mode 100644
index 0000000..f964b96
--- /dev/null
+++ b/jxl/src/util/fast_math.rs
@@ -0,0 +1,82 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#![allow(clippy::excessive_precision)]
+
+use super::eval_rational_poly;
+
+const POW2F_NUMER_COEFFS: [f32; 3] = [1.01749063e1, 4.88687798e1, 9.85506591e1];
+const POW2F_DENOM_COEFFS: [f32; 4] = [2.10242958e-1, -2.22328856e-2, -1.94414990e1, 9.85506633e1];
+
+#[inline]
+fn fast_pow2f(x: f32) -> f32 {
+    let x_floor = x.floor();
+    let exp = f32::from_bits(((x_floor as i32 + 127) as u32) << 23);
+    let frac = x - x_floor;
+
+    let num = frac + POW2F_NUMER_COEFFS[0];
+    let num = num * frac + POW2F_NUMER_COEFFS[1];
+    let num = num * frac + POW2F_NUMER_COEFFS[2];
+    let num = num * exp;
+
+    let den = POW2F_DENOM_COEFFS[0] * frac + POW2F_DENOM_COEFFS[1];
+    let den = den * frac + POW2F_DENOM_COEFFS[2];
+    let den = den * frac + POW2F_DENOM_COEFFS[3];
+
+    num / den
+}
+
+const LOG2F_P: [f32; 3] = [
+    -1.8503833400518310e-6,
+    1.4287160470083755,
+    7.4245873327820566e-1,
+];
+const LOG2F_Q: [f32; 3] = [
+    9.9032814277590719e-1,
+    1.0096718572241148,
+    1.7409343003366853e-1,
+];
+
+#[inline]
+fn fast_log2f(x: f32) -> f32 {
+    let x_bits = x.to_bits() as i32;
+    let exp_bits = x_bits - 0x3f2aaaab;
+    let exp_shifted = exp_bits >> 23;
+    let mantissa = f32::from_bits((x_bits - (exp_shifted << 23)) as u32);
+    let exp_val = exp_shifted as f32;
+
+    let x = mantissa - 1.0;
+    eval_rational_poly(x, LOG2F_P, LOG2F_Q) + exp_val
+}
+
+// Max relative error: ~3e-5
+#[inline]
+pub fn fast_powf(base: f32, exp: f32) -> f32 {
+    fast_pow2f(fast_log2f(base) * exp)
+}
+
+#[cfg(test)]
+mod test {
+    use test_log::test;
+
+    use super::*;
+
+    #[test]
+    fn fast_powf_arb() {
+        arbtest::arbtest(|u| {
+            // (0.0, 128.0]
+            let base = u.int_in_range(1..=1 << 24)? as f32 / (1 << 17) as f32;
+            // [-8.0, 8.0]
+            let exp = u.int_in_range(-(1i32 << 23)..=1 << 23)? as f32 / (1 << 20) as f32;
+
+            let expected = base.powf(exp);
+            let actual = fast_powf(base, exp);
+            let abs_error = (actual - expected).abs();
+            let rel_error = abs_error / expected;
+            assert!(rel_error < 3e-5);
+            Ok(())
+        });
+    }
+}