Skip to content

Commit

Permalink
Initial constant-time implementation of Base64 for generic & AVX2
Browse files Browse the repository at this point in the history
  • Loading branch information
Jethro Beekman committed Apr 22, 2020
1 parent 477fabc commit 825280b
Show file tree
Hide file tree
Showing 17 changed files with 1,624 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/target
**/*.rs.bk
Cargo.lock
19 changes: 19 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
branches:
only:
# This is where pull requests from "bors r+" are built.
- staging
# This is where pull requests from "bors try" are built.
- trying
# Not really necessary, just to get a green badge on “master”
- master
language: rust

jobs:
include:
- rust: stable
- rust: beta
- rust: nightly
env: FEATURES=--features=nightly
script:
- cargo test --no-default-features
- cargo test $FEATURES
31 changes: 31 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
[package]
name = "b64-ct"
version = "0.1.0"
authors = ["Fortanix, Inc."]
license = "MPL-2.0"
edition = "2018"
description = """
Fast and secure Base64 encoding/decoding.
This crate provides an implementation of Base64 encoding/decoding that is
designed to be resistant against software side-channel attacks (such as timing
& cache attacks), see the documentation for details. On certain platforms it
also uses SIMD making it very fast. This makes it suitable for e.g. decoding
cryptographic private keys in PEM format.
The API is very similar to the base64 implementation in the old rustc-serialize
crate, making it easy to use in existing projects.
"""
repository = "/~https://github.com/fortanix/b64-ct/"
keywords = ["base64", "constant-time"]
categories = ["cryptography", "encoding", "no-std"]
readme = "README.md"

[features]
default = ["std"]
std = []
nightly = [] # Used only for testing

[dev-dependencies]
rand = "0.6"
paste = "0.1"
62 changes: 62 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,3 +1,65 @@
This project as a whole is licensed under the terms of the Mozilla Public
License, Version 2.0, see below. This project includes code written by The Rust
Project Developers and Wojciech Muła, license terms below.

----------------------------------------------------------------------------

Copyright (c) 2014 The Rust Project Developers

Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

----------------------------------------------------------------------------

Copyright (c) 2015-2018, Wojciech Muła
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------------

Mozilla Public License Version 2.0
==================================

Expand Down
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
# Fast and secure Base64 encoding/decoding

This crate provides an implementation of Base64 encoding/decoding that is
designed to be resistant against software side-channel attacks (such as timing
& cache attacks), see the [documentation] for details. On certain platforms it
also uses SIMD making it very fast. This makes it suitable for e.g. decoding
cryptographic private keys in PEM format.

The API is very similar to the base64 implementation in the old rustc-serialize
crate, making it easy to use in existing projects.

[documentation]: https://docs.rs/b64-ct

# Implementation

Depending on the runtime CPU architecture, this crate uses different
implementations with different security properties.

* x86 with AVX2: All lookup tables are implemented with SIMD
instructions. No secret-dependent memory accceses.
* Other platforms: Lookups are limited to 64-byte aligned lookup tables. On
platforms with 64-byte cache lines this may be sufficient to prevent
certain cache side-channel attacks. However, it's known that this is [not
sufficient for all platforms].

We graciously welcome contributed support for other platforms!

[not sufficient on some platforms]: https://ts.data61.csiro.au/projects/TS/cachebleed/

# Contributing

We gratefully accept bug reports and contributions from the community.
Expand Down
3 changes: 3 additions & 0 deletions bors.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
status = [
"continuous-integration/travis-ci/push",
]
37 changes: 37 additions & 0 deletions src/avx2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/* Copyright (c) Fortanix, Inc.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use core::arch::x86_64::*;

pub(crate) unsafe fn dup_mm_setr_epi8(e: [i8; 16]) -> __m256i {
_mm256_setr_epi8(
e[0x0], e[0x1], e[0x2], e[0x3], e[0x4], e[0x5], e[0x6], e[0x7],
e[0x8], e[0x9], e[0xa], e[0xb], e[0xc], e[0xd], e[0xe], e[0xf],
e[0x0], e[0x1], e[0x2], e[0x3], e[0x4], e[0x5], e[0x6], e[0x7],
e[0x8], e[0x9], e[0xa], e[0xb], e[0xc], e[0xd], e[0xe], e[0xf],
)
}

pub(crate) unsafe fn dup_mm_setr_epu8(e: [u8; 16]) -> __m256i {
_mm256_setr_epi8(
e[0x0] as _, e[0x1] as _, e[0x2] as _, e[0x3] as _, e[0x4] as _, e[0x5] as _, e[0x6] as _, e[0x7] as _,
e[0x8] as _, e[0x9] as _, e[0xa] as _, e[0xb] as _, e[0xc] as _, e[0xd] as _, e[0xe] as _, e[0xf] as _,
e[0x0] as _, e[0x1] as _, e[0x2] as _, e[0x3] as _, e[0x4] as _, e[0x5] as _, e[0x6] as _, e[0x7] as _,
e[0x8] as _, e[0x9] as _, e[0xa] as _, e[0xb] as _, e[0xc] as _, e[0xd] as _, e[0xe] as _, e[0xf] as _,
)
}

pub(crate) unsafe fn _mm256_not_si256(i: __m256i) -> __m256i {
_mm256_xor_si256(i, _mm256_set1_epi8(!0))
}

pub(crate) unsafe fn array_as_m256i(v: [u8; 32]) -> __m256i {
core::mem::transmute(v)
}

pub(crate) unsafe fn m256i_as_array(v: __m256i) -> [u8; 32] {
core::mem::transmute(v)
}
200 changes: 200 additions & 0 deletions src/decode/avx2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
/* Copyright (c) Fortanix, Inc.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use core::arch::x86_64::*;
use crate::avx2::*;

#[target_feature(enable = "avx2,bmi1,sse4.2,popcnt")]
unsafe fn decode_avx2(input: __m256i) -> (__m256i, u32, u32) {
// Step 0. Split input bytes into nibbles.
let higher_nibble = _mm256_and_si256(_mm256_srli_epi16(input, 4), _mm256_set1_epi8(0x0f));
let lower_nibble = _mm256_and_si256(input, _mm256_set1_epi8(0x0f));

// Step 1. Find invalid characters. Steps 2 & 3 will compute invalid 6-bit
// values for invalid characters. The result of the computation should only
// be used if no invalid characters are found.

// This table contains 128 bits, one bit for each of the lower 128 ASCII
// characters. A set bit indicates that the character is in the base64
// character set (the character is valid) or the character is considered
// ASCII whitespace. This table is indexed by ASCII low nibble.
let row_lut = dup_mm_setr_epu8([
0b1010_1100, 0b1111_1000, 0b1111_1000, 0b1111_1000,
0b1111_1000, 0b1111_1000, 0b1111_1000, 0b1111_1000,
0b1111_1000, 0b1111_1001, 0b1111_0001, 0b0101_0100,
0b0101_0001, 0b0101_0101, 0b0101_0000, 0b0111_0100,
]);

// This table contains column offsets (within a byte) for the table above.
// This table is indexed by ASCII high nibble.
let column_lut = dup_mm_setr_epu8([
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
0, 0, 0, 0, 0, 0, 0, 0,
]);

// Lookup table row
let row = _mm256_shuffle_epi8(row_lut, lower_nibble);
// Lookup column offset
let column = _mm256_shuffle_epi8(column_lut, higher_nibble);
// Lookup valid characters
let valid = _mm256_and_si256(row, column);
// Compute invalid character mask
let non_match = _mm256_cmpeq_epi8(valid, _mm256_setzero_si256());
// Transform mask to u32
let invalid_mask = _mm256_movemask_epi8(non_match);

// Step 2. Numbers & letters: compute 6-bit value for the 3 different
// ranges by simply adjusting the ASCII value.

// This table contains the offsets for the alphanumerical ASCII ranges.
// This table is indexed by ASCII high nibble.
let shift_lut = dup_mm_setr_epi8([
0, 0, 0,
// '0' through '9'
4,
// 'A' through 'Z'
-65, -65,
// 'a' through 'z'
-71, -71,
0, 0, 0, 0, 0, 0, 0, 0,
]);

// Get offset
let shift = _mm256_shuffle_epi8(shift_lut, higher_nibble);
// Compute 6-bit value
let shifted = _mm256_add_epi8(input, shift);

// Step 3. Special characters: lookup 6-bit value by looking it up in a
// table.

// This table specifies the ASCII ranges that contain valid special
// characters. This table is indexed by ASCII high nibble.
let spcrange_lut = dup_mm_setr_epu8([
0, 0, 0xff, 0, 0, 0xff, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
]);

// This table specifies the (inverted) 6-bit values for the special
// characters. The values in this table act as both a value and a blend
// mask. This table is indexed by the difference between ASCII low and high
// nibble.
let spcchar_lut = dup_mm_setr_epu8([
0, 0, 0, 0, 0, 0, 0, 0,
// '+', '_', '-', '/'
0, !62, !63, !62, 0, !63, 0, 0,
]);

// Check if character is in the range for special characters
let sel_range = _mm256_shuffle_epi8(spcrange_lut, higher_nibble);
// Compute difference between ASCII low and high nibble
let lo_sub_hi = _mm256_sub_epi8(lower_nibble, higher_nibble);
// Lookup special character 6-bit value
let specials = _mm256_shuffle_epi8(spcchar_lut, lo_sub_hi);
// Combine blend masks from range and value
let sel_spec = _mm256_and_si256(sel_range, specials);

// Combine results of step 1 and step 2
let result = _mm256_blendv_epi8(shifted, _mm256_not_si256(specials), sel_spec);

// Step 4. Compute mask for valid non-whitespace bytes. The mask will be
// used to copy only relevant bytes into the output.

// This table specifies the character ranges which should be decoded. The
// format is a range table for the PCMPESTRM instruction.
let valid_nonws_set = _mm_setr_epi8(
b'A' as _, b'Z' as _,
b'a' as _, b'z' as _,
b'0' as _, b'9' as _,
b'+' as _, b'+' as _,
b'/' as _, b'/' as _,
b'-' as _, b'-' as _,
b'_' as _, b'_' as _,
0, 0,
);

// Split input into 128-bit values
let lane0 = _mm256_extracti128_si256(input, 0);
let lane1 = _mm256_extracti128_si256(input, 1);
// Compute bitmask for each 128-bit value
let mask0 = _mm_cmpestrm(valid_nonws_set, 14, lane0, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_BIT_MASK);
let mask1 = _mm_cmpestrm(valid_nonws_set, 14, lane1, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_BIT_MASK);
// Combine bitmasks into integer value
let valid_mask = _mm_extract_epi16(mask0, 0) as u32 | ((_mm_extract_epi16(mask1, 0) as u32) << 16);

(result, invalid_mask as _, valid_mask as _)
}

pub(super) struct Avx2;

impl super::Decoder for Avx2 {
type Block = [u8; 32];

#[inline]
fn decode_block(block: &mut Self::Block) -> super::BlockResult {
#[target_feature(enable = "avx2,bmi1,sse4.2,popcnt")]
unsafe {
let input = array_as_m256i(*block);

let (unpacked, invalid_mask, mut valid_mask) = decode_avx2(input);

let unpacked = m256i_as_array(unpacked);

let first_invalid = match invalid_mask.trailing_zeros() {
32 => None,
v => Some(v as _),
};
let out_length = valid_mask.count_ones() as _;

let mut out_iter = block.iter_mut();
for &val in unpacked.iter() {
if (valid_mask & 1) == 1 {
*out_iter.next().unwrap() = val;
}
valid_mask >>= 1;
}

super::BlockResult {
out_length,
first_invalid,
}
}
}

#[inline(always)]
fn zero_block() -> Self::Block {
[b' '; 32]
}
}

impl super::Packer for Avx2 {
type Input = [u8; 32];
const OUT_BUF_LEN: usize = 28;

fn pack_block(input: &Self::Input, output: &mut [u8]) {
#[target_feature(enable = "avx2,bmi1,sse4.2,popcnt")]
unsafe {
assert_eq!(output.len(), Self::OUT_BUF_LEN);

let unpacked = array_as_m256i(*input);

// Pack 32× 6-bit values into 16× 12-bit values
let packed1 = _mm256_maddubs_epi16(unpacked, _mm256_set1_epi16(0x0140));
// Pack 16× 12-bit values into 8× 3-byte values
let packed2 = _mm256_madd_epi16(packed1, _mm256_set1_epi32(0x00011000));
// Pack 8× 3-byte values into 2× 12-byte values
let packed3 = _mm256_shuffle_epi8(packed2, dup_mm_setr_epu8([
2, 1, 0,
6, 5, 4,
10, 9, 8,
14, 13, 12,
0xff, 0xff, 0xff, 0xff,
]));

_mm_storeu_si128(output.as_mut_ptr() as _, _mm256_extracti128_si256(packed3, 0));
_mm_storeu_si128(output.as_mut_ptr().offset(12) as _, _mm256_extracti128_si256(packed3, 1));
}
}
}
Loading

0 comments on commit 825280b

Please sign in to comment.