diff --git a/.travis.yml b/.travis.yml index 78291f0..aff6565 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ dist: trusty matrix: include: - - rust: 1.28.0 + - rust: 1.36.0 env: TARGET=x86_64-unknown-linux-gnu - rust: stable diff --git a/Cargo.toml b/Cargo.toml index b0df378..1771aa7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,11 @@ rawpointer = "0.2" bencher = "0.1.2" itertools = "0.7.11" +[features] +default = ["std"] + +std = [] + [profile.release] [profile.bench] diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 3cb0ff9..b994595 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -158,7 +158,7 @@ gemm_layout!{layout_f64_032, dgemm, } -use std::ops::{Add, Mul}; +use core::ops::{Add, Mul}; trait Z { fn zero() -> Self; diff --git a/blas-bench/benches/benchmarks.rs b/blas-bench/benches/benchmarks.rs index 91fda89..54457d1 100644 --- a/blas-bench/benches/benchmarks.rs +++ b/blas-bench/benches/benchmarks.rs @@ -7,7 +7,7 @@ pub use matrixmultiply::dgemm; extern crate bencher; extern crate blas; -use std::os::raw::c_int; +use core::os::raw::c_int; #[allow(non_camel_case_types)] diff --git a/examples/usegemm.rs b/examples/usegemm.rs index fc0e63e..efae1df 100644 --- a/examples/usegemm.rs +++ b/examples/usegemm.rs @@ -5,6 +5,7 @@ // // Jump down to the next place where it says EXAMPLE. 
+extern crate core; extern crate itertools; extern crate matrixmultiply; @@ -16,7 +17,7 @@ use itertools::{ enumerate, repeat_n, }; -use std::fmt::{Display, Debug}; +use core::fmt::{Display, Debug}; trait Float : Copy + Display + Debug + PartialEq { fn zero() -> Self; diff --git a/src/aligned_alloc.rs b/src/aligned_alloc.rs index f0f7392..da6e676 100644 --- a/src/aligned_alloc.rs +++ b/src/aligned_alloc.rs @@ -1,12 +1,11 @@ -use std::alloc; -use std::alloc::{Layout, handle_alloc_error}; -use std::{mem, cmp}; +use alloc::alloc::{self, Layout, handle_alloc_error}; +use core::{mem, cmp}; #[cfg(test)] -use std::ops::{Deref, DerefMut}; +use core::ops::{Deref, DerefMut}; #[cfg(test)] -use std::slice; +use core::slice; pub(crate) struct Alloc { ptr: *mut T, len: usize, align: usize } diff --git a/src/dgemm_kernel.rs b/src/dgemm_kernel.rs index 26d2f09..b40953f 100644 --- a/src/dgemm_kernel.rs +++ b/src/dgemm_kernel.rs @@ -12,9 +12,9 @@ use kernel::{U4, U8}; use archparam; #[cfg(target_arch="x86")] -use std::arch::x86::*; +use core::arch::x86::*; #[cfg(target_arch="x86_64")] -use std::arch::x86_64::*; +use core::arch::x86_64::*; #[cfg(any(target_arch="x86", target_arch="x86_64"))] use x86::{FusedMulAdd, AvxMulAdd, DMultiplyAdd}; @@ -812,6 +812,7 @@ unsafe fn at(ptr: *const T, i: usize) -> T { #[cfg(test)] mod tests { use super::*; + use alloc::vec; use aligned_alloc::Alloc; fn aligned_alloc(elt: T, n: usize) -> Alloc where T: Copy @@ -864,6 +865,7 @@ mod tests { mod test_arch_kernels { use super::test_a_kernel; use super::super::*; + use std::println; macro_rules! test_arch_kernels_x86 { ($($feature_name:tt, $name:ident, $kernel_ty:ty),*) => { $( diff --git a/src/gemm.rs b/src/gemm.rs index 68f1ce0..af55503 100644 --- a/src/gemm.rs +++ b/src/gemm.rs @@ -6,9 +6,9 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use std::cmp::min; -use std::mem::size_of; -use std::ptr::copy_nonoverlapping; +use core::cmp::min; +use core::mem::size_of; +use core::ptr::copy_nonoverlapping; use aligned_alloc::Alloc; diff --git a/src/kernel.rs b/src/kernel.rs index 1ca9c4d..8fe63d3 100644 --- a/src/kernel.rs +++ b/src/kernel.rs @@ -6,7 +6,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::ops::{AddAssign, MulAssign}; +use core::ops::{AddAssign, MulAssign}; /// General matrix multiply kernel pub trait GemmKernel { diff --git a/src/lib.rs b/src/lib.rs index ff39e42..c9c1853 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,6 +58,25 @@ //! - `avx` //! - `sse2` //! +//! ## Features +//! +//! This crate can be used without the standard library (`#![no_std]`) by +//! disabling the default `std` feature. To do so, use this in your +//! `Cargo.toml`: +//! +//! ```toml +//! matrixmultiply = { version = "0.2", default-features = false } +//! ``` +//! +//! Runtime CPU feature detection is available only when `std` is enabled. +//! Without the `std` feature, the crate uses special CPU features only if they +//! are enabled at compile time. (To enable CPU features at compile time, pass +//! the relevant +//! [`target-cpu`](https://doc.rust-lang.org/rustc/codegen-options/index.html#target-cpu) +//! or +//! [`target-feature`](https://doc.rust-lang.org/rustc/codegen-options/index.html#target-feature) +//! option to `rustc`.) +//! //! ## Other Notes //! //! 
The functions in this crate are thread safe, as long as the destination @@ -65,6 +84,12 @@ #![doc(html_root_url = "https://docs.rs/matrixmultiply/0.2/")] +#![no_std] +#[cfg(feature = "std")] +extern crate std; + +extern crate alloc; + extern crate rawpointer; #[macro_use] mod debugmacros; diff --git a/src/sgemm_kernel.rs b/src/sgemm_kernel.rs index 96f042c..72b88f5 100644 --- a/src/sgemm_kernel.rs +++ b/src/sgemm_kernel.rs @@ -13,9 +13,9 @@ use archparam; #[cfg(target_arch="x86")] -use std::arch::x86::*; +use core::arch::x86::*; #[cfg(target_arch="x86_64")] -use std::arch::x86_64::*; +use core::arch::x86_64::*; #[cfg(any(target_arch="x86", target_arch="x86_64"))] use x86::{FusedMulAdd, AvxMulAdd, SMultiplyAdd}; @@ -499,6 +499,7 @@ unsafe fn at(ptr: *const T, i: usize) -> T { #[cfg(test)] mod tests { use super::*; + use alloc::vec; use aligned_alloc::Alloc; fn aligned_alloc(elt: T, n: usize) -> Alloc where T: Copy @@ -551,6 +552,7 @@ mod tests { mod test_arch_kernels { use super::test_a_kernel; use super::super::*; + use std::println; macro_rules! test_arch_kernels_x86 { ($($feature_name:tt, $name:ident, $kernel_ty:ty),*) => { $( diff --git a/src/util.rs b/src/util.rs index a3524b2..8e66d35 100644 --- a/src/util.rs +++ b/src/util.rs @@ -6,7 +6,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::cmp::min; +use core::cmp::min; pub struct RangeChunk { i: usize, n: usize, chunk: usize } diff --git a/src/x86/macros.rs b/src/x86/macros.rs index 7542b31..a25e13c 100644 --- a/src/x86/macros.rs +++ b/src/x86/macros.rs @@ -10,11 +10,23 @@ macro_rules! compile_env_matches_or_is_empty { } macro_rules! is_x86_feature_detected_ { - ($name:tt) => { - // for testing purposes, we can make sure only one specific feature - // is enabled by setting MMTEST_FEATURE=featurename (all others - // disabled). This does not force it to be detected, it must also be. 
- compile_env_matches_or_is_empty!("MMTEST_FEATURE", $name) && is_x86_feature_detected!($name) - } + ($name:tt) => {{ + #[cfg(feature="std")] + { + // For testing purposes, we can make sure only one specific feature + // is enabled by setting MMTEST_FEATURE=featurename (all others + // disabled). This does not force it to be detected, it must also be. + compile_env_matches_or_is_empty!("MMTEST_FEATURE", $name) && std::is_x86_feature_detected!($name) + } + #[cfg(not(feature="std"))] + { + // For testing purposes, we can make sure only one specific feature + // is enabled by setting MMTEST_FEATURE=featurename (all others + // disabled). This does not force it to be detected, it must also + // be. In the `no_std` case, the `is_x86_feature_detected` macro is + // not available, so we have to fall back to checking whether the + // feature is enabled at compile-time. + compile_env_matches_or_is_empty!("MMTEST_FEATURE", $name) && cfg!(target_feature=$name) + } + }}; } - diff --git a/src/x86/mod.rs b/src/x86/mod.rs index 9e39027..3bbd666 100644 --- a/src/x86/mod.rs +++ b/src/x86/mod.rs @@ -1,8 +1,8 @@ #[cfg(target_arch="x86")] -use std::arch::x86::*; +use core::arch::x86::*; #[cfg(target_arch="x86_64")] -use std::arch::x86_64::*; +use core::arch::x86_64::*; #[macro_use] mod macros; diff --git a/tests/sgemm.rs b/tests/sgemm.rs index 89fa8d4..3ef863b 100644 --- a/tests/sgemm.rs +++ b/tests/sgemm.rs @@ -1,3 +1,4 @@ +extern crate core; extern crate itertools; extern crate matrixmultiply; @@ -9,7 +10,7 @@ use itertools::{ enumerate, repeat_n, }; -use std::fmt::{Display, Debug}; +use core::fmt::{Display, Debug}; trait Float : Copy + Display + Debug + PartialEq { fn zero() -> Self;