diff --git a/android/settings.gradle b/android/settings.gradle index 5433200811f..eedc203ab08 100644 --- a/android/settings.gradle +++ b/android/settings.gradle @@ -1,3 +1,4 @@ rootProject.name='android' include ':core' include ':pytorch-native' +include ':tokenizer-native' diff --git a/android/tokenizer-native/README.md b/android/tokenizer-native/README.md new file mode 100644 index 00000000000..affbed2e14e --- /dev/null +++ b/android/tokenizer-native/README.md @@ -0,0 +1,3 @@ +# Tokenizer + +This folder contains the build of the tokenizer-native library for Android. diff --git a/android/tokenizer-native/build.gradle b/android/tokenizer-native/build.gradle new file mode 100644 index 00000000000..691381f2d0b --- /dev/null +++ b/android/tokenizer-native/build.gradle @@ -0,0 +1,125 @@ +apply plugin: 'com.android.library' +apply plugin: 'maven-publish' +apply plugin: 'signing' + +android { + compileSdkVersion 34 + namespace "ai.djl.android.tokenizer_native" + + defaultConfig { + minSdkVersion 21 + targetSdkVersion 34 + versionCode 1 + versionName "1.0" + + consumerProguardFiles 'consumer-rules.pro' + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile("proguard-android-optimize.txt"), "proguard-rules.pro" + } + } + + sourceSets { + main { + jniLibs.srcDirs = ["jnilib/"] + } + } + publishing { + singleVariant("release") { + withSourcesJar() + } + } +} + +// TODO: Move the publishing configuration into a separate Gradle file +signing { + required(project.hasProperty("staging") || project.hasProperty("snapshot")) + def signingKey = findProperty("signingKey") + def signingPassword = findProperty("signingPassword") + useInMemoryPgpKeys(signingKey, signingPassword) + sign publishing.publications +} + +afterEvaluate { + publishing { + publications { + release(MavenPublication) { + from components.release + pom { + name = "Tokenizer Android native library" + description = "Tokenizer Android native library binary distribution" + url = "http://www.djl.ai/android" 
+ packaging = "aar" + + licenses { + license { + name = "The Apache License, Version 2.0" + url = "https://www.apache.org/licenses/LICENSE-2.0" + } + } + + scm { + connection = "scm:git:git@github.com:deepjavalibrary/djl.git" + developerConnection = "scm:git:git@github.com:deepjavalibrary/djl.git" + url = "https://github.com/deepjavalibrary/djl" + tag = "HEAD" + } + + developers { + developer { + name = "DJL.AI Team" + email = "djl-dev@amazon.com" + organization = "Amazon AI" + organizationUrl = "https://amazon.com" + } + } + } + } + } + + repositories { + maven { + if (project.hasProperty("snapshot")) { + name = "snapshot" + url = "https://oss.sonatype.org/content/repositories/snapshots/" + credentials { + username = findProperty("ossrhUsername") + password = findProperty("ossrhPassword") + } + } else if (project.hasProperty("staging")) { + name = "staging" + url = "https://oss.sonatype.org/service/local/staging/deploy/maven2/" + credentials { + username = findProperty("ossrhUsername") + password = findProperty("ossrhPassword") + } + } else { + name = "local" + url = "build/repo" + } + } + } + } +} + + +tasks.register('processResources') { + doLast { + def url = "https://publish.djl.ai/tokenizer/jnilib/android" + def abis = ["armeabi-v7a", "arm64-v8a", "x86", "x86_64"] + abis.each { abi -> + def downloadPath = new URL("${url}/${abi}/libdjl_tokenizer.so") + def file = file("${project.projectDir}/jnilib/${abi}/libdjl_tokenizer.so") + if (!file.exists()) { + project.logger.lifecycle("${url}/${abi}/libdjl_tokenizer.so") + file.getParentFile().mkdirs() + downloadPath.withInputStream { i -> file.withOutputStream { it << i } } + } + } + } +} + +assemble.dependsOn processResources diff --git a/android/tokenizer-native/consumer-rules.pro b/android/tokenizer-native/consumer-rules.pro new file mode 100644 index 00000000000..e69de29bb2d diff --git a/android/tokenizer-native/gradlew b/android/tokenizer-native/gradlew new file mode 120000 index 00000000000..502f5a2d3ec --- 
/dev/null +++ b/android/tokenizer-native/gradlew @@ -0,0 +1 @@ +../gradlew \ No newline at end of file diff --git a/android/tokenizer-native/proguard-rules.pro b/android/tokenizer-native/proguard-rules.pro new file mode 100644 index 00000000000..f1b424510da --- /dev/null +++ b/android/tokenizer-native/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile diff --git a/android/tokenizer-native/src/main/AndroidManifest.xml b/android/tokenizer-native/src/main/AndroidManifest.xml new file mode 100644 index 00000000000..c4e6c98d775 --- /dev/null +++ b/android/tokenizer-native/src/main/AndroidManifest.xml @@ -0,0 +1 @@ + diff --git a/extensions/tokenizers/build.cmd b/extensions/tokenizers/build.cmd index b7fd68b9baa..49efdfe88a4 100644 --- a/extensions/tokenizers/build.cmd +++ b/extensions/tokenizers/build.cmd @@ -11,4 +11,4 @@ cargo build --manifest-path %RUST_MANIFEST% --release @rem for nightly ci md build\jnilib\win-x86_64\cpu -copy rust\target\release\djl.dll build\jnilib\win-x86_64\cpu\tokenizers.dll +copy rust\target\release\djl_tokenizer.dll build\jnilib\win-x86_64\cpu\tokenizers.dll diff --git a/extensions/tokenizers/build.gradle.kts b/extensions/tokenizers/build.gradle.kts index 55ae6c9d818..dbf32273e71 100644 --- a/extensions/tokenizers/build.gradle.kts +++ b/extensions/tokenizers/build.gradle.kts @@ -131,6 +131,22 @@ tasks { } } + register("compileAndroidJNI"){ + doFirst { + for (abi in listOf("armeabi-v7a", "arm64-v8a", "x86", "x86_64")) { + exec { + commandLine("bash", "build_android.sh", abi) + } + val ciDir = project.projectDir / "jnilib/${libs.versions.djl.get()}/android/$abi" + copy { + from(buildDirectory / "jnilib" / "$abi") + into(ciDir) + } + delete("$buildDirectory/jnilib") + } + } + } + register("formatPython") { doFirst { exec { diff --git a/extensions/tokenizers/build.sh b/extensions/tokenizers/build.sh index 6478708a5ab..332de2bb5ff 100755 --- a/extensions/tokenizers/build.sh +++ b/extensions/tokenizers/build.sh @@ -21,10 +21,10 @@ function copy_files() { flavor="$2" if [[ $PLATFORM == 'darwin' ]]; then mkdir -p "build/jnilib/osx-$arch/$flavor" - cp -f rust/target/release/libdjl.dylib "build/jnilib/osx-$arch/$flavor/libtokenizers.dylib" + cp -f rust/target/release/libdjl_tokenizer.dylib "build/jnilib/osx-$arch/$flavor/libtokenizers.dylib" elif [[ $PLATFORM 
== 'linux' ]]; then mkdir -p "build/jnilib/linux-$arch/$flavor" - cp -f rust/target/release/libdjl.so "build/jnilib/linux-$arch/$flavor/libtokenizers.so" + cp -f rust/target/release/libdjl_tokenizer.so "build/jnilib/linux-$arch/$flavor/libtokenizers.so" fi } diff --git a/extensions/tokenizers/build_android.sh b/extensions/tokenizers/build_android.sh new file mode 100644 index 00000000000..05034469ded --- /dev/null +++ b/extensions/tokenizers/build_android.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -ex + +WORK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +export WORK_DIR + +FLAVOR=$1 + +if [ ! -d "build" ]; then + mkdir build +fi + +pushd . + +if [ ! -d "build" ]; then + mkdir build +fi + +rm -rf build/classes +mkdir build/classes + +javac -sourcepath src/main/java/ src/main/java/ai/djl/huggingface/tokenizers/jni/TokenizersLibrary.java -h build/include -d build/classes +javac -sourcepath src/main/java/ src/main/java/ai/djl/engine/rust/RustLibrary.java -h build/include -d build/classes + +cd rust/ +cargo ndk -t $FLAVOR -o $WORK_DIR/build/jnilib --platform=21 build --release +cd .. 
+popd diff --git a/extensions/tokenizers/rust/Cargo.toml b/extensions/tokenizers/rust/Cargo.toml index 93394982d82..ebfa6a93ff7 100644 --- a/extensions/tokenizers/rust/Cargo.toml +++ b/extensions/tokenizers/rust/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "djl" +name = "djl_tokenizer" version = "0.1.0" authors = ["Frank Liu "] edition = "2021" @@ -13,7 +13,7 @@ candle-flash-attn = { version = "*", optional = true } candle-cublaslt = { git = "https://github.com/huggingface/candle-cublaslt", rev = "cf789b7dd6d4abb19b03b9556442f94f0588b4a0", optional = true } candle-layer-norm = { git = "https://github.com/xyang16/candle-layer-norm", rev = "e574de6a7f88bafbede8edf9ee43170c6a8ce51a", optional = true } candle-rotary = { git = "https://github.com/huggingface/candle-rotary", rev = "0a718a0856569a92f3112e64f10d07e4447822e8", optional = true } -tokenizers = { version = "0.20.3", features = ["http"] } +tokenizers = { version = "0.20.3" } half = "2.4.0" tracing = "0.1.40" safetensors = "0.4.3" @@ -33,6 +33,9 @@ openssl-src = { git = "https://github.com/alexcrichton/openssl-src-rs", rev = "c [target.'cfg(target_os = "linux")'.dependencies] openssl = { version = "0.10", features = ["vendored"] } +[target.'cfg(not(target_os = "android"))'.dependencies] +tokenizers = { version = "0.20.0", features = ["http"] } + [lib] crate-type = ["cdylib"] diff --git a/extensions/tokenizers/rust/src/build.rs b/extensions/tokenizers/rust/src/build.rs new file mode 100644 index 00000000000..c3da5df22a4 --- /dev/null +++ b/extensions/tokenizers/rust/src/build.rs @@ -0,0 +1,26 @@ +// see https://github.com/bbqsrc/cargo-ndk?tab=readme-ov-file#usage + +use std::{env, path::{Path, PathBuf}}; + +fn main() { + if env::var("CARGO_CFG_TARGET_OS").unwrap() == "android" { + android(); + } +} + +fn android() { + println!("cargo:rustc-link-lib=c++_shared"); + + if let Ok(output_path) = env::var("CARGO_NDK_OUTPUT_PATH") { + let sysroot_libs_path = 
PathBuf::from(env::var_os("CARGO_NDK_SYSROOT_LIBS_PATH").unwrap()); + let lib_path = sysroot_libs_path.join("libc++_shared.so"); + std::fs::copy( + lib_path, + Path::new(&output_path) + .join(&env::var("CARGO_NDK_ANDROID_TARGET").unwrap()) + .join("libc++_shared.so"), + ) + .unwrap(); + } +} \ No newline at end of file diff --git a/extensions/tokenizers/rust/src/lib.rs b/extensions/tokenizers/rust/src/lib.rs index 59bc0a4f2f4..3d4cd1264ad 100644 --- a/extensions/tokenizers/rust/src/lib.rs +++ b/extensions/tokenizers/rust/src/lib.rs @@ -40,8 +40,12 @@ use tk::tokenizer::{EncodeInput, Encoding}; use tk::utils::padding::{PaddingParams, PaddingStrategy}; use tk::utils::truncation::{TruncationParams, TruncationStrategy}; use tk::Tokenizer; -use tk::{FromPretrainedParameters, Offsets}; +use tk::Offsets; +#[cfg(not(target_os = "android"))] +use tk::FromPretrainedParameters; + +#[cfg(not(target_os = "android"))] #[no_mangle] pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_createTokenizer< 'local, @@ -72,6 +76,20 @@ pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_ } } +#[cfg(target_os = "android")] +#[no_mangle] +pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_createTokenizer< + 'local, +>( + mut env: JNIEnv<'local>, + _: JObject, + _: JString, + _: JString, +) -> jlong { + env.throw("Not supported on Android").unwrap(); + 0 +} + #[no_mangle] pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_createTokenizerFromString< 'local, diff --git a/extensions/tokenizers/src/main/java/ai/djl/huggingface/tokenizers/jni/LibUtils.java b/extensions/tokenizers/src/main/java/ai/djl/huggingface/tokenizers/jni/LibUtils.java index 5e779af9a9b..d8f981a8915 100644 --- a/extensions/tokenizers/src/main/java/ai/djl/huggingface/tokenizers/jni/LibUtils.java +++ b/extensions/tokenizers/src/main/java/ai/djl/huggingface/tokenizers/jni/LibUtils.java @@ -66,6 +66,11 @@ public 
static void checkStatus() { } private static void loadLibrary() { + if ("http://www.android.com/".equals(System.getProperty("java.vendor.url"))) { + System.loadLibrary("djl_tokenizer"); // NOPMD + return; + } + String[] libs; if (System.getProperty("os.name").startsWith("Windows")) { libs =