diff --git a/android/settings.gradle b/android/settings.gradle
index 5433200811f..eedc203ab08 100644
--- a/android/settings.gradle
+++ b/android/settings.gradle
@@ -1,3 +1,4 @@
rootProject.name='android'
include ':core'
include ':pytorch-native'
+include ':tokenizer-native'
diff --git a/android/tokenizer-native/README.md b/android/tokenizer-native/README.md
new file mode 100644
index 00000000000..affbed2e14e
--- /dev/null
+++ b/android/tokenizer-native/README.md
@@ -0,0 +1,3 @@
+# Tokenizer
+
+This folder contains the build of the tokenizer-native library for Android.
diff --git a/android/tokenizer-native/build.gradle b/android/tokenizer-native/build.gradle
new file mode 100644
index 00000000000..691381f2d0b
--- /dev/null
+++ b/android/tokenizer-native/build.gradle
@@ -0,0 +1,140 @@
+apply plugin: 'com.android.library'
+apply plugin: 'maven-publish'
+apply plugin: 'signing'
+
+android {
+    compileSdkVersion 34
+    namespace "ai.djl.android.tokenizer_native"
+
+    defaultConfig {
+        minSdkVersion 21
+        targetSdkVersion 34
+        versionCode 1
+        versionName "1.0"
+
+        consumerProguardFiles 'consumer-rules.pro'
+    }
+
+    buildTypes {
+        release {
+            minifyEnabled false
+            proguardFiles getDefaultProguardFile("proguard-android-optimize.txt"), "proguard-rules.pro"
+        }
+    }
+
+    sourceSets {
+        main {
+            // Native libraries are packaged from jnilib/<abi>/, which the processResources task fills.
+            jniLibs.srcDirs = ["jnilib/"]
+        }
+    }
+
+    publishing {
+        // Publish the release variant together with a sources jar.
+        singleVariant("release") {
+            withSourcesJar()
+        }
+    }
+}
+
+// TODO: Move the publishing configuration into its own Gradle file
+signing {
+    // Signing is only mandatory when the "staging" or "snapshot" project property is set.
+    required(project.hasProperty("staging") || project.hasProperty("snapshot"))
+    def signingKey = findProperty("signingKey")
+    def signingPassword = findProperty("signingPassword")
+    useInMemoryPgpKeys(signingKey, signingPassword)
+    sign publishing.publications
+}
+
+// The publication is built from components.release, so it is configured in afterEvaluate.
+afterEvaluate {
+    publishing {
+        publications {
+            release(MavenPublication) {
+                from components.release
+                pom {
+                    name = "Tokenizer Android native library"
+                    description = "Tokenizer Android native library binary distribution"
+                    url = "http://www.djl.ai/android"
+                    packaging = "aar"
+
+                    licenses {
+                        license {
+                            name = "The Apache License, Version 2.0"
+                            url = "https://www.apache.org/licenses/LICENSE-2.0"
+                        }
+                    }
+
+                    scm {
+                        connection = "scm:git:git@github.com:deepjavalibrary/djl.git"
+                        developerConnection = "scm:git:git@github.com:deepjavalibrary/djl.git"
+                        url = "https://github.com/deepjavalibrary/djl"
+                        tag = "HEAD"
+                    }
+
+                    developers {
+                        developer {
+                            name = "DJL.AI Team"
+                            email = "djl-dev@amazon.com"
+                            organization = "Amazon AI"
+                            organizationUrl = "https://amazon.com"
+                        }
+                    }
+                }
+            }
+        }
+
+        repositories {
+            maven {
+                // The "snapshot" / "staging" properties select a Sonatype repository; the default is build/repo.
+                if (project.hasProperty("snapshot")) {
+                    name = "snapshot"
+                    url = "https://oss.sonatype.org/content/repositories/snapshots/"
+                    credentials {
+                        username = findProperty("ossrhUsername")
+                        password = findProperty("ossrhPassword")
+                    }
+                } else if (project.hasProperty("staging")) {
+                    name = "staging"
+                    url = "https://oss.sonatype.org/service/local/staging/deploy/maven2/"
+                    credentials {
+                        username = findProperty("ossrhUsername")
+                        password = findProperty("ossrhPassword")
+                    }
+                } else {
+                    name = "local"
+                    url = "build/repo"
+                }
+            }
+        }
+    }
+}
+
+// Downloads the prebuilt tokenizer JNI library for each ABI into jnilib/<abi>/.
+// Libraries that already exist locally are not downloaded again.
+tasks.register('processResources') {
+    doLast {
+        def url = "https://publish.djl.ai/tokenizer/jnilib/android"
+        def abis = ["armeabi-v7a", "arm64-v8a", "x86", "x86_64"]
+        abis.each { abi ->
+            def downloadPath = new URL("${url}/${abi}/libdjl_tokenizer.so")
+            def target = file("${project.projectDir}/jnilib/${abi}/libdjl_tokenizer.so")
+            if (!target.exists()) {
+                project.logger.lifecycle("${url}/${abi}/libdjl_tokenizer.so")
+                target.getParentFile().mkdirs()
+                // Download into a side file and move it into place afterwards: an interrupted
+                // transfer must not leave a truncated .so that the exists() check would accept.
+                def partial = new File(target.getParentFile(), "${target.name}.part")
+                downloadPath.withInputStream { i -> partial.withOutputStream { it << i } }
+                java.nio.file.Files.move(partial.toPath(), target.toPath(),
+                        java.nio.file.StandardCopyOption.REPLACE_EXISTING)
+            }
+        }
+    }
+}
+
+// Hook the download into preBuild as well: depending on it from assemble alone does not
+// order it before the Android tasks that package the contents of jnilib/.
+preBuild.dependsOn processResources
+assemble.dependsOn processResources
diff --git a/android/tokenizer-native/consumer-rules.pro b/android/tokenizer-native/consumer-rules.pro
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/android/tokenizer-native/gradlew b/android/tokenizer-native/gradlew
new file mode 120000
index 00000000000..502f5a2d3ec
--- /dev/null
+++ b/android/tokenizer-native/gradlew
@@ -0,0 +1 @@
+../gradlew
\ No newline at end of file
diff --git a/android/tokenizer-native/proguard-rules.pro b/android/tokenizer-native/proguard-rules.pro
new file mode 100644
index 00000000000..f1b424510da
--- /dev/null
+++ b/android/tokenizer-native/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
diff --git a/android/tokenizer-native/src/main/AndroidManifest.xml b/android/tokenizer-native/src/main/AndroidManifest.xml
new file mode 100644
index 00000000000..c4e6c98d775
--- /dev/null
+++ b/android/tokenizer-native/src/main/AndroidManifest.xml
@@ -0,0 +1 @@
+<manifest xmlns:android="http://schemas.android.com/apk/res/android" />
diff --git a/extensions/tokenizers/build.cmd b/extensions/tokenizers/build.cmd
index b7fd68b9baa..49efdfe88a4 100644
--- a/extensions/tokenizers/build.cmd
+++ b/extensions/tokenizers/build.cmd
@@ -11,4 +11,4 @@ cargo build --manifest-path %RUST_MANIFEST% --release
@rem for nightly ci
md build\jnilib\win-x86_64\cpu
-copy rust\target\release\djl.dll build\jnilib\win-x86_64\cpu\tokenizers.dll
+copy rust\target\release\djl_tokenizer.dll build\jnilib\win-x86_64\cpu\tokenizers.dll
diff --git a/extensions/tokenizers/build.gradle.kts b/extensions/tokenizers/build.gradle.kts
index 55ae6c9d818..dbf32273e71 100644
--- a/extensions/tokenizers/build.gradle.kts
+++ b/extensions/tokenizers/build.gradle.kts
@@ -131,6 +131,22 @@ tasks {
}
}
+    // Cross-compiles the tokenizer JNI library for every Android ABI via build_android.sh.
+    register("compileAndroidJNI") {
+        doFirst {
+            listOf("armeabi-v7a", "arm64-v8a", "x86", "x86_64").forEach { abi ->
+                exec { commandLine("bash", "build_android.sh", abi) }
+                val stagingDir = project.projectDir / "jnilib/${libs.versions.djl.get()}/android/$abi"
+                copy {
+                    from(buildDirectory / "jnilib" / abi)
+                    into(stagingDir)
+                }
+                // Remove the jnilib build output again before the next ABI is built.
+                delete("$buildDirectory/jnilib")
+            }
+        }
+    }
+
register("formatPython") {
doFirst {
exec {
diff --git a/extensions/tokenizers/build.sh b/extensions/tokenizers/build.sh
index 6478708a5ab..332de2bb5ff 100755
--- a/extensions/tokenizers/build.sh
+++ b/extensions/tokenizers/build.sh
@@ -21,10 +21,10 @@ function copy_files() {
flavor="$2"
if [[ $PLATFORM == 'darwin' ]]; then
mkdir -p "build/jnilib/osx-$arch/$flavor"
- cp -f rust/target/release/libdjl.dylib "build/jnilib/osx-$arch/$flavor/libtokenizers.dylib"
+ cp -f rust/target/release/libdjl_tokenizer.dylib "build/jnilib/osx-$arch/$flavor/libtokenizers.dylib"
elif [[ $PLATFORM == 'linux' ]]; then
mkdir -p "build/jnilib/linux-$arch/$flavor"
- cp -f rust/target/release/libdjl.so "build/jnilib/linux-$arch/$flavor/libtokenizers.so"
+ cp -f rust/target/release/libdjl_tokenizer.so "build/jnilib/linux-$arch/$flavor/libtokenizers.so"
fi
}
diff --git a/extensions/tokenizers/build_android.sh b/extensions/tokenizers/build_android.sh
new file mode 100644
index 00000000000..05034469ded
--- /dev/null
+++ b/extensions/tokenizers/build_android.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+#
+# Cross-compiles the tokenizer JNI library for a single Android ABI with cargo-ndk.
+#
+# Usage: build_android.sh <abi>    e.g. arm64-v8a, armeabi-v7a, x86 or x86_64
+
+set -ex
+
+WORK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+export WORK_DIR
+
+# Stop with a usage message instead of handing cargo-ndk an empty target.
+FLAVOR="${1:?usage: build_android.sh <abi>}"
+
+# All relative paths below are relative to the directory of this script, so
+# switch there regardless of where the caller started the script.
+pushd "$WORK_DIR"
+
+rm -rf build/classes
+# -p also creates build/ itself on a clean checkout.
+mkdir -p build/classes
+
+# Compiling with -h writes the generated JNI headers to build/include.
+javac -sourcepath src/main/java/ src/main/java/ai/djl/huggingface/tokenizers/jni/TokenizersLibrary.java -h build/include -d build/classes
+javac -sourcepath src/main/java/ src/main/java/ai/djl/engine/rust/RustLibrary.java -h build/include -d build/classes
+
+cd rust/
+cargo ndk -t "$FLAVOR" -o "$WORK_DIR/build/jnilib" --platform=21 build --release
+cd ..
+popd
diff --git a/extensions/tokenizers/rust/Cargo.toml b/extensions/tokenizers/rust/Cargo.toml
index 93394982d82..ebfa6a93ff7 100644
--- a/extensions/tokenizers/rust/Cargo.toml
+++ b/extensions/tokenizers/rust/Cargo.toml
@@ -1,5 +1,5 @@
[package]
-name = "djl"
+name = "djl_tokenizer"
version = "0.1.0"
authors = ["Frank Liu "]
edition = "2021"
@@ -13,7 +13,7 @@ candle-flash-attn = { version = "*", optional = true }
candle-cublaslt = { git = "/~https://github.com/huggingface/candle-cublaslt", rev = "cf789b7dd6d4abb19b03b9556442f94f0588b4a0", optional = true }
candle-layer-norm = { git = "/~https://github.com/xyang16/candle-layer-norm", rev = "e574de6a7f88bafbede8edf9ee43170c6a8ce51a", optional = true }
candle-rotary = { git = "/~https://github.com/huggingface/candle-rotary", rev = "0a718a0856569a92f3112e64f10d07e4447822e8", optional = true }
-tokenizers = { version = "0.20.3", features = ["http"] }
+tokenizers = { version = "0.20.3" }
half = "2.4.0"
tracing = "0.1.40"
safetensors = "0.4.3"
@@ -33,6 +33,9 @@ openssl-src = { git = "/~https://github.com/alexcrichton/openssl-src-rs", rev = "c
[target.'cfg(target_os = "linux")'.dependencies]
openssl = { version = "0.10", features = ["vendored"] }
+[target.'cfg(not(target_os = "android"))'.dependencies]
+tokenizers = { version = "0.20.3", features = ["http"] } # same requirement as the base entry; Android builds omit "http"
+
[lib]
crate-type = ["cdylib"]
diff --git a/extensions/tokenizers/rust/src/build.rs b/extensions/tokenizers/rust/src/build.rs
new file mode 100644
index 00000000000..c3da5df22a4
--- /dev/null
+++ b/extensions/tokenizers/rust/src/build.rs
@@ -0,0 +1,39 @@
+// Build script: on Android targets, links against libc++_shared and copies the
+// shared library into the cargo-ndk output folder.
+// see https://github.com/bbqsrc/cargo-ndk?tab=readme-ov-file#usage
+
+use std::{
+    env, fs,
+    path::{Path, PathBuf},
+};
+
+fn main() {
+    // CARGO_CFG_TARGET_OS describes the compilation target; the build script itself runs on the host.
+    if env::var("CARGO_CFG_TARGET_OS").unwrap() == "android" {
+        android();
+    }
+}
+
+/// Links `libc++_shared` and, when `CARGO_NDK_OUTPUT_PATH` is set, copies
+/// `libc++_shared.so` from the NDK sysroot into `<output path>/<android target>/`.
+fn android() {
+    println!("cargo:rustc-link-lib=c++_shared");
+
+    if let Ok(output_path) = env::var("CARGO_NDK_OUTPUT_PATH") {
+        let sysroot_libs_path = PathBuf::from(
+            env::var_os("CARGO_NDK_SYSROOT_LIBS_PATH")
+                .expect("CARGO_NDK_SYSROOT_LIBS_PATH is not set"),
+        );
+        let android_target =
+            env::var("CARGO_NDK_ANDROID_TARGET").expect("CARGO_NDK_ANDROID_TARGET is not set");
+        let lib_path = sysroot_libs_path.join("libc++_shared.so");
+        let target_dir = Path::new(&output_path).join(android_target);
+
+        // fs::copy does not create missing parent directories, so make sure the
+        // per-target folder exists before copying into it.
+        fs::create_dir_all(&target_dir)
+            .unwrap_or_else(|e| panic!("cannot create {}: {e}", target_dir.display()));
+        fs::copy(&lib_path, target_dir.join("libc++_shared.so"))
+            .unwrap_or_else(|e| panic!("cannot copy {}: {e}", lib_path.display()));
+    }
+}
\ No newline at end of file
diff --git a/extensions/tokenizers/rust/src/lib.rs b/extensions/tokenizers/rust/src/lib.rs
index 59bc0a4f2f4..3d4cd1264ad 100644
--- a/extensions/tokenizers/rust/src/lib.rs
+++ b/extensions/tokenizers/rust/src/lib.rs
@@ -40,8 +40,12 @@ use tk::tokenizer::{EncodeInput, Encoding};
use tk::utils::padding::{PaddingParams, PaddingStrategy};
use tk::utils::truncation::{TruncationParams, TruncationStrategy};
use tk::Tokenizer;
-use tk::{FromPretrainedParameters, Offsets};
+use tk::Offsets;
+#[cfg(not(target_os = "android"))]
+use tk::FromPretrainedParameters;
+
+#[cfg(not(target_os = "android"))]
#[no_mangle]
pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_createTokenizer<
'local,
@@ -72,6 +76,20 @@ pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_
}
}
+/// Android build of `createTokenizer`: the JNI symbol is still exported, but a call
+/// only raises a Java exception ("Not supported on Android") and returns 0.
+#[cfg(target_os = "android")]
+#[no_mangle]
+pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_createTokenizer<
+    'local,
+>(
+    mut env: JNIEnv<'local>,
+    _: JObject,
+    _: JString,
+    _: JString,
+) -> jlong {
+    let message = "Not supported on Android";
+    env.throw(message).unwrap();
+    // The exception raised above reports the failure; 0 is only the placeholder return value.
+    0 as jlong
+}
+
#[no_mangle]
pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_createTokenizerFromString<
'local,
diff --git a/extensions/tokenizers/src/main/java/ai/djl/huggingface/tokenizers/jni/LibUtils.java b/extensions/tokenizers/src/main/java/ai/djl/huggingface/tokenizers/jni/LibUtils.java
index 5e779af9a9b..d8f981a8915 100644
--- a/extensions/tokenizers/src/main/java/ai/djl/huggingface/tokenizers/jni/LibUtils.java
+++ b/extensions/tokenizers/src/main/java/ai/djl/huggingface/tokenizers/jni/LibUtils.java
@@ -66,6 +66,11 @@ public static void checkStatus() {
}
private static void loadLibrary() {
+ if ("http://www.android.com/".equals(System.getProperty("java.vendor.url"))) {
+ System.loadLibrary("djl_tokenizer"); // NOPMD
+ return;
+ }
+
String[] libs;
if (System.getProperty("os.name").startsWith("Windows")) {
libs =