Skip to content

Commit

Permalink
HuggingFaceTokenizer: add support for Android (#3531)
Browse files Browse the repository at this point in the history
* tokenizers: add support for building on Android

- Build the Rust library for Android using `cargo-ndk`
- Disable "http" feature for Rust library when building with Android
- Modify to load native library in Android

* android: Create tokenizer-native

- Copy from pytorch-native and change for tokenizer native modules

* Update build.cmd

---------

Co-authored-by: Xin Yang <xyang19@gmail.com>
  • Loading branch information
naveen521kk and xyang16 authored Nov 17, 2024
1 parent ba4d837 commit f00f18e
Show file tree
Hide file tree
Showing 15 changed files with 255 additions and 6 deletions.
1 change: 1 addition & 0 deletions android/settings.gradle
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
rootProject.name='android'
include ':core'
include ':pytorch-native'
include ':tokenizer-native'
3 changes: 3 additions & 0 deletions android/tokenizer-native/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Tokenizer

This folder contains the build of the tokenizer-native library for Android.
125 changes: 125 additions & 0 deletions android/tokenizer-native/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
apply plugin: 'com.android.library'
apply plugin: 'maven-publish'
apply plugin: 'signing'

// Android library module configuration for the tokenizer native artifact.
android {
compileSdkVersion 34
namespace "ai.djl.android.tokenizer_native"

// Defaults shared by all build types.
defaultConfig {
minSdkVersion 21
targetSdkVersion 34
versionCode 1
versionName "1.0"

// ProGuard rules from this file are applied to apps that consume this library.
consumerProguardFiles 'consumer-rules.pro'
}

buildTypes {
release {
// Code shrinking is turned off for the release build type.
minifyEnabled false
proguardFiles getDefaultProguardFile("proguard-android-optimize.txt"), "proguard-rules.pro"
}
}

sourceSets {
main {
// Native libraries are packaged from the jnilib/ directory of this module.
jniLibs.srcDirs = ["jnilib/"]
}
}
// Expose only the "release" variant for publishing, together with a sources jar.
publishing {
singleVariant("release") {
withSourcesJar()
}
}
}

// TODO: Move the publishing configuration into a separate Gradle file
// Signs every publication with a PGP key supplied through project properties.
signing {
// Signing is mandatory only when the "staging" or "snapshot" project property is present.
required(project.hasProperty("staging") || project.hasProperty("snapshot"))
// findProperty returns null when the property is not defined.
def signingKey = findProperty("signingKey")
def signingPassword = findProperty("signingPassword")
useInMemoryPgpKeys(signingKey, signingPassword)
sign publishing.publications
}

// Publishing is configured after evaluation because `components.release`
// is only available once the Android plugin has created its variants.
afterEvaluate {
    publishing {
        publications {
            // Single Maven publication built from the Android "release" component.
            release(MavenPublication) {
                from components.release
                pom {
                    name = "Tokenizer Android native library"
                    description = "Tokenizer Android native library binary distribution"
                    url = "http://www.djl.ai/android"
                    packaging = "aar"

                    licenses {
                        license {
                            name = "The Apache License, Version 2.0"
                            url = "https://www.apache.org/licenses/LICENSE-2.0"
                        }
                    }

                    scm {
                        connection = "scm:git:git@github.com:deepjavalibrary/djl.git"
                        developerConnection = "scm:git:git@github.com:deepjavalibrary/djl.git"
                        // Must be a well-formed URL: it is written verbatim into the published POM.
                        url = "https://github.com/deepjavalibrary/djl"
                        tag = "HEAD"
                    }

                    developers {
                        developer {
                            name = "DJL.AI Team"
                            email = "djl-dev@amazon.com"
                            organization = "Amazon AI"
                            organizationUrl = "https://amazon.com"
                        }
                    }
                }
            }
        }

        repositories {
            maven {
                // Target repository is selected by project property:
                //   -Psnapshot -> Sonatype snapshots, -Pstaging -> Sonatype staging,
                //   otherwise  -> a local directory repository under build/repo.
                if (project.hasProperty("snapshot")) {
                    name = "snapshot"
                    url = "https://oss.sonatype.org/content/repositories/snapshots/"
                    credentials {
                        username = findProperty("ossrhUsername")
                        password = findProperty("ossrhPassword")
                    }
                } else if (project.hasProperty("staging")) {
                    name = "staging"
                    url = "https://oss.sonatype.org/service/local/staging/deploy/maven2/"
                    credentials {
                        username = findProperty("ossrhUsername")
                        password = findProperty("ossrhPassword")
                    }
                } else {
                    name = "local"
                    url = "build/repo"
                }
            }
        }
    }
}


// Downloads the prebuilt libdjl_tokenizer.so for every supported Android ABI
// into jnilib/<abi>/, skipping ABIs whose library is already present.
tasks.register('processResources') {
    doLast {
        def baseUrl = "https://publish.djl.ai/tokenizer/jnilib/android"
        def abis = ["armeabi-v7a", "arm64-v8a", "x86", "x86_64"]
        abis.each { abi ->
            def source = "${baseUrl}/${abi}/libdjl_tokenizer.so"
            def target = file("${project.projectDir}/jnilib/${abi}/libdjl_tokenizer.so")
            if (!target.exists()) {
                project.logger.lifecycle(source)
                target.getParentFile().mkdirs()
                // Download into a sibling ".part" file and rename on success, so an
                // interrupted download never leaves a truncated library that the
                // exists() check above would silently accept on the next run.
                def partial = new File(target.getParentFile(), target.getName() + ".part")
                try {
                    new URL(source).withInputStream { i -> partial.withOutputStream { it << i } }
                    if (!partial.renameTo(target)) {
                        throw new GradleException("Failed to move ${partial} to ${target}")
                    }
                } finally {
                    // No-op after a successful rename; removes leftovers after a failure.
                    partial.delete()
                }
            }
        }
    }
}

// The native libraries must be in place before the AAR is assembled.
assemble.dependsOn processResources
Empty file.
1 change: 1 addition & 0 deletions android/tokenizer-native/gradlew
21 changes: 21 additions & 0 deletions android/tokenizer-native/proguard-rules.pro
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile
1 change: 1 addition & 0 deletions android/tokenizer-native/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android"/>
2 changes: 1 addition & 1 deletion extensions/tokenizers/build.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ cargo build --manifest-path %RUST_MANIFEST% --release

@rem for nightly ci
md build\jnilib\win-x86_64\cpu
copy rust\target\release\djl.dll build\jnilib\win-x86_64\cpu\tokenizers.dll
copy rust\target\release\djl_tokenizer.dll build\jnilib\win-x86_64\cpu\tokenizers.dll
16 changes: 16 additions & 0 deletions extensions/tokenizers/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,22 @@ tasks {
}
}

// Builds the native library for each Android ABI via build_android.sh and
// copies the result into jnilib/<djl version>/android/<abi> under the project directory.
register("compileAndroidJNI") {
    doFirst {
        val abis = listOf("armeabi-v7a", "arm64-v8a", "x86", "x86_64")
        abis.forEach { abi ->
            // The ABI name is the only argument passed to the build script.
            exec {
                commandLine("bash", "build_android.sh", abi)
            }
            val ciDir = project.projectDir / "jnilib/${libs.versions.djl.get()}/android/$abi"
            val builtLibs = buildDirectory / "jnilib" / "$abi"
            copy {
                from(builtLibs)
                into(ciDir)
            }
            // Clear the intermediate output before the next ABI is built.
            delete("$buildDirectory/jnilib")
        }
    }
}

register("formatPython") {
doFirst {
exec {
Expand Down
4 changes: 2 additions & 2 deletions extensions/tokenizers/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ function copy_files() {
flavor="$2"
if [[ $PLATFORM == 'darwin' ]]; then
mkdir -p "build/jnilib/osx-$arch/$flavor"
cp -f rust/target/release/libdjl.dylib "build/jnilib/osx-$arch/$flavor/libtokenizers.dylib"
cp -f rust/target/release/libdjl_tokenizer.dylib "build/jnilib/osx-$arch/$flavor/libtokenizers.dylib"
elif [[ $PLATFORM == 'linux' ]]; then
mkdir -p "build/jnilib/linux-$arch/$flavor"
cp -f rust/target/release/libdjl.so "build/jnilib/linux-$arch/$flavor/libtokenizers.so"
cp -f rust/target/release/libdjl_tokenizer.so "build/jnilib/linux-$arch/$flavor/libtokenizers.so"
fi
}

Expand Down
29 changes: 29 additions & 0 deletions extensions/tokenizers/build_android.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Builds the tokenizer native library for a single Android ABI with cargo-ndk.
#
# Usage: build_android.sh <abi>
#   <abi>  value passed to `cargo ndk -t` (for example arm64-v8a)
#
# The library is written below $WORK_DIR/build/jnilib (cargo ndk -o).

set -ex

WORK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
export WORK_DIR

if [[ $# -lt 1 || -z "$1" ]]; then
  echo "Usage: $0 <abi>" >&2
  exit 1
fi
FLAVOR=$1

# All paths below are relative to the script's own directory, so run from
# there regardless of the caller's working directory.
pushd "$WORK_DIR"

rm -rf build/classes
mkdir -p build/classes

# Compile the JNI binding classes; -h writes the generated native headers to build/include.
javac -sourcepath src/main/java/ src/main/java/ai/djl/huggingface/tokenizers/jni/TokenizersLibrary.java -h build/include -d build/classes
javac -sourcepath src/main/java/ src/main/java/ai/djl/engine/rust/RustLibrary.java -h build/include -d build/classes

cd rust/
cargo ndk -t "$FLAVOR" -o "$WORK_DIR/build/jnilib" --platform=21 build --release
cd ..
popd
7 changes: 5 additions & 2 deletions extensions/tokenizers/rust/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "djl"
name = "djl_tokenizer"
version = "0.1.0"
authors = ["Frank Liu <frankfliu2000@gmail.com>"]
edition = "2021"
Expand All @@ -13,7 +13,7 @@ candle-flash-attn = { version = "*", optional = true }
candle-cublaslt = { git = "https://github.com/huggingface/candle-cublaslt", rev = "cf789b7dd6d4abb19b03b9556442f94f0588b4a0", optional = true }
candle-layer-norm = { git = "https://github.com/xyang16/candle-layer-norm", rev = "e574de6a7f88bafbede8edf9ee43170c6a8ce51a", optional = true }
candle-rotary = { git = "https://github.com/huggingface/candle-rotary", rev = "0a718a0856569a92f3112e64f10d07e4447822e8", optional = true }
tokenizers = { version = "0.20.3", features = ["http"] }
tokenizers = { version = "0.20.3" }
half = "2.4.0"
tracing = "0.1.40"
safetensors = "0.4.3"
Expand All @@ -33,6 +33,9 @@ openssl-src = { git = "https://github.com/alexcrichton/openssl-src-rs", rev = "c
[target.'cfg(target_os = "linux")'.dependencies]
openssl = { version = "0.10", features = ["vendored"] }

[target.'cfg(not(target_os = "android"))'.dependencies]
tokenizers = { version = "0.20.0", features = ["http"] }

[lib]
crate-type = ["cdylib"]

Expand Down
26 changes: 26 additions & 0 deletions extensions/tokenizers/rust/src/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// see https://github.com/bbqsrc/cargo-ndk?tab=readme-ov-file#usage

use std::{env, path::{Path, PathBuf}};

/// Build-script entry point.
///
/// Reads `CARGO_CFG_TARGET_OS` and runs the Android-specific setup only when
/// its value is `"android"`; every other target needs no extra configuration.
///
/// # Panics
///
/// Panics if `CARGO_CFG_TARGET_OS` is unset or is not valid Unicode.
fn main() {
    let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap();
    match target_os.as_str() {
        "android" => android(),
        _ => {}
    }
}

/// Configures linking for Android and stages `libc++_shared.so` with the build output.
///
/// Always emits `cargo:rustc-link-lib=c++_shared`. When `CARGO_NDK_OUTPUT_PATH`
/// is set, `libc++_shared.so` is additionally copied from
/// `CARGO_NDK_SYSROOT_LIBS_PATH` into
/// `<CARGO_NDK_OUTPUT_PATH>/<CARGO_NDK_ANDROID_TARGET>/`, creating that
/// directory first if it does not exist yet.
///
/// # Panics
///
/// Panics if `CARGO_NDK_OUTPUT_PATH` is set but `CARGO_NDK_SYSROOT_LIBS_PATH`
/// or `CARGO_NDK_ANDROID_TARGET` is missing, or if the directory cannot be
/// created or the library cannot be copied.
fn android() {
    println!("cargo:rustc-link-lib=c++_shared");

    if let Ok(output_path) = env::var("CARGO_NDK_OUTPUT_PATH") {
        let sysroot_libs_path = PathBuf::from(
            env::var_os("CARGO_NDK_SYSROOT_LIBS_PATH")
                .expect("CARGO_NDK_SYSROOT_LIBS_PATH must be set when CARGO_NDK_OUTPUT_PATH is set"),
        );
        let android_target = env::var("CARGO_NDK_ANDROID_TARGET")
            .expect("CARGO_NDK_ANDROID_TARGET must be set when CARGO_NDK_OUTPUT_PATH is set");

        let source = sysroot_libs_path.join("libc++_shared.so");
        let target_dir = Path::new(&output_path).join(android_target);

        // The per-target output directory may not exist yet when the build
        // script runs; copying into a missing directory would fail.
        std::fs::create_dir_all(&target_dir).unwrap_or_else(|e| {
            panic!("failed to create {}: {}", target_dir.display(), e)
        });

        let destination = target_dir.join("libc++_shared.so");
        std::fs::copy(&source, &destination).unwrap_or_else(|e| {
            panic!(
                "failed to copy {} to {}: {}",
                source.display(),
                destination.display(),
                e
            )
        });
    }
}
20 changes: 19 additions & 1 deletion extensions/tokenizers/rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,12 @@ use tk::tokenizer::{EncodeInput, Encoding};
use tk::utils::padding::{PaddingParams, PaddingStrategy};
use tk::utils::truncation::{TruncationParams, TruncationStrategy};
use tk::Tokenizer;
use tk::{FromPretrainedParameters, Offsets};
use tk::Offsets;

#[cfg(not(target_os = "android"))]
use tk::FromPretrainedParameters;

#[cfg(not(target_os = "android"))]
#[no_mangle]
pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_createTokenizer<
'local,
Expand Down Expand Up @@ -72,6 +76,20 @@ pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_
}
}

/// Android-only variant of the `createTokenizer` JNI entry point.
///
/// Ignores all of its arguments, raises a Java exception with the message
/// "Not supported on Android" and returns `0`.
///
/// # Panics
///
/// Panics if the exception cannot be raised.
#[cfg(target_os = "android")]
#[no_mangle]
pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_createTokenizer<
    'env,
>(
    mut jni_env: JNIEnv<'env>,
    _this: JObject,
    _arg0: JString,
    _arg1: JString,
) -> jlong {
    let thrown = jni_env.throw("Not supported on Android");
    thrown.unwrap();
    0
}

#[no_mangle]
pub extern "system" fn Java_ai_djl_huggingface_tokenizers_jni_TokenizersLibrary_createTokenizerFromString<
'local,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ public static void checkStatus() {
}

private static void loadLibrary() {
if ("http://www.android.com/".equals(System.getProperty("java.vendor.url"))) {
System.loadLibrary("djl_tokenizer"); // NOPMD
return;
}

String[] libs;
if (System.getProperty("os.name").startsWith("Windows")) {
libs =
Expand Down

0 comments on commit f00f18e

Please sign in to comment.