Skip to content

Commit

Permalink
feat: switch all object detection models to Triton (#622)
Browse files Browse the repository at this point in the history
- switch to Triton, use gRPC protocol for faster transfer
- only request needed output fields (resolves #942)
- get rid of CategoryIndex and protobuf-generated files
  • Loading branch information
raphael0202 committed Nov 8, 2022
1 parent 58ae4a9 commit 3c786c4
Show file tree
Hide file tree
Showing 17 changed files with 426 additions and 2,089 deletions.
3 changes: 3 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_EXPOSE=127.0.0.1:5432

# Triton ML inference server
TRITON_HOST=triton

# InfluxDB
INFLUXDB_HOST=
INFLUXDB_PORT=8086
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/container-deploy-ml.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches:
- master
- tf-serving-deploy-*
- ml-deploy-*
# only update on push if ml.yml changed (does not apply to tags)
paths:
- docker/ml.yml
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/container-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ jobs:
echo "OFF_PASSWORD=${{ secrets.OFF_PASSWORD }}" >> .env
echo "SENTRY_DSN=${{ secrets.SENTRY_DSN }}" >> .env
echo "ELASTICSEARCH_HOSTS=elasticsearch" >> .env
# TODO remove when triton is deployed, and category model is retrained for triton
echo "TF_SERVING_HOST=${{ secrets.TF_SERVING_HOST }}" >> .env
echo "TRITON_HOST=${{ secrets.TRITON_HOST }}" >> .env
echo "INFLUXDB_HOST=${{ secrets.INFLUXDB_HOST }}" >> .env
echo "INFLUXDB_PORT=8086" >> .env
echo "INFLUXDB_DB_NAME=${{ matrix.env == 'robotoff-org' && 'off_metrics' || 'off_net_metrics' }}" >> .env
Expand Down
29 changes: 13 additions & 16 deletions .github/workflows/models-deploy.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
name: Sync Tensorflow models to Proxmox VM
on:
name: Sync ML models to Proxmox VM
on:
push:
branches:
- master
- deploy-*
tags:
tags:
- v*.*.*
- model-deploy*
jobs:
tfmodels-sync:
name: Sync TF models
ml-models-sync:
name: Sync ML models
runs-on: ubuntu-latest
environment: ${{ matrix.env }}
concurrency: ${{ matrix.env }}-${{ matrix.asset_name }}
Expand All @@ -35,17 +33,17 @@ jobs:
proxy_username: ${{ secrets.USERNAME }}
proxy_key: ${{ secrets.SSH_PRIVATE_KEY }}
script: |
cd ${{ matrix.env }}/tf_models
cd ${{ matrix.env }}/models
dir=`echo ${{ matrix.asset_name }} | sed 's/tf-//g'`
mkdir -p ${dir} ${dir}/1
wget -cO - /~https://github.com/openfoodfacts/robotoff-models/releases/download/${{ matrix.asset_name }}-1.0/label_map.pbtxt > ${dir}/labels.pbtxt
wget -cO - /~https://github.com/openfoodfacts/robotoff-models/releases/download/${{ matrix.asset_name }}-1.0/saved_model.tar.gz > ${dir}/1/saved_model.tar.gz
cd ${dir}/1
tar -xzvf saved_model.tar.gz --strip-component=1
rm saved_model.tar.gz
wget -cO - /~https://github.com/openfoodfacts/robotoff-models/releases/download/${{ matrix.asset_name }}-1.0/labels.txt > ${dir}/labels.txt
if [ ${{ matrix.env }} = 'robotoff-ml' ]; then
wget -cO - /~https://github.com/openfoodfacts/robotoff-models/releases/download/${{ matrix.asset_name }}-1.0/model.onnx > ${dir}/1/model.onnx
fi
# The category classifier has the pre- and post-processing built in, therefore
# it requires a slightly different sequence of steps to the 'tfmodels-sync' above.
# it requires a slightly different sequence of steps to the 'ml-models-sync' above.
tfmodels-category-classifier-sync:
name: Sync Category Classifier
runs-on: ubuntu-latest
Expand Down Expand Up @@ -76,4 +74,3 @@ jobs:
cd ${dir}/1
tar -xzvf saved_model.tar.gz --strip-component=1
rm saved_model.tar.gz
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ x-robotoff-base-env: &robotoff-base-env
SENTRY_DSN:
ELASTICSEARCH_HOSTS:
TF_SERVING_HOST:
TRITON_HOST:


services:
api:
Expand Down
20 changes: 20 additions & 0 deletions docker/ml.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,25 @@ services:
networks:
- webnet

triton:
restart: $RESTART_POLICY
# This is a custom build of Triton with:
# - GRPC/HTTP support
# - CPU only (we don't have GPU in production)
# - Tensorflow 2 SavedModel and ONNX support
# This allows us to significantly reduce the image size
# See https://gist.github.com/raphael0202/091e521f2c79a8db8c6e9aceafb6e0b9 for build script
image: ghcr.io/openfoodfacts/triton:latest
ports:
- ${TRITON_EXPOSE_HTTP:-8000}:8000
- ${TRITON_EXPOSE_GRPC:-8001}:8001
- ${TRITON_EXPOSE_METRICS:-8002}:8002
volumes:
- ./models:/models
entrypoint: "tritonserver --model-repository=/models"
mem_limit: 10g
networks:
- webnet

networks:
webnet:
Loading

0 comments on commit 3c786c4

Please sign in to comment.