From f23e05711df289700ce9a3d77b2c85dfdb0d2b65 Mon Sep 17 00:00:00 2001 From: <> Date: Mon, 5 Aug 2024 15:01:30 +0000 Subject: [PATCH] Deployed 4d4757d with MkDocs version: 1.6.0 --- .nojekyll | 0 404.html | 4504 +++++++++ .../all-in-one-local-machine/index.html | 4560 +++++++++ Deployment/index.html | 4787 ++++++++++ .../private-offline-deployment/index.html | 4848 ++++++++++ Deployment/trial-on-cloud/index.html | 4806 ++++++++++ Deployment/your-cloud/index.html | 4659 ++++++++++ Modules/API/main/index.html | 4650 ++++++++++ Modules/Agent/index.html | 4692 ++++++++++ Modules/Client/Listener/index.html | 4707 ++++++++++ Modules/Client/Responder/index.html | 4561 +++++++++ Modules/index.html | 4565 +++++++++ Source/index.html | 4759 ++++++++++ Sources/API/api/asgi/index.html | 4640 ++++++++++ Sources/API/api/settings/index.html | 4642 ++++++++++ Sources/API/api/urls/index.html | 4649 ++++++++++ Sources/API/api/wsgi/index.html | 4640 ++++++++++ Sources/API/authenticate/admin/index.html | 4635 ++++++++++ Sources/API/authenticate/apps/index.html | 4635 ++++++++++ .../migrations/0001_init/index.html | 4575 +++++++++ Sources/API/authenticate/models/index.html | 4635 ++++++++++ .../API/authenticate/serializers/index.html | 4635 ++++++++++ Sources/API/authenticate/tests/index.html | 4619 ++++++++++ Sources/API/authenticate/urls/index.html | 4635 ++++++++++ .../utils/fire_and_forget/index.html | 4693 ++++++++++ .../authenticate/utils/get_logger/index.html | 4637 ++++++++++ .../API/authenticate/utils/timer/index.html | 4986 ++++++++++ Sources/API/authenticate/views/index.html | 4777 ++++++++++ Sources/API/hardware/admin/index.html | 5067 ++++++++++ Sources/API/hardware/apps/index.html | 4635 ++++++++++ Sources/API/hardware/forms/index.html | 4635 ++++++++++ .../hardware/migrations/0001_init/index.html | 4575 +++++++++ .../migrations/0002_add_rag/index.html | 4575 +++++++++ Sources/API/hardware/models/index.html | 5744 ++++++++++++ Sources/API/hardware/serializers/index.html | 4635 ++++++++++ Sources/API/hardware/signals/index.html | 4691 ++++++++++ Sources/API/hardware/tests/index.html | 4619 ++++++++++ Sources/API/hardware/urls/index.html | 4635 ++++++++++ Sources/API/hardware/views/index.html | 5570 +++++++++++ Sources/API/llm/admin/index.html | 4635 ++++++++++ Sources/API/llm/apps/index.html | 4635 ++++++++++ Sources/API/llm/llm/config/index.html | 4637 ++++++++++ .../commands/check_models/index.html | 4868 ++++++++++ .../API/llm/migrations/0001_init/index.html | 4575 +++++++++ Sources/API/llm/models/index.html | 4954 ++++++++++ Sources/API/llm/serializers/index.html | 4635 ++++++++++ Sources/API/llm/tests/index.html | 4619 ++++++++++ Sources/API/llm/urls/index.html | 4635 ++++++++++ Sources/API/llm/views/index.html | 4839 ++++++++++ Sources/API/manage/index.html | 4697 ++++++++++ Sources/API/orchestrator/admin/index.html | 4635 ++++++++++ Sources/API/orchestrator/apps/index.html | 4635 ++++++++++ .../orchestrator/chain/clusters/index.html | 4874 ++++++++++ .../completed_emotion_detection/index.html | 4789 ++++++++++ .../chain/completed_hf_llm/index.html | 4744 ++++++++++ .../chain/completed_openai_gpt_35/index.html | 4741 ++++++++++ .../index.html | 4745 ++++++++++ .../index.html | 4741 ++++++++++ .../completed_openai_speech2text/index.html | 4849 ++++++++++ .../completed_openai_text2speech/index.html | 4802 ++++++++++ .../completed_quantization_llm/index.html | 4744 ++++++++++ .../chain/completed_rag/index.html | 4744 ++++++++++ .../chain/completed_speech2text/index.html | 4830 ++++++++++ 
.../chain/completed_task/index.html | 4821 ++++++++++ .../chain/completed_text2speech/index.html | 4800 ++++++++++ .../chain/created_data_text/index.html | 4879 ++++++++++ .../API/orchestrator/chain/manager/index.html | 5539 +++++++++++ .../API/orchestrator/chain/models/index.html | 4637 ++++++++++ .../API/orchestrator/chain/signals/index.html | 4637 ++++++++++ .../API/orchestrator/chain/utils/index.html | 5153 +++++++++++ .../metrics/accuracy_benchmark/index.html | 7816 ++++++++++++++++ .../metrics/latency_benchmark/index.html | 8204 +++++++++++++++++ .../API/orchestrator/metrics/utils/index.html | 4938 ++++++++++ .../migrations/0001_init/index.html | 4575 +++++++++ Sources/API/orchestrator/models/index.html | 5306 +++++++++++ .../API/orchestrator/serializers/index.html | 4635 ++++++++++ Sources/API/orchestrator/tests/index.html | 4619 ++++++++++ Sources/API/orchestrator/urls/index.html | 4635 ++++++++++ Sources/API/orchestrator/views/index.html | 5549 +++++++++++ Sources/Agent/main/index.html | 5980 ++++++++++++ Sources/Agent/models/parameters/index.html | 4635 ++++++++++ Sources/Agent/models/results/index.html | 4635 ++++++++++ Sources/Agent/models/task/index.html | 4746 ++++++++++ Sources/Agent/models/track_type/index.html | 4635 ++++++++++ .../features_extraction/index.html | 5321 +++++++++++ .../emotion_detection/handler/index.html | 5360 +++++++++++ .../emotion_detection/sentiment/index.html | 5093 ++++++++++ .../modules/general_ml/handler/index.html | 5153 +++++++++++ .../modules/general_ml/ml_models/index.html | 4637 ++++++++++ .../Agent/modules/hf_llm/handler/index.html | 4986 ++++++++++ .../Agent/modules/openai/handler/index.html | 6236 +++++++++++++ .../adaptor_worker/index.html | 5231 +++++++++++ .../quantization_llm/handler/index.html | 5197 +++++++++++ .../quantization_llm/models/index.html | 5150 +++++++++++ Sources/Agent/modules/rag/handler/index.html | 4830 ++++++++++ .../modules/rag/neo4j_connector/index.html | 4637 ++++++++++ .../rag/postgresql_connector/index.html | 4637 ++++++++++ .../speech_to_text/speech2text/index.html | 5408 +++++++++++ .../text_to_speech/text2speech/index.html | 5308 +++++++++++ Sources/Agent/setup/index.html | 4633 ++++++++++ Sources/Agent/storage/index.html | 6076 ++++++++++++ Sources/Agent/utils/api/index.html | 5921 ++++++++++++ Sources/Agent/utils/aws/index.html | 4635 ++++++++++ Sources/Agent/utils/constants/index.html | 4691 ++++++++++ Sources/Agent/utils/get_logger/index.html | 4719 ++++++++++ .../utils/storage/api_sync_handler/index.html | 4757 ++++++++++ .../storage/local_sync_handler/index.html | 5024 ++++++++++ .../utils/storage/s3_sync_handler/index.html | 4759 ++++++++++ Sources/Agent/utils/time_logger/index.html | 4955 ++++++++++ Sources/Agent/utils/time_tracker/index.html | 4713 ++++++++++ Sources/Agent/utils/timer/index.html | 4984 ++++++++++ Sources/Client/Listener/api/index.html | 6006 ++++++++++++ .../Client/Listener/audios_acquire/index.html | 5531 +++++++++++ Sources/Client/Listener/constants/index.html | 4709 ++++++++++ .../Listener/mock/data_extraction/index.html | 5659 ++++++++++++ Sources/Client/Listener/setup/index.html | 4575 +++++++++ Sources/Client/Listener/storage/index.html | 5676 ++++++++++++ Sources/Client/Listener/utils/index.html | 5131 +++++++++++ .../Client/Listener/videos_acquire/index.html | 5296 +++++++++++ Sources/Client/Responder/api/index.html | 5093 ++++++++++ Sources/Client/Responder/constants/index.html | 4681 ++++++++++ .../Client/Responder/play_speech/index.html | 5080 ++++++++++ 
Sources/Client/Responder/setup/index.html | 4575 +++++++++ Sources/Client/Responder/utils/index.html | 5212 +++++++++++ Tutorial/annotation_customisation/index.html | 4852 ++++++++++ Tutorial/benchmark_and_annotation/index.html | 4741 ++++++++++ Tutorial/case_study/index.html | 4733 ++++++++++ Tutorial/index.html | 4564 +++++++++ Tutorial/pipeline_customisation/index.html | 5207 +++++++++++ Tutorial/setup/index.html | 4671 ++++++++++ Tutorial/video_demo/index.html | 4683 ++++++++++ assets/_mkdocstrings.css | 119 + assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.fe8b6f2b.min.js | 29 + assets/javascripts/bundle.fe8b6f2b.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 ++++++++++++++ .../workers/search.b8dbb3d2.min.js | 42 + .../workers/search.b8dbb3d2.min.js.map | 7 + assets/stylesheets/main.3cba04c6.min.css | 1 + assets/stylesheets/main.3cba04c6.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + images/AI4WA.png | Bin 0 -> 14394 bytes images/AI4WA.svg | 50 + images/ArchitectureDesign.jpg | Bin 0 -> 1820562 bytes images/Audio.png | Bin 0 -> 165437 bytes images/GPT-4o.jpg | Bin 0 -> 887481 bytes images/OpenOmni.jpg | Bin 0 -> 1042337 bytes images/Tasks.png | Bin 0 -> 175612 bytes images/Triangle.jpg | Bin 0 -> 718181 bytes images/VoiceE2E.jpg | Bin 0 -> 843661 bytes images/accuracy_detail_progress.png | Bin 0 -> 126082 bytes images/accuracy_detail_results.png | Bin 0 -> 72201 bytes images/accuracy_overall.png | Bin 0 -> 38377 bytes images/accuracy_summary_1.png | Bin 0 -> 17997 bytes images/accuracy_summary_2.png | Bin 0 -> 36674 bytes images/accuracy_summary_3.png | Bin 0 -> 38707 bytes images/accuracy_summary_4.png | Bin 0 -> 22378 bytes images/add_token.png | Bin 0 -> 58760 bytes images/ai_running.png | Bin 0 -> 86266 bytes images/annotation_metrics.png | Bin 0 -> 71349 bytes 
images/annotation_overview.png | Bin 0 -> 308207 bytes images/annotation_progress_detail.png | Bin 0 -> 68608 bytes images/assign_tag.png | Bin 0 -> 122614 bytes images/audio_cli.png | Bin 0 -> 60775 bytes images/audio_speech.png | Bin 0 -> 83793 bytes images/benchmark_detail.png | Bin 0 -> 235892 bytes images/benchmark_summary.png | Bin 0 -> 307970 bytes images/client.jpg | Bin 0 -> 307482 bytes images/conversation_data.png | Bin 0 -> 274283 bytes images/detailed_latency.png | Bin 0 -> 259074 bytes images/emotion_annotation.png | Bin 0 -> 316051 bytes images/favicon.ico | Bin 0 -> 15406 bytes images/full_tasks.png | Bin 0 -> 413688 bytes images/gpt-4o-assistance.png | Bin 0 -> 235892 bytes images/gpt-4o.png | Bin 0 -> 307970 bytes images/gpt4oaccuracy.png | Bin 0 -> 72201 bytes images/grab_token.png | Bin 0 -> 81577 bytes images/individual_conversation.png | Bin 0 -> 310271 bytes images/latency_summary_stat.png | Bin 0 -> 201145 bytes images/model_data.png | Bin 0 -> 22110 bytes images/multi-turn-conversation.png | Bin 0 -> 39012 bytes images/multi-turn.png | Bin 0 -> 100725 bytes images/task_record.png | Bin 0 -> 165038 bytes images/video.png | Bin 0 -> 202117 bytes images/video_cli.png | Bin 0 -> 69024 bytes index.html | 5270 +++++++++++ objects.inv | Bin 0 -> 3534 bytes search/search_index.json | 1 + sitemap.xml | 653 ++ sitemap.xml.gz | Bin 0 -> 1208 bytes 224 files changed, 653597 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 Deployment/all-in-one-local-machine/index.html create mode 100644 Deployment/index.html create mode 100644 Deployment/private-offline-deployment/index.html create mode 100644 Deployment/trial-on-cloud/index.html create mode 100644 Deployment/your-cloud/index.html create mode 100644 Modules/API/main/index.html create mode 100644 Modules/Agent/index.html create mode 100644 Modules/Client/Listener/index.html create mode 100644 Modules/Client/Responder/index.html create mode 100644 Modules/index.html create mode 100644 Source/index.html create mode 100644 Sources/API/api/asgi/index.html create mode 100644 Sources/API/api/settings/index.html create mode 100644 Sources/API/api/urls/index.html create mode 100644 Sources/API/api/wsgi/index.html create mode 100644 Sources/API/authenticate/admin/index.html create mode 100644 Sources/API/authenticate/apps/index.html create mode 100644 Sources/API/authenticate/migrations/0001_init/index.html create mode 100644 Sources/API/authenticate/models/index.html create mode 100644 Sources/API/authenticate/serializers/index.html create mode 100644 Sources/API/authenticate/tests/index.html create mode 100644 Sources/API/authenticate/urls/index.html create mode 100644 Sources/API/authenticate/utils/fire_and_forget/index.html create mode 100644 Sources/API/authenticate/utils/get_logger/index.html create mode 100644 Sources/API/authenticate/utils/timer/index.html create mode 100644 Sources/API/authenticate/views/index.html create mode 100644 Sources/API/hardware/admin/index.html create mode 100644 Sources/API/hardware/apps/index.html create mode 100644 Sources/API/hardware/forms/index.html create mode 100644 Sources/API/hardware/migrations/0001_init/index.html create mode 100644 Sources/API/hardware/migrations/0002_add_rag/index.html create mode 100644 Sources/API/hardware/models/index.html create mode 100644 Sources/API/hardware/serializers/index.html create mode 100644 Sources/API/hardware/signals/index.html create mode 100644 Sources/API/hardware/tests/index.html create mode 100644 
Sources/API/hardware/urls/index.html create mode 100644 Sources/API/hardware/views/index.html create mode 100644 Sources/API/llm/admin/index.html create mode 100644 Sources/API/llm/apps/index.html create mode 100644 Sources/API/llm/llm/config/index.html create mode 100644 Sources/API/llm/management/commands/check_models/index.html create mode 100644 Sources/API/llm/migrations/0001_init/index.html create mode 100644 Sources/API/llm/models/index.html create mode 100644 Sources/API/llm/serializers/index.html create mode 100644 Sources/API/llm/tests/index.html create mode 100644 Sources/API/llm/urls/index.html create mode 100644 Sources/API/llm/views/index.html create mode 100644 Sources/API/manage/index.html create mode 100644 Sources/API/orchestrator/admin/index.html create mode 100644 Sources/API/orchestrator/apps/index.html create mode 100644 Sources/API/orchestrator/chain/clusters/index.html create mode 100644 Sources/API/orchestrator/chain/completed_emotion_detection/index.html create mode 100644 Sources/API/orchestrator/chain/completed_hf_llm/index.html create mode 100644 Sources/API/orchestrator/chain/completed_openai_gpt_35/index.html create mode 100644 Sources/API/orchestrator/chain/completed_openai_gpt_4o_text_and_image/index.html create mode 100644 Sources/API/orchestrator/chain/completed_openai_gpt_4o_text_only/index.html create mode 100644 Sources/API/orchestrator/chain/completed_openai_speech2text/index.html create mode 100644 Sources/API/orchestrator/chain/completed_openai_text2speech/index.html create mode 100644 Sources/API/orchestrator/chain/completed_quantization_llm/index.html create mode 100644 Sources/API/orchestrator/chain/completed_rag/index.html create mode 100644 Sources/API/orchestrator/chain/completed_speech2text/index.html create mode 100644 Sources/API/orchestrator/chain/completed_task/index.html create mode 100644 Sources/API/orchestrator/chain/completed_text2speech/index.html create mode 100644 Sources/API/orchestrator/chain/created_data_text/index.html create mode 100644 Sources/API/orchestrator/chain/manager/index.html create mode 100644 Sources/API/orchestrator/chain/models/index.html create mode 100644 Sources/API/orchestrator/chain/signals/index.html create mode 100644 Sources/API/orchestrator/chain/utils/index.html create mode 100644 Sources/API/orchestrator/metrics/accuracy_benchmark/index.html create mode 100644 Sources/API/orchestrator/metrics/latency_benchmark/index.html create mode 100644 Sources/API/orchestrator/metrics/utils/index.html create mode 100644 Sources/API/orchestrator/migrations/0001_init/index.html create mode 100644 Sources/API/orchestrator/models/index.html create mode 100644 Sources/API/orchestrator/serializers/index.html create mode 100644 Sources/API/orchestrator/tests/index.html create mode 100644 Sources/API/orchestrator/urls/index.html create mode 100644 Sources/API/orchestrator/views/index.html create mode 100644 Sources/Agent/main/index.html create mode 100644 Sources/Agent/models/parameters/index.html create mode 100644 Sources/Agent/models/results/index.html create mode 100644 Sources/Agent/models/task/index.html create mode 100644 Sources/Agent/models/track_type/index.html create mode 100644 Sources/Agent/modules/emotion_detection/features_extraction/index.html create mode 100644 Sources/Agent/modules/emotion_detection/handler/index.html create mode 100644 Sources/Agent/modules/emotion_detection/sentiment/index.html create mode 100644 Sources/Agent/modules/general_ml/handler/index.html create mode 100644 
Sources/Agent/modules/general_ml/ml_models/index.html create mode 100644 Sources/Agent/modules/hf_llm/handler/index.html create mode 100644 Sources/Agent/modules/openai/handler/index.html create mode 100644 Sources/Agent/modules/quantization_llm/adaptor_worker/index.html create mode 100644 Sources/Agent/modules/quantization_llm/handler/index.html create mode 100644 Sources/Agent/modules/quantization_llm/models/index.html create mode 100644 Sources/Agent/modules/rag/handler/index.html create mode 100644 Sources/Agent/modules/rag/neo4j_connector/index.html create mode 100644 Sources/Agent/modules/rag/postgresql_connector/index.html create mode 100644 Sources/Agent/modules/speech_to_text/speech2text/index.html create mode 100644 Sources/Agent/modules/text_to_speech/text2speech/index.html create mode 100644 Sources/Agent/setup/index.html create mode 100644 Sources/Agent/storage/index.html create mode 100644 Sources/Agent/utils/api/index.html create mode 100644 Sources/Agent/utils/aws/index.html create mode 100644 Sources/Agent/utils/constants/index.html create mode 100644 Sources/Agent/utils/get_logger/index.html create mode 100644 Sources/Agent/utils/storage/api_sync_handler/index.html create mode 100644 Sources/Agent/utils/storage/local_sync_handler/index.html create mode 100644 Sources/Agent/utils/storage/s3_sync_handler/index.html create mode 100644 Sources/Agent/utils/time_logger/index.html create mode 100644 Sources/Agent/utils/time_tracker/index.html create mode 100644 Sources/Agent/utils/timer/index.html create mode 100644 Sources/Client/Listener/api/index.html create mode 100644 Sources/Client/Listener/audios_acquire/index.html create mode 100644 Sources/Client/Listener/constants/index.html create mode 100644 Sources/Client/Listener/mock/data_extraction/index.html create mode 100644 Sources/Client/Listener/setup/index.html create mode 100644 Sources/Client/Listener/storage/index.html create mode 100644 Sources/Client/Listener/utils/index.html create mode 100644 Sources/Client/Listener/videos_acquire/index.html create mode 100644 Sources/Client/Responder/api/index.html create mode 100644 Sources/Client/Responder/constants/index.html create mode 100644 Sources/Client/Responder/play_speech/index.html create mode 100644 Sources/Client/Responder/setup/index.html create mode 100644 Sources/Client/Responder/utils/index.html create mode 100644 Tutorial/annotation_customisation/index.html create mode 100644 Tutorial/benchmark_and_annotation/index.html create mode 100644 Tutorial/case_study/index.html create mode 100644 Tutorial/index.html create mode 100644 Tutorial/pipeline_customisation/index.html create mode 100644 Tutorial/setup/index.html create mode 100644 Tutorial/video_demo/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/images/favicon.png create mode 100644 assets/javascripts/bundle.fe8b6f2b.min.js create mode 100644 assets/javascripts/bundle.fe8b6f2b.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 
assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js.map create mode 100644 assets/stylesheets/main.3cba04c6.min.css create mode 100644 assets/stylesheets/main.3cba04c6.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css create mode 100644 assets/stylesheets/palette.06af60db.min.css.map create mode 100644 images/AI4WA.png create mode 100644 images/AI4WA.svg create mode 100644 images/ArchitectureDesign.jpg create mode 100644 images/Audio.png create mode 100644 images/GPT-4o.jpg create mode 100644 images/OpenOmni.jpg create mode 100644 images/Tasks.png create mode 100644 images/Triangle.jpg create mode 100644 images/VoiceE2E.jpg create mode 100644 images/accuracy_detail_progress.png create mode 100644 images/accuracy_detail_results.png create mode 100644 images/accuracy_overall.png create mode 100644 images/accuracy_summary_1.png create mode 100644 images/accuracy_summary_2.png create mode 100644 images/accuracy_summary_3.png create mode 100644 images/accuracy_summary_4.png create mode 100644 images/add_token.png create mode 100644 images/ai_running.png create mode 100644 images/annotation_metrics.png create mode 100644 images/annotation_overview.png create mode 100644 images/annotation_progress_detail.png create mode 100644 images/assign_tag.png create mode 100644 images/audio_cli.png create mode 100644 images/audio_speech.png create mode 100644 images/benchmark_detail.png create mode 100644 images/benchmark_summary.png create mode 100644 images/client.jpg create mode 100644 images/conversation_data.png create mode 100644 images/detailed_latency.png create mode 100644 images/emotion_annotation.png create mode 100644 images/favicon.ico create mode 100644 images/full_tasks.png create mode 100644 images/gpt-4o-assistance.png create mode 100644 images/gpt-4o.png create mode 100644 images/gpt4oaccuracy.png create mode 100644 images/grab_token.png create mode 100644 
images/individual_conversation.png create mode 100644 images/latency_summary_stat.png create mode 100644 images/model_data.png create mode 100644 images/multi-turn-conversation.png create mode 100644 images/multi-turn.png create mode 100644 images/task_record.png create mode 100644 images/video.png create mode 100644 images/video_cli.png create mode 100644 index.html create mode 100644 objects.inv create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/404.html b/404.html new file mode 100644 index 00000000..146e5121 --- /dev/null +++ b/404.html @@ -0,0 +1,4504 @@ + + + +
+This deployment mode is the one introduced in the Tutorial/setup section.
+Shared Access of the Files (videos, images and audios)
+Under this mode, all the files will be shared on the same machine via the docker volume and file system, so there is no +need to transfer the video and audio data between different machines, to ensure API/Agent/Client all have access to the +files.
+You can check it from here: Tutorial/setup
+ + + + + + + + + + + + + + + +As we suggest in the introduction, we have four modes of deployment:
+ +If you want to easily get start, you can use our deployed API, the link +is https://openomni.ai4wa.com, to manage the tasks.
+If you want to test out the full setup locally, you can follow the guide in the All in One Local Machine
section.
If you are thinking about deploy it as a product, which is fully locally within a home network, addressing the privacy
+issue, you can follow the guide in the Private Offline Deployment
section.
If you are doing research with cluster of computing resources, or you want annotators to work on the same platform for a
+serious project, you can follow the guide in the Your Cloud
section.
We have three components in the stack to deploy:
+Something like this
+ +All the metadata will be communicated via the API, so here we need to think about how can we share the video and audio +data between Agent/Client/API.
+We have four STORAGE_SOLUTION for this four different scenarios:
+To switch between these four modes, all you need to do is to set the STORAGE_SOLUTION
environment variable before
+start the API
export STORAGE_SOLUTION=api
+
+
+ It is to offload the Agent/Client/API modules into different machines however within the same network, for potential use +cases like privacy concerns, or to have a more robust system.
+One of the example deployment scenario for this is:
+Local Network File Sync
+To ensure the API/Agent/Client all have access to the files, we will need to sync the files between different machines.
+As other deployment methods, we will first deploy the API
module.
Login to the machine your API will deploy on, and clone the repo:
+git clone git@github.com:AI4WA/OpenOmniFramework.git
+cd ./OpenOmniFramework
+cd ./API
+export STORAGE_SOLUTION=local # this is for local mode
+
+# Run it inside docker, this is the easiest way to get started
+docker compose up
+
+Get your private ip of this machine.
+For Mac:
+ipconfig getifaddr en0
+
+For Linux:
+hostname -I
+
+For Windows:
+ipconfig
+
+After this, you should be able to access the API at http://<private-ip>:8000
for any device within the same network.
Login to the API admin, go to http://<private-ip>:8000/authtoken/tokenproxy/
and click Add Token
.
If you are a Linux or Mac for API module, then you can use rsync
to sync the files between different machines.
In this way, all you need to do is to start a new terminal and run the following command:
+cd ./OpenOmniFramework
+cd ./Client/Listener
+
+source venv/bin/activate
+
+# in this setup, STORAGE_SOLUTION on the API side is local
+# sync the audio and video data to the API machine
+python3 storage.py --token your_token_from_step_2 --dest_dir api_machine_user@api_private_ip:/where/api/folder/is/Client/Listener/data --dest_password api_machine_password
+
+If you are on Windows, you can use the api storage mode to sync the files between machines.
In Step 1, before starting the API, run the following command:
+export API_STORAGE_MODE=api
+
+Then, within the Listener module, run the following command:
cd ./OpenOmniFramework
+cd ./Client/Listener
+
+source venv/bin/activate
+
+# sync the audio and video data to the API machine
+python3 storage.py --token your_token_from_step_2 --api_domain http://<private-ip>:8000
+
+This approach is a bit slower than rsync, but the difference should not be noticeable for testing purposes.
Log in to the machine the Client will be deployed on (it should have a camera, microphone and speaker), and clone the repo:
+# switch to a proper directory
+git clone git@github.com:AI4WA/OpenOmniFramework.git
+
+For the Listener part, run the following commands:
cd ./OpenOmniFramework
+cd ./Client/Listener
+
+export DISPLAY=:0.0 # THIS IS SPECIFIC FOR RASPBERRY PI
+
+# create the virtual environment if this is your first time running this
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+# run video acquire
+python3 videos_acquire.py --token your_token_from_step_2 --api_domain http://<private-ip>:8000
+
+You should be able to see something like this:
+ +Then open a new terminal
+cd ./OpenOmniFramework
+cd ./Client/Listener
+
+source venv/bin/activate
+
+# run audio acquire
+python3 audios_acquire.py --token your_token_from_step_2 --track_cluster CLUSTER_GPT_4O_ETE_CONVERSATION --api_domain http://<private-ip>:8000
+
+# you can change the cluster to the one you need
+
+You will see something like this:
+ +Login to the machine your Agent will deploy on, and clone the repo:
+# switch to a proper directory
+git clone git@github.com:AI4WA/OpenOmniFramework.git
+
+Before you start the Agent, you will also need to first sort out the file sync between the API and Agent machine.
+Same as above, if you are a Linux or Mac user, you can use rsync
to sync the files between different machines.
cd ./OpenOmniFramework
+cd ./Agent
+
+python3 -m venv venv
+
+source venv/bin/activate
+
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+# run storage sync between the API and Agent machines, in both directions
+
+python3 storage.py --token your_token_from_step_2 --api_domain http://<private-ip>:8000 --dest_dir api_machine_user@api_private_ip:/where/api/folder/is/OpenOmniFramework/Agent/data --dest_password api_machine_password
+
+And then you are free to run the Agent models.
+cd ./OpenOmniFramework
+cd ./Agent
+
+source venv/bin/activate
+
+# run the Agent models
+
+python3 main.py --token your_token_from_step_2 --api_domain http://<private-ip>:8000
+
+The speech will be feed with the url, so it is fine, the complex logic is handled within the API side.
+cd ./OpenOmniFramework
+cd ./Client/Responder
+
+# create the virtual environment if this is your first time running this
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+# run the audio player
+
+python3 play_speech.py --token your_token
+
+
+
In this mode, STORAGE_SOLUTION is api, which means the audio and video data are uploaded and downloaded via the API endpoints.
Step 0: Get a token
+Our deployed API is on https://openomni.ai4wa.com, you can use it to manage the tasks.
+Log in with the username admin and the password password. Do not change this password, as it is a shared account.
+You are then free to create a new account for yourself, and use your own account to create a Token.
+ +So here all you need to do is deploy the Client
and Agent
part.
# switch to a proper directory
+git clone git@github.com:AI4WA/OpenOmniFramework.git
+
+cd ./OpenOmniFramework
+cd ./Client/Listener
+# create the virtual environment if this is your first time running this
+python3 -m venv venv
+source venv/bin/activate
+
+
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+python3 storage.py --token your_token_from_step_0 --api_domain https://openomni.ai4wa.com
+
+cd ./OpenOmniFramework
+cd ./Client/Listener
+
+
+source venv/bin/activate
+
+
+# run video acquire
+python3 videos_acquire.py --token your_token_from_step_0 --api_domain https://openomni.ai4wa.com
+
+You should be able to see something like this: +
+Then open a new terminal
+cd ./OpenOmniFramework
+cd ./Client/Listener
+
+# create the virtual environment if this is your first time running this
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+# run audio acquire
+python3 audios_acquire.py --token your_token_from_step_0 --track_cluster CLUSTER_GPT_4O_ETE_CONVERSATION --api_domain https://openomni.ai4wa.com
+# you can change the cluster to the one you need
+
+You will see something like this: +
+If everything works, you should be able to check the newly create Data Audios
, Data Videos
and Speech2Text
Tasks
+in API Admin page.
+Something like below:
+
+
+
Now we need to start Agent module to consume the Tasks
.
Same as above, we will need to first run the storage sync.
+cd ./OpenOmniFramework
+cd ./Agent
+
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+python3 storage.py --token your_token_from_step_0 --api_domain https://openomni.ai4wa.com
+
+Before we start the Agent module, there are some pre configurations we need to do.
+As provided functionalities within Agent modules support OpenAI call, HuggingFace call, and there is also our provided +emotion detection module.
+We need to get them setup first.
+Setup OpenAI and HuggingFace Environment Variable
+Create a .env
file in ./Agent
folder, and add the following content:
HF_TOKEN=Your_HuggingFace_Token
+OPENAI_API_KEY=Your_OpenAI_API_KEY
+
+Otherwise, you can run
+export HF_TOKEN=Your_HuggingFace_Token
+export OPENAI_API_KEY=Your_OpenAI_API_KEY
+
+For the model part, if you want to get our emotion detection model running, you will need to download the model +from download link
+And put it in the folder: ./Agent/data/models/emotion_detection/model_data
.
+It should be like this
Then you should be ready to run the Agent module.
+# run the Agent module
+python3 main.py --token your_token_from_step_3
+
+You can also skip the requirements installation and run the Agent module directly with Docker:
+TOKEN=XXX docker compose up
+
+This will allow you to utilise the GPU resources on your machine if you have one.
At this point, the client side feeds video and audio data to the API, and the Agent module consumes the data.
+cd ./OpenOmniFramework
+cd ./Client/Responder
+
+# create the virtual environment if this is your first time running this
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+# run the audio player
+
+python3 play_speech.py --token your_token_from_step_3
+
+You will see something like this:
+ +Until now, you should have the whole pipeline running on your local machine.
+You should see new tasks created as expected in the Tasks
page in the API admin page.
+As shown below:
And in the Detailed Latency Benchmark page, you should be able to see the latency of each round of conversation.
+ + + + + + + + + + + + + + + + +This will be similar to the Trail on Cloud section, only differences is that the API end is on your cloud server.
+Under this mode, your storage solution will be s3, you will need to
+After this, the first step you will need to do is deploying it to your cloud server.
+We will assume it is a Linux Machine.
+You will need to have a cloud server, it can be AWS EC2, Azure Compute Engine or any VPS server you can access. +It will need to have a public IP address. +The demonstration about how to deploy it to a cloud server is in our CI/CD process.
+You will need to access the server and install docker
first.
+Test out the command docker
and docker compose
to verify the installation.
And then you can fork our repo, and replace the IP in the .github/workflows/deploy.yml
file with the public IP of your
+server, also remember to set the Actions -> Secrets
, add a secret with the name SERVER_PASSWORD
and the value as
+your server password.
In this way, you can continuously deploy the API to your server when code changes, and merge to the develop
branch.
If you want to manually to do so, it is also simple, just follow the steps in the deploy.yml
file.
+Pull the code to your server and mainly run the command in last step:
cd /root
+rm -rf omni
+mkdir omni
+tar xopf omni.tar -C omni
+cd /root/omni/API
+export STORAGE_SOLUTION=s3
+docker compose -f docker-compose.yml down
+docker compose -f docker-compose.yml up --build -d
+
+Configuration of Nginx will be like this:
+server {
+ server_name openomni.ai4wa.com; # replace with your domain
+ client_max_body_size 100M;
+ location / {
+ proxy_pass http://localhost:8000;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+ }
+}
+
+Then run
+sudo service nginx restart
+
+Add a DNS A record for this sever for your domain, and you should be able to access the API
+at http://your.domain.com
.
Then you can follow the steps in the Trail on Cloud
section to get the Agent and Client running.
API will provide the central logic control and orchestration for the whole system. +It is written in Django and Django Rest Framework. +The database is PostgresSQL.
+The apps in the API are [continue to be developed]:
+Currently, it will provide the following functionalities:
+If you want to add any new functionalities, it is quite easy, you just need to know how to use Django.
+The Agent component is the core of the system, which will be in charge of:
+It is writen in Python, and it is a pretty standard Python project.
+Each different task will have a subfolder within the modules
folder
Key thing to notice is that we create two classes to log the time point and duration to profile the latency performance +of the models.
+Agent/utils/time_logger.py
: log time pointAgent/utils/time_tracker.py
: track durationWe also setup the docker for the Agent component, which is in the Dockerfile
and docker-compose.yml
file.
How we handle the different storage solution is inside the storage.py
file.
As we mentioned in the introduction, models will be need to be downloaded to the data/models
folder, it is normally
+automatically.
Unless you want to run our emotion detection model, if you want to do that, refer to our introduction page.
+ + + + + + + + + + + + + + + +This is to collect the audio and video data from any devices with a camera and microphone. It can be your laptop, it +also can be your Raspberry Pi 4.
+Collect the video is easy, just keep in the background, and record the video when needed, upload it to the API.
+However, collect the audio is a bit tricky, which can be further enhanced.
+Our solution for the audio is using the whisper model to detect when user stop talking, your can specific the energy +threshold or timeout milliseconds to determine when to stop and save this round of sound.
+This will get the API receive the audio in a "conversation" way, speaker stop, Agent process and act, then speaker speak +again.
+However, there are several situations are limited by current solution:
+But it does can handle the basic conversation for research purpose.
+There are several parameters you can specify when you start the audio listener:
+--api_domain
: the API domain, default is http://localhost:8000
, which is within the full local setup--token
: the token you get from the API side--home_id
: If you use cloud mode, you can have multiple homes to upload video and audio data, as one of the most
+ common user case for this could be home intelligent assistant. The home do not limit to an actual home, can be a
+ hospital room, etc.--energy_threshold
: the energy threshold to determine when to stop the audio recording, default is 5000
--timeout
: the timeout milliseconds to determine when to stop the audio recording, default is 30000
in
+ millisecondsdefault_microphone
: which microphone to use if there are multiple microphones, default is pulse
track_cluster
: the cluster you want to track, default is CLUSTER_GPT_4O_ETE_CONVERSATION
Video also in theory should be streaming to a model, however, currently most models do not have the capability to take +streaming input.
+At the same time, most model is taking the images to the model.
+So how we design it now is:
+This is not the best solution, but it is the most practical solution for now.
+There are several parameters you can specify when you start the video listener:
+--api_domain
: the API domain, default is http://localhost:8000
, which is within the full local setup--token
: the token you get from the API side--home_id
: If you use cloud mode, you can have multiple homes to upload video and audio data, as one of the most
+ common user case for this could be home intelligent assistant. The home do not limit to an actual home, can be a
+ hospital room, etc.Then that's all, other setting if you want to customize, you can PR or change it by your own.
+We have described the STORAGE_SOLUTION in our Deployment Options
+The fastest way is definitely on the same machine for all modules, which actually is not practical in production. +So next option will be local network or cloud.
+All it does is pulling the API end to figure out whether there is any audio have not been played, if not, use the url to +play it.
+So the code is very simple and straight forward, it is just a loop to check the API, and play the audio.
+code have a play_speech.py
, all other files are some extent utilities functions.
For the hardware part, it only requires a speaker, so it can be running on a laptop, or working with a Raspberry Pi.
+ + + + + + + + + + + + + + + +We will describe how each module is designed, so you can understand how it works further for future development.
+ +If you want to check the details, you can either check the Source
section or our GitHub repo code.
API
+hardware
+ +llm
+ +orchestrator
+AI
+Django settings for api project.
+Generated by 'django-admin startproject' using Django 5.0.2.
+For more information on this file, see +https://docs.djangoproject.com/en/5.0/topics/settings/
+For the full list of settings and their values, see +https://docs.djangoproject.com/en/5.0/ref/settings/
+ + + +URL configuration for api project.
+The urlpatterns
list routes URLs to views. For more information please see:
+ https://docs.djangoproject.com/en/5.0/topics/http/urls/
+Examples:
+Function views
+ 1. Add an import: from my_app import views
+ 2. Add a URL to urlpatterns: path('', views.home, name='home')
+Class-based views
+ 1. Add an import: from other_app.views import Home
+ 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
+Including another URLconf
+ 1. Import the include() function: from django.urls import include, path
+ 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
fire_and_forget(f)
+
+run it and forget it
+ +API/authenticate/utils/fire_and_forget.py
4 + 5 + 6 + 7 + 8 + 9 +10 +11 +12 |
|
timer
+
+
+util function used to log the time taken by a part of program
+ +API/authenticate/utils/timer.py
5 + 6 + 7 + 8 + 9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 |
|
__enter__()
+
+context enter to start write this
+ +API/authenticate/utils/timer.py
27 +28 +29 +30 +31 +32 +33 |
|
__exit__(context, value, traceback)
+
+context exit will write this
+ +API/authenticate/utils/timer.py
35 +36 +37 +38 +39 +40 |
|
__init__(logger, message)
+
+init the timer
+logger: Logger + logger to write the logs +message: str + message to log, like start xxx
+ +API/authenticate/utils/timer.py
10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 |
|
APITokenObtainPairView
+
+
+
+ Bases: TokenObtainPairView
API/authenticate/views.py
21 +22 +23 +24 +25 +26 +27 +28 +29 +30 |
|
post(request, *args, **kwargs)
+
+Override the post method to add custom swagger documentation.
+ +API/authenticate/views.py
24 +25 +26 +27 +28 +29 +30 |
|
DataMultiModalConversationFKAdmin
+
+
+
+ Bases: ImportExportModelAdmin
All the obj above will be self.multi_modal_conversation
+ +API/hardware/admin.py
462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 |
|
DataAudio
+
+
+
+ Bases: Model
Link to home and hardware device, and the audio data will be stored in the database +It will be created by the endpoint from client side when audio data is acquired
+ +API/hardware/models.py
71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 |
|
create_obj(home, uid, hardware_device_mac_address, sequence_index, audio_file, start_time, end_time, track_id=None)
+
+
+ classmethod
+
+
+Create an audio data object
+ +API/hardware/models.py
108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 |
|
url()
+
+get the file, and create media url +Returns:
+ +API/hardware/models.py
137 +138 +139 +140 +141 +142 +143 |
|
DataMultiModalConversation
+
+
+
+ Bases: Model
It will be created when a audio is created +Then video will be added when emotion detection is triggered, or other task require video +Text will be added when speech2text is done +ResText will be added when the text is processed by the language model +ResSpeech will be added when the text is processed by the text2speech
+ +API/hardware/models.py
274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 |
|
DataText
+
+
+
+ Bases: Model
The text data will be stored in the database +It will be created after speech2text is done
+ +API/hardware/models.py
192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 |
|
DataVideo
+
+
+
+ Bases: Model
Link to home and hardware device, and the video data will be stored in the database +It will be created by the endpoint from client side when video data is acquired +Same as the audio data, the video data will be stored in the database +It will not be directly connected to the audio data +Audio data and video data will be connected by the time range softly
+ +API/hardware/models.py
150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 |
|
HardWareDevice
+
+
+
+ Bases: Model
One home can have multiple hardware devices, and the hardware device can be used to acquire the audio and video data
+ +API/hardware/models.py
28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 |
|
Home
+
+
+
+ Bases: Model
Created by setup manually, and the client side can specify the home, so all data will be connected to this.
+ +API/hardware/models.py
9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 |
|
add_data_multimodal_conversation_entry(sender, instance, created, **kwargs)
+
+Add data multimodal conversation
+ +API/hardware/signals.py
7 + 8 + 9 +10 +11 +12 +13 +14 +15 |
|
AudioDataViewSet
+
+
+
+ Bases: ModelViewSet
API/hardware/views.py
48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 |
|
get_audio_data(request)
+
+Override the post method to add custom swagger documentation.
+ +API/hardware/views.py
52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 |
|
Text2SpeechViewSet
+
+
+
+ Bases: ModelViewSet
API/hardware/views.py
160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 |
|
get_text_to_speech(request)
+
+Override the post method to add custom swagger documentation.
+ +API/hardware/views.py
237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 |
|
list_files(request)
+
+List all the files in the S3 bucket
+ +API/hardware/views.py
497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 |
|
upload_file(request)
+
+This is for temporarily solution, as we host the centre server, +and will not provide the S3 access to the general user
+So to testout our system, you can use this endpoint to upload files to S3 +Focus on client and AI side
+ +API/hardware/views.py
455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 |
|
Command
+
+
+
+ Bases: BaseCommand
API/llm/management/commands/check_models.py
10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 |
|
handle(*args, **options)
+
+Loop through the MODELS dictionary and check if the model is in the database. If it is not, add it. +:param args: +:param options: +:return:
+ +API/llm/management/commands/check_models.py
13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 |
|
LLMConfigRecords
+
+
+
+ Bases: Model
API/llm/models.py
13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 |
|
download_model()
+
+Download the model from the model_details +:return:
+ +API/llm/models.py
48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 |
|
LLMConfigViewSet
+
+
+
+ Bases: ModelViewSet
API/llm/views.py
14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 |
|
serializer_class = LLMConfigRecordsSerializer
+
+
+ class-attribute
+ instance-attribute
+
+
+List all available llm config records
+list(request, *args, **kwargs)
+
+Override the post method to add custom swagger documentation.
+ +API/llm/views.py
22 +23 +24 +25 +26 +27 +28 +29 +30 +31 |
|
Django's command-line utility for administrative tasks.
+ + + +main()
+
+Run administrative tasks.
+ +API/manage.py
7 + 8 + 9 +10 +11 +12 +13 +14 +15 +16 +17 +18 |
|
CLUSTER_GPT_35_ETE_CONVERSATION = {'openai_speech2text': {'order': 0, 'extra_params': {}, 'component_type': 'task', 'task_name': 'openai_speech2text'}, 'completed_openai_speech2text': {'order': 1, 'extra_params': {}, 'component_type': 'signal', 'task_name': None}, 'created_data_text': {'order': 2, 'extra_params': {}, 'component_type': 'signal', 'task_name': None}, 'completed_openai_gpt_35': {'order': 3, 'extra_params': {'sample_ratio': 10, 'prompt_template': '{text}'}, 'component_type': 'task', 'task_name': 'openai_gpt_35'}, 'completed_openai_text2speech': {'order': 4, 'extra_params': {}, 'component_type': 'task', 'task_name': 'openai_text2speech'}}
+
+
+ module-attribute
+
+
+Cluster for gpt3.5 model and gpt3.5 with RAG
+CLUSTER_GPT_4O_TEXT_ETE_CONVERSATION = {'openai_speech2text': {'order': 0, 'extra_params': {}, 'component_type': 'task', 'task_name': 'openai_speech2text'}, 'completed_openai_speech2text': {'order': 1, 'extra_params': {}, 'component_type': 'signal', 'task_name': None}, 'created_data_text': {'order': 2, 'extra_params': {}, 'component_type': 'signal', 'task_name': None}, 'completed_openai_gpt_4o_text_only': {'order': 2, 'extra_params': {'sample_ratio': 10, 'prompt_template': '\n You are a robot, and you are talking to a human.\n\n Your task is to generate a response to the human based on the text\n\n You response will be directly send to end user.\n\n The text is: {text}\n '}, 'component_type': 'task', 'task_name': 'openai_gpt_4o_text_only'}, 'completed_openai_text2speech': {'order': 3, 'extra_params': {}, 'component_type': 'task', 'task_name': 'openai_text2speech'}}
+
+
+ module-attribute
+
+
+Cluster for gpt3.5 model and gpt3.5 with RAG
+CLUSTER_HF_ETE_CONVERSATION = {'speech2text': {'order': 0, 'extra_params': {}, 'component_type': 'task', 'task_name': 'speech2text'}, 'completed_speech2text': {'order': 1, 'extra_params': {}, 'component_type': 'signal', 'task_name': 'None'}, 'created_data_text': {'order': 2, 'extra_params': {}, 'component_type': 'signal', 'task_name': None}, 'completed_emotion_detection': {'order': 3, 'extra_params': {}, 'component_type': 'task', 'task_name': 'emotion_detection'}, 'completed_hf_llm': {'order': 4, 'extra_params': {'hf_model_name': 'Qwen/Qwen2-7B-Instruct'}, 'component_type': 'task', 'task_name': 'hf_llm'}, 'completed_text2speech': {'order': 5, 'extra_params': {}, 'component_type': 'task', 'task_name': 'text2speech'}}
+
+
+ module-attribute
+
+
+Create one to use the full GPT-4o models.
+In theory, it should takes the audio and video in, and then output audio.
+However, until now, the API for audio is not yet available.
+So we will use the walk around by using the speech to text model first, and then call GPT-4o
+CLUSTER_Q_ETE_CONVERSATION = {'speech2text': {'order': 0, 'extra_params': {}, 'component_type': 'task', 'task_name': 'speech2text'}, 'completed_speech2text': {'order': 1, 'extra_params': {}, 'component_type': 'signal', 'task_name': None}, 'created_data_text': {'order': 2, 'extra_params': {}, 'component_type': 'signal', 'task_name': None}, 'completed_emotion_detection': {'order': 3, 'extra_params': {}, 'component_type': 'task', 'task_name': 'emotion_detection'}, 'completed_quantization_llm': {'order': 4, 'extra_params': {'llm_model_name': 'SOLAR-10'}, 'component_type': 'task', 'task_name': 'quantization_llm'}, 'completed_text2speech': {'order': 5, 'extra_params': {}, 'component_type': 'task', 'task_name': 'text2speech'}}
module-attribute

Get rid of the emotion detection model.
+CLUSTER_Q_NO_EMOTION_ETE_CONVERSATION = {'speech2text': {'order': 0, 'extra_params': {}, 'component_type': 'task', 'task_name': 'speech2text'}, 'completed_speech2text': {'order': 1, 'extra_params': {}, 'component_type': 'signal', 'task_name': None}, 'created_data_text': {'order': 2, 'extra_params': {}, 'component_type': 'signal', 'task_name': None}, 'completed_quantization_llm': {'order': 4, 'extra_params': {'llm_model_name': 'SOLAR-10'}, 'component_type': 'task', 'task_name': 'quantization_llm'}, 'completed_text2speech': {'order': 5, 'extra_params': {}, 'component_type': 'task', 'task_name': 'text2speech'}}
module-attribute

This is the pipeline using the HF LLM model for the ETE conversation.
+logger = get_logger(__name__)
module-attribute

This is for the quantization LLM model for the ETE conversation.
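To make the cluster layout above concrete, here is a minimal sketch (not the project's own code) that walks one of these dictionaries in execution order; the helper name is invented for illustration.

```python
from typing import Dict


def describe_cluster(cluster: Dict[str, dict]) -> None:
    # Sort chain components by their declared "order" field.
    for name, component in sorted(cluster.items(), key=lambda item: item[1]["order"]):
        kind = component["component_type"]        # "task" or "signal"
        task_name = component.get("task_name")    # None for pure signals
        extra = component.get("extra_params", {})
        print(f"{component['order']}: {name} ({kind}, task={task_name}, extra={extra})")


# Example, assuming the dict above is importable from the clusters module:
# describe_cluster(CLUSTER_GPT_35_ETE_CONVERSATION)
```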
trigger_completed_emotion_detection(sender, **kwargs)

This will create a task to do the quantization LLM inference.

Source: API/orchestrator/chain/completed_emotion_detection.py (lines 14-70)
trigger_completed_hf_llm(sender, **kwargs)

This will create the response, which will be a text-to-text task. We will create the ResText here.

Source: API/orchestrator/chain/completed_hf_llm.py (lines 12-45)
trigger_completed_openai_gpt_35(sender, **kwargs)

This will create the response, which will be a text-to-text task.

Source: API/orchestrator/chain/completed_openai_gpt_35.py (lines 12-44)
trigger_completed_openai_gpt_4o_text_and_image(sender, **kwargs)

This will create the response, which will be a text-to-text task.

Source: API/orchestrator/chain/completed_openai_gpt_4o_text_and_image.py (lines 12-46)
trigger_completed_openai_gpt_4o_text_only(sender, **kwargs)

This will create the response, which will be a text-to-text task.

Source: API/orchestrator/chain/completed_openai_gpt_4o_text_only.py (lines 12-44)
trigger_completed_openai_speech2text(sender, **kwargs)

We will need to gather the text, and then pass the video to the next step.

Parameters:
- sender: the sender of the signal (required)
- **kwargs: the data passed to the signal (default: {})

Source: API/orchestrator/chain/completed_openai_speech2text.py (lines 12-77)
trigger_completed_openai_text2speech(sender, **kwargs)

After the text2speech is done, save it to the database.

Parameters:
- sender: the sender of the signal (required)
- **kwargs: the data passed to the signal (default: {})

Source: API/orchestrator/chain/completed_openai_text2speech.py (lines 13-55)
trigger_completed_quantization_llm(sender, **kwargs)

This will create the response, which will be a text-to-text task, and we will need to log this ResText.

Source: API/orchestrator/chain/completed_quantization_llm.py (lines 12-45)
trigger_completed_rag(sender, **kwargs)

This will create the response, which will be a text-to-text task, and we will need to log this ResText.

Source: API/orchestrator/chain/completed_rag.py (lines 12-45)
trigger_completed_speech2text(sender, **kwargs)

After the speech2text is done, save it to the database.

Parameters:
- sender: the sender of the signal (required)
- **kwargs: the data passed to the signal (default: {})

Source: API/orchestrator/chain/completed_speech2text.py (lines 12-68)
trigger_completed_task(sender, **kwargs)

Trigger the multi-modal emotion detection.

Source: API/orchestrator/chain/completed_task.py (lines 24-96)
trigger_completed_text2speech(sender, **kwargs)

After the text2speech is done, save it to the database.

Parameters:
- sender: the sender of the signal (required)
- **kwargs: the data passed to the signal (default: {})

Source: API/orchestrator/chain/completed_text2speech.py (lines 12-53)
trigger_created_data_text(sender, **kwargs)

This function will trigger the emotion detection model with the latest data. It will first look for the latest data_text, and then get the audio and image data based on the time range of the audio data.

Parameters:
- sender: the sender of the signal (required)
- **kwargs: the data passed to the signal (default: {})

Source: API/orchestrator/chain/created_data_text.py (lines 14-93)
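The trigger_* functions above are receivers wired to Django signals. As a hedged sketch, assuming orchestrator.chain.signals exposes a django.dispatch.Signal named created_data_text (the module exists in the source tree, but the exact import path is an assumption), an extra listener could be attached like this:

```python
from django.dispatch import receiver

from orchestrator.chain.signals import created_data_text  # assumed location


@receiver(created_data_text)
def debug_created_data_text(sender, **kwargs):
    # kwargs carries whatever payload the emitter sends; "data" is illustrative only.
    data = kwargs.get("data")
    print(f"created_data_text received from {sender!r}: {data!r}")
```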
Here we define a list of clusters. Each cluster has a list of chain components; for example, the end-to-end conversation chain is made up of components such as speech2text, emotion detection, the LLM call, and text2speech.

ClusterManager

Source: API/orchestrator/chain/manager.py (lines 26-140)
chain_next(track_id, current_component, next_component_params, name=None, user=None)

classmethod

Chain to the next component.

Parameters:
- track_id (str): the track ID (required)
- current_component (str): the current component (required)
- next_component_params (dict): the next component parameters (required)
- name (str): the task name; it will be used to aggregate the task (default: None)
- user: the user (default: None)

Source: API/orchestrator/chain/manager.py (lines 84-140)
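Illustrative only: a call to chain_next based on the signature above; the import path, track ID and parameter values are assumptions, not verified project usage.

```python
from orchestrator.chain.manager import ClusterManager  # assumed location

ClusterManager.chain_next(
    track_id="T-CLUSTER_GPT_35_ETE_CONVERSATION-42",  # hypothetical track ID
    current_component="completed_openai_speech2text",
    next_component_params={"text": "hello there"},    # hypothetical payload
    name="openai_gpt_35",
    user=None,
)
```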
get_cluster(cluster_name)

staticmethod

Get the cluster.

Parameters:
- cluster_name (str): the cluster name (required)

Source: API/orchestrator/chain/manager.py (lines 28-38)
get_next(cluster_name, current_component)

classmethod

Get the next component.

Parameters:
- cluster_name (str): the cluster name (required)
- current_component (str): the current component (required)

Source: API/orchestrator/chain/manager.py (lines 70-82)
get_next_chain_component(cluster, current_component)

staticmethod

Get the next chain component.

Parameters:
- cluster (dict): the cluster (required)
- current_component (str): the current component (required)

Returns:
- Tuple[Optional[str], Optional[dict]]: the next component and its parameters if they exist, otherwise None

Source: API/orchestrator/chain/manager.py (lines 40-68)
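A standalone sketch of the lookup described above: given a cluster dict and the current component, return the next component (by "order") and its parameters, or (None, None) at the end of the chain. This is a re-implementation for illustration, not the project's code.

```python
from typing import Dict, Optional, Tuple


def next_chain_component(
    cluster: Dict[str, dict], current: str
) -> Tuple[Optional[str], Optional[dict]]:
    # Order the component names by their declared "order" field.
    ordered = sorted(cluster.items(), key=lambda item: item[1]["order"])
    names = [name for name, _ in ordered]
    if current not in names or current == names[-1]:
        return None, None
    nxt = names[names.index(current) + 1]
    return nxt, cluster[nxt].get("extra_params", {})
```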
data_multimodal_conversation_log_context_emotion_detection(task_data, result, logs=None)

Parameters:
- task_data (TaskData): the task data (required)
- result (dict): the result of the context emotion detection (required)
- logs (dict): the logs of the context emotion detection (default: None)

Source: API/orchestrator/chain/utils.py (lines 52-75)
data_multimodal_conversation_log_context_rag(task_data, result, logs=None)

Parameters:
- task_data (TaskData): the task data (required)
- result (dict): the result of the context RAG (required)
- logs (dict): the logs of the context RAG (default: None)

Source: API/orchestrator/chain/utils.py (lines 78-101)
data_multimodal_conversation_log_res_speech(task_data, speech_file_path)

Parameters:
- task_data (TaskData): the task data (required)
- speech_file_path (str): the speech file path (required)

Source: API/orchestrator/chain/utils.py (lines 31-49)
data_multimodal_conversation_log_res_text(task_data, text)

Log the ResText to the DataMultiModalConversation.

Parameters:
- task_data (TaskData): the task data (required)
- text (str): the text to log (required)

Source: API/orchestrator/chain/utils.py (lines 14-28)
AccuracyBenchmark

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 20-652)
__init__(benchmark_cluster=CLUSTER_Q_ETE_CONVERSATION_NAME)

Initialize the benchmark.

Parameters:
- benchmark_cluster (str): the benchmark cluster

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 21-28)
annotation_average(df)

staticmethod

Calculate the average of the annotation.

Parameters:
- df (pd.DataFrame): the dataframe

Returns:
- pd.DataFrame: the dataframe

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 470-494)
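A rough pandas sketch of what averaging annotations can look like: collapsing per-annotator scores into one value per record. The column names are invented for illustration; the real dataframe layout lives in the source file.

```python
import pandas as pd

df = pd.DataFrame(
    {
        "record_id": [1, 1, 2, 2],
        "annotator": ["a", "b", "a", "b"],
        "score": [4, 5, 2, 3],
    }
)
# Average the annotation scores per record.
averaged = df.groupby("record_id", as_index=False)["score"].mean()
print(averaged)
```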
benchmark_run()

Run the benchmark.

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 30-47)
calculate_speech2text_accuracy(df)

staticmethod

Calculate the speech2text accuracy.

Parameters:
- df (pd.DataFrame): the dataframe

Returns:
- float: the accuracy

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 422-468)
extract_required_annotation_models(cluster_name)

staticmethod

Extract the required annotation models.

Parameters:
- cluster_name (str): the cluster name

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 396-420)
multi_turn_benchmark_run()

Run the multi-turn benchmark.

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 534-551)
plot_distribution(df, title='')

staticmethod

Plot the distribution of the latency.

Parameters:
- df (pd.DataFrame): the dataframe
- title (str): the title

Returns:
- str: the plot in HTML

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 342-394)
plot_table(df, title='')

staticmethod

Plot the table.

Parameters:
- df (pd.DataFrame): the dataframe
- title (str): the title

Returns:
- str: the plot in HTML

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 271-340)
process_cluster_benchmark(cluster_name, detailed=False)

Process the benchmark for a specific cluster.

For each cluster, we will need to analyse the conversation model, and also understand which other models we need to analyse, for example emotion_detection.

Parameters:
- cluster_name (str): the cluster name
- detailed (bool): the detailed flag

Returns:
- str: the HTML content

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 49-269)
process_multi_turn_benchmark(cluster_name)

Process the multi-turn benchmark.

First we will need to get all tags with this cluster name, and grab the last one within each tag.

Parameters:
- cluster_name (str): the cluster name

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 553-652)
summary_df(df)

staticmethod

Summarise the given dataframe.

Parameters:
- df (pd.DataFrame): the dataframe (required)

Returns:
- str: the HTML content

Source: API/orchestrator/metrics/accuracy_benchmark.py (lines 496-516)
LatencyBenchmark

For each component, we will generally have three values:
- model_latency: the time taken by the model to process the data
- transfer_latency: the time taken to transfer the data to the model
- overall_latency: the time taken to both transfer the data and have the model process it

The whole pipeline latency is derived from the start and end timestamps of all components.

Another way to present the performance is a timeline:
- the start will be 0
- for each important time point, take the average relative time to 0 and plot it on the timeline

Source: API/orchestrator/metrics/latency_benchmark.py (lines 19-732)
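A minimal sketch of how the three latency values relate, assuming each component reports timestamps in a latency profile; the field names here are illustrative, not the project's actual keys.

```python
from datetime import datetime

profile = {
    "received_at": datetime(2024, 7, 1, 14, 58, 36, 419352),
    "model_start": datetime(2024, 7, 1, 14, 58, 36, 900000),
    "model_end": datetime(2024, 7, 1, 14, 58, 38, 100000),
    "returned_at": datetime(2024, 7, 1, 14, 58, 38, 350000),
}

model_latency = (profile["model_end"] - profile["model_start"]).total_seconds()
transfer_latency = (
    (profile["model_start"] - profile["received_at"])
    + (profile["returned_at"] - profile["model_end"])
).total_seconds()
overall_latency = (profile["returned_at"] - profile["received_at"]).total_seconds()
print(model_latency, transfer_latency, overall_latency)
```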
__init__(benchmark_cluster=CLUSTER_Q_ETE_CONVERSATION_NAME)

Initialize the benchmark.

Parameters:
- benchmark_cluster (str): the benchmark cluster

Source: API/orchestrator/metrics/latency_benchmark.py (lines 34-41)
plot_distribution(df, title='')

staticmethod

Plot the distribution of the latency.

Parameters:
- df (pd.DataFrame): the dataframe
- title (str): the title

Returns:
- str: the plot in HTML

Source: API/orchestrator/metrics/latency_benchmark.py (lines 514-563)
plot_stacked_timeline(df, title)

staticmethod

Plot the stacked timeline.

Parameters:
- df (pd.DataFrame): the dataframe
- title (str): the title

Source: API/orchestrator/metrics/latency_benchmark.py (lines 565-608)
plot_table(df, title='')

staticmethod

Plot the table.

Parameters:
- df (pd.DataFrame): the dataframe
- title (str): the title

Returns:
- str: the plot in HTML

Source: API/orchestrator/metrics/latency_benchmark.py (lines 443-512)
plot_timestamp_timeline(df)

staticmethod

Plot the timestamp timeline.

Parameters:
- df (pd.DataFrame): the dataframe

Returns:
- str: the plot in HTML

Source: API/orchestrator/metrics/latency_benchmark.py (lines 692-732)
plot_timestamp_timeline_depth(df, title)

staticmethod

Plot the timestamp timeline.

Parameters:
- df (pd.DataFrame): the dataframe
- title (str): the title

Returns:
- str: the plot in HTML

Source: API/orchestrator/metrics/latency_benchmark.py (lines 610-690)
process_cluster(cluster_name)

Process the cluster.

Parameters:
- cluster_name (str): the cluster name

Source: API/orchestrator/metrics/latency_benchmark.py (lines 74-131)
process_cluster_detail(cluster_name)

Process the cluster in detail. Even if the track is not finished, we will still plot it and its stop status.

Parameters:
- cluster_name (str): the cluster name

Returns:
- the HTML content

Source: API/orchestrator/metrics/latency_benchmark.py (lines 133-198)
process_task_group(task_track)

staticmethod

This will process each component, and then extract the transfer and model latency total.

Parameters:
- task_track (List[Task]): the task track (required)

Returns:
- dict: the benchmark result

Source: API/orchestrator/metrics/latency_benchmark.py (lines 200-275)
process_task_group_detail(task_track)

staticmethod

This will process each component, and then extract the transfer and model latency total.

Parameters:
- task_track (List[Task]): the task track (required)

Returns:
- dict: the benchmark result

Source: API/orchestrator/metrics/latency_benchmark.py (lines 277-369)
process_task_group_detail_timeline(task_track, timeline=False)

staticmethod

Based on the result_json => latency_profile, we will gather the time points for each component and then convert them to relative second values compared to the start point.

If timeline is True, we will only grab the timestamp information. Otherwise, we will calculate the relative time to the start point.

In the end, we will grab the …

Parameters:
- task_track (List[Task]): the task track
- timeline (bool): whether we want to plot the timeline

Source: API/orchestrator/metrics/latency_benchmark.py (lines 371-441)
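A sketch of the relative-to-start conversion described above: take a set of timestamps and express each as seconds since the earliest one. The time-point names are illustrative.

```python
from datetime import datetime

points = {
    "speech2text_start": datetime(2024, 7, 1, 14, 58, 36),
    "speech2text_end": datetime(2024, 7, 1, 14, 58, 39),
    "text2speech_end": datetime(2024, 7, 1, 14, 58, 45),
}
start = min(points.values())
relative = {name: (ts - start).total_seconds() for name, ts in points.items()}
print(relative)  # {'speech2text_start': 0.0, 'speech2text_end': 3.0, 'text2speech_end': 9.0}
```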
run()

Run the benchmark.

Source: API/orchestrator/metrics/latency_benchmark.py (lines 43-58)
extract_task_group(cluster_name)

Extract the task group.

Parameters:
- cluster_name (str): the cluster name

Source: API/orchestrator/metrics/utils.py (lines 11-49)
get_task_names_order(track_id)

Get the task names order.

Parameters:
- track_id (str): the track ID

Returns:
- List[str]: the task names order

Source: API/orchestrator/metrics/utils.py (lines 52-69)
str_to_datetime(datetime_str)

Convert the datetime string to a datetime object.

Parameters:
- datetime_str (str): the string datetime, e.g. 2024-07-01T14:58:36.419352

Returns:
- datetime: the datetime object

Source: API/orchestrator/metrics/utils.py (lines 72-81)
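The example format in the docstring is ISO 8601, so the conversion this helper performs can be reproduced with the standard library:

```python
from datetime import datetime

# Parse the documented example string into a datetime object.
dt = datetime.fromisoformat("2024-07-01T14:58:36.419352")
print(dt.year, dt.microsecond)  # 2024 419352
```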
Task

Bases: Model

Source: API/orchestrator/models.py (lines 11-160)
create_task(user, name, task_name, parameters, description='', track_id=None)

classmethod

Create a task.

Parameters:
- user (User): the user who created the task
- name (str): the name of the task
- task_name (str): the name of the task
- parameters (dict): the parameters for the task
- description (str): the description of the task
- track_id (str): the tracking ID of the task; it will start with T-{cluster_name}-{id}

Source: API/orchestrator/models.py (lines 64-96)
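Illustrative call only (for example from a Django shell); the import path and parameter values are assumptions based on the signature and docstring above, not verified project usage.

```python
from django.contrib.auth import get_user_model

from orchestrator.models import Task  # assumed location

user = get_user_model().objects.first()
task = Task.create_task(
    user=user,
    name="demo speech2text",
    task_name="speech2text",
    parameters={"uid": "demo-uid"},            # hypothetical parameters
    description="queued from a shell session",
    track_id=None,                             # let the model generate T-{cluster_name}-{id}
)
```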
get_task_name_choices()

staticmethod

Get dynamic task name choices.

Returns:
- list: list of tuples containing task name choices

Source: API/orchestrator/models.py (lines 120-142)
init_track_id(name)

staticmethod

Initialize the track ID.

Parameters:
- name (str): the name of the task

Returns:
- str: the track ID

Source: API/orchestrator/models.py (lines 98-111)
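A sketch of the T-{cluster_name}-{id} convention mentioned above; the counter source and helper name are assumptions for illustration only.

```python
def make_track_id(cluster_name: str, last_id: int) -> str:
    # Follow the documented pattern T-{cluster_name}-{id}.
    return f"T-{cluster_name}-{last_id + 1}"


print(make_track_id("CLUSTER_Q_ETE_CONVERSATION", 41))
# T-CLUSTER_Q_ETE_CONVERSATION-42
```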
QueueTaskViewSet

Bases: ViewSet

A ViewSet for queuing AI tasks generally.

Source: API/orchestrator/views.py (lines 18-196)
ai_task(request)

Endpoint to queue tasks for the AI client side to run.

Source: API/orchestrator/views.py (lines 27-71)
task(request, task_name='all')

Endpoint to get the task for AI.

Source: API/orchestrator/views.py (lines 73-116)
update_result(request, pk=None)

Endpoint to update the result of a task.

Source: API/orchestrator/views.py (lines 119-158)
worker(request)

Endpoint to register a GPU worker.

Source: API/orchestrator/views.py (lines 160-196)
AIOrchestrator

This is the AI Orchestrator. We will pull tasks from the API end, and then, based on which type of task it is, send each one to the respective handler.

Source: Agent/main.py (lines 30-267)
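A minimal sketch of the pull-and-dispatch loop the class description outlines; the endpoint path, payload fields and handler registry are assumptions for illustration, not the real API routes.

```python
import time

import requests


def run_agent(api_domain: str, token: str, task_name: str = "all", time_sleep: float = 1.5) -> None:
    headers = {"Authorization": f"Token {token}"}
    # Illustrative registry: map task names to callables.
    handlers = {"speech2text": lambda task: print("would run speech2text on", task)}
    while True:
        # Hypothetical endpoint path; the real route lives in API/orchestrator/urls.py.
        resp = requests.get(f"{api_domain}/queue_task/task/{task_name}/", headers=headers)
        if resp.status_code == 200:
            task = resp.json()
            handler = handlers.get(task.get("task_name")) if task else None
            if handler is not None:
                handler(task)
        time.sleep(time_sleep)
```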
__init__(api_domain, token, task_name='all', time_sleep=1.5)

Initialize the AI Orchestrator.

Parameters:
- api_domain (str): the API domain
- token (str): the API token
- task_name (str): the task name; default is "all"
- time_sleep (float): the time to sleep during each loop; default is 1.5

Source: Agent/main.py (lines 38-95)
authenticate_token()

Authenticate the token.

Returns:
- bool: True if the token is valid

Source: Agent/main.py (lines 97-103)
handle_emotion_detection_task(task)

Handle the emotion detection task.

Parameters:
- task (Task): the task

Source: Agent/main.py (lines 191-200)
handle_general_ml_task(task)

Handle the general ML task.

Parameters:
- task (Task): the task

Source: Agent/main.py (lines 227-239)
handle_hf_llm_task(task)

Handle the HF LLM task, which will require more time compared to other tasks.

Parameters:
- task (Task): the task

Source: Agent/main.py (lines 213-225)
handle_openai_task(task)

Handle the OpenAI task.

Parameters:
- task (Task): the task

Source: Agent/main.py (lines 241-253)
handle_quantization_llm_task(task)

Handle the quantization LLM task.

Parameters:
- task (Task): the task

Source: Agent/main.py (lines 202-211)
handle_rag_task(task)

Handle the RAG task.

Parameters:
- task (Task): the task

Source: Agent/main.py (lines 255-267)
handle_speech2text_task(task)

Handle the speech2text task.

Parameters:
- task (Task): the task

Source: Agent/main.py (lines 169-178)
handle_task(task)

Handle the task.

Parameters:
- task (dict): the task

Source: Agent/main.py (lines 149-167)
handle_text2speech_task(task)

Handle the text2speech task.

Parameters:
- task (Task): the task

Source: Agent/main.py (lines 180-189)
Task

Bases: BaseModel

The Task model. This is the one we will pull when asking for a task from the API.

Source: Agent/models/task.py (lines 38-62)
FeaturesExtractor

Source: Agent/modules/emotion_detection/features_extraction.py (lines 20-263)
get_audio_embedding(audios)

staticmethod

Extracts and returns average audio features from a list of audio files.

Source: Agent/modules/emotion_detection/features_extraction.py (lines 25-40)
get_images_tensor(images)

Extracts features from a list of images using a specified model.

Source: Agent/modules/emotion_detection/features_extraction.py (lines 42-49)
EmotionDetectionHandler

Source: Agent/modules/emotion_detection/handler.py (lines 28-157)
handle_task(task)

Handle the task.

Parameters:
- task (Task): the task to handle

Returns:
- the task with the result

Source: Agent/modules/emotion_detection/handler.py (lines 30-54)
trigger_model(text, audio_paths, images_paths)

staticmethod

Parameters:
- text (str): the text to analyze for emotion (required)
- audio_paths (List[str]): the audio data to analyze for emotion (required)
- images_paths (List[str]): the images data to analyze for emotion (required)

Source: Agent/modules/emotion_detection/handler.py (lines 56-157)
BertTextEncoder

Bases: Module

Source: Agent/modules/emotion_detection/sentiment.py (lines 177-246)
__init__(language='en', use_finetune=False)

language: en / cn

Source: Agent/modules/emotion_detection/sentiment.py (lines 178-199)
forward(text)

text: (batch_size, 3, seq_len), where the 3 channels are input_ids, input_mask and segment_ids (i.e. input_ids, attention_mask and token_type_ids).

Source: Agent/modules/emotion_detection/sentiment.py (lines 215-246)
from_text(text)

text: raw data

Source: Agent/modules/emotion_detection/sentiment.py (lines 204-213)
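The (batch_size, 3, seq_len) layout expected by forward can be produced with the Hugging Face tokenizer; this is a hedged sketch of that preprocessing, not the project's from_text implementation.

```python
import torch
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
enc = tokenizer(["hello there"], padding=True, return_tensors="pt")

# Stack the three channels into the (batch_size, 3, seq_len) tensor described above.
text_tensor = torch.stack(
    [enc["input_ids"], enc["attention_mask"], enc["token_type_ids"]], dim=1
)
print(text_tensor.shape)  # torch.Size([1, 3, seq_len])
```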
GeneralMLModel

Source: Agent/modules/general_ml/handler.py (lines 14-89)
handle_task(task)

Handle the task.

Parameters:
- task (Task): the task to handle

Returns:
- Task: the updated task

Source: Agent/modules/general_ml/handler.py (lines 18-56)
infer(ml_model, general_model_name, text, params)

staticmethod

Run inference with the model.

Parameters:
- ml_model: the general model
- general_model_name (str): the model name
- text (str): the text
- params (dict): the model params

Source: Agent/modules/general_ml/handler.py (lines 72-89)
load_model(general_model_name)

staticmethod

Load the model.

Parameters:
- general_model_name (str): the model name

Source: Agent/modules/general_ml/handler.py (lines 58-70)
HFLLM

Source: Agent/modules/hf_llm/handler.py (lines 17-73)
handle_task(task)

Handle the task.

Parameters:
- task (Task): the task to handle

Returns:
- Task: the updated task

Source: Agent/modules/hf_llm/handler.py (lines 22-73)
OpenAIHandler

Source: Agent/modules/openai/handler.py (lines 24-320)
gpt_35(task)

Call the OpenAI GPT-3.5 endpoint for the given task.

Parameters:
- task (Task): the task

Returns:
- Optional[str]: the generated text

Source: Agent/modules/openai/handler.py (lines 150-188)
gpt_4o_text_and_images(task)

Get the text and images, and then call the GPT-4o endpoint.

Parameters:
- task (Task): the task (required)

Source: Agent/modules/openai/handler.py (lines 190-280)
gpt_4o_text_only(task)

Get the text only and call the GPT-4o endpoint.

Parameters:
- task (Task): the task

Source: Agent/modules/openai/handler.py (lines 116-148)
handle_task(task)

Handle the task.

Parameters:
- task (Task): the task

Returns:
- Task: the task with the result

Source: Agent/modules/openai/handler.py (lines 28-69)
speech2text(task)

Call OpenAI endpoints to convert speech to text.

Parameters:
- task (Task): the task carrying the path to the audio file

Returns:
- Optional[str]: the transcribed text

Source: Agent/modules/openai/handler.py (lines 71-114)
text2speech(task)

Call OpenAI endpoints to convert text to speech.

Parameters:
- task (Task): the task carrying the text to convert

Source: Agent/modules/openai/handler.py (lines 287-320)
QuantizationLLMAdaptor

This is the adaptor for the quantization LLM model.

Source: Agent/modules/quantization_llm/adaptor_worker.py (lines 12-105)
create_chat_completion(prompt=None, messages=None, tools=None, tool_choice=None, *args, **kwargs)

Create a chat completion for the given prompt and messages.

Parameters:
- prompt (str): the prompt to generate a completion for
- messages (List[Dict[str, str]]): the messages to generate a completion for
- tools (List[ChatCompletionTool]): the tools to use for the chat completion
- tool_choice (ChatCompletionToolChoiceOption): the tool choice to use for the chat completion
- *args, **kwargs

Source: Agent/modules/quantization_llm/adaptor_worker.py (lines 45-89)
create_completion(prompt)

Create a completion for the given prompt.

Parameters:
- prompt (str): the prompt to generate a completion for

Returns:
- str: the completion generated by the model

Source: Agent/modules/quantization_llm/adaptor_worker.py (lines 22-43)
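The adaptor's create_completion mirrors the llama-cpp-python interface; if that is indeed the backing library (an assumption here), a direct call to the underlying model looks roughly like this. The model file path is hypothetical.

```python
from llama_cpp import Llama

llm = Llama(model_path="./models/solar-10.Q4_K_M.gguf")  # hypothetical local GGUF file
out = llm.create_completion(prompt="Q: What is a quantized model? A:", max_tokens=64)
print(out["choices"][0]["text"])
```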
create_embedding(text)

Create an embedding for the given text.

Parameters:
- text (str): the text to generate an embedding for

Returns:
- List[float]: the embedding generated by the model

Source: Agent/modules/quantization_llm/adaptor_worker.py (lines 91-105)
QuantizationLLM

Source: Agent/modules/quantization_llm/handler.py (lines 15-100)
__init__(api)

Used to load and manage the quantization LLM model.

Parameters:
- api (API): the API object used to query the API (required)

Source: Agent/modules/quantization_llm/handler.py (lines 16-31)
handle_task(task)

Handle the task.

Parameters:
- task (Task): the task to handle

Source: Agent/modules/quantization_llm/handler.py (lines 33-80)
infer(text, llm_model_config)

staticmethod

Infer the task.

Parameters:
- text (str): the text to infer
- llm_model_config (QuantizationLLMModelConfig): the LLM model config

Source: Agent/modules/quantization_llm/handler.py (lines 82-100)
QuantizationLLMModelConfig

Source: Agent/modules/quantization_llm/models.py (lines 12-96)
__init__(model_name, model_family, repo, filename, file_size, available, *args, **kwargs)

Initialize the LLM model config.

Parameters:
- model_name (str): the name of the model
- model_size (str): the size of the model
- model_family (str): the family of the model
- model_type (str): the type of the model
- repo (str): the repo of the model
- filename (str): the filename of the model
- file_size (float): the size of the model file
- available (bool): whether the model is available
- *args, **kwargs

Source: Agent/modules/quantization_llm/models.py (lines 13-46)
download_model()
+
+If the model is not available, download it from the HuggingFace Hub +Returns:
+ +Agent/modules/quantization_llm/models.py
61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 |
|
model_path()
+
+Check or load the model from the local directory +Returns:
+ +Agent/modules/quantization_llm/models.py
48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 |
|
RAGHandler

Source: Agent/modules/rag/handler.py, lines 13-35

handle_task(task)
Handle the task.
Source: Agent/modules/rag/handler.py, lines 17-35
Speech2Text

Source: Agent/modules/speech_to_text/speech2text.py, lines 18-130

__init__(model_name='whisper', model_size='small', multi_language=True)
Initialize the translator.
Args: model_name (str): the name of the model to use; model_size (str): the size of the model to use; multi_language (bool): whether the model is multi-language.
Source: Agent/modules/speech_to_text/speech2text.py, lines 21-40

handle_task(task)
Args: task (Task): the task to process. Required.
Returns: Task: the processed task.
Source: Agent/modules/speech_to_text/speech2text.py, lines 108-130

locate_audio_file(uid, sequence_index, end_time) (staticmethod)
Locate the audio file.
Args: uid (str): the uid; sequence_index (str): the sequence index; end_time (str): the end time.
Returns: str: the audio file.
Source: Agent/modules/speech_to_text/speech2text.py, lines 42-64

translate(message, task)
The key function: translate the audio to text.
Args: message (dict): the message to translate; task (Task): the task.
Returns: Task: the task.
Source: Agent/modules/speech_to_text/speech2text.py, lines 66-106
Text2Speech

Source: Agent/modules/text_to_speech/text2speech.py, lines 15-103

__init__(model_name='openai', to_s3=False)
Initialize the text-to-speech object.
Args: model_name (str): the name of the model to use (default 'openai'); to_s3 (bool): whether the audio file should be uploaded to S3 (default False).
Source: Agent/modules/text_to_speech/text2speech.py, lines 17-28

handle_task(task)
Args: task (Task): the task to handle. Required.
Returns: Task: the task with the result.
Source: Agent/modules/text_to_speech/text2speech.py, lines 30-47

text_to_speech_openai(task, task_param)
Convert the text to speech using the OpenAI API.
Args: task (Task): the task to handle; task_param (Text2SpeechParameters): the parameters for the task.
Source: Agent/modules/text_to_speech/text2speech.py, lines 49-87

upload_to_s3(file_path, s3_key) (staticmethod)
Upload the file to S3.
Args: file_path (str): the path to the file; s3_key (str): the key to use in S3.
Source: Agent/modules/text_to_speech/text2speech.py, lines 89-103
This is the storage module. It covers two sync processes: pushing local data out and pulling remote data in.

StorageSolution

Source: Agent/storage.py, lines 30-275

download_audio(audio_file_id, dest_path)
Download the audio file. Args: audio_file_id (str): the audio file id; dest_path (str): the destination.
Source: Agent/storage.py, lines 204-230

download_data(files)
Download the data from the cloud. Args: files.
Source: Agent/storage.py, lines 165-202

download_video(video_file_id, dest_path)
Download the video file. Args: video_file_id (str): the video file id; dest_path (str): the destination.
Source: Agent/storage.py, lines 232-275

sync_pull_api()
Sync the data from the API.
Source: Agent/storage.py, lines 148-163

sync_pull_data()
If the storage solution is volume or local, the data is already accessible locally and nothing needs to be done. Otherwise, first ask the cloud to list all audio and video files, compare them with the local ones, and download any new files.
Source: Agent/storage.py, lines 111-128

sync_pull_local()
Sync the data from the local network by running the rsync command directly.
Source: Agent/storage.py, lines 130-140

sync_pull_s3()
Sync the data from S3.
Source: Agent/storage.py, lines 142-146

sync_push_api()
Sync the data to the API.
Source: Agent/storage.py, lines 95-109

sync_push_data()
Sync the data to the storage.
Source: Agent/storage.py, lines 47-58

sync_push_local()
Sync the data to the local network.
Source: Agent/storage.py, lines 60-77

sync_push_s3() (staticmethod)
Sync the data to S3.
Source: Agent/storage.py, lines 79-93
API

The class used to communicate with the API component.
Source: Agent/utils/api.py, lines 14-233

__init__(domain=API_DOMAIN, token='', uuid='', task_name='llm')
Args: domain (str): the domain of the API; token (str): the token to authenticate; uuid (str): the UUID of the worker; task_name (str): the task type of the worker.
Source: Agent/utils/api.py, lines 19-39

download_file_link(file_id, file_type)
Get the download link for a file.
Source: Agent/utils/api.py, lines 215-233

get_available_models()
Get the available LLM models from the API.
Source: Agent/utils/api.py, lines 58-67

get_local_ip() (staticmethod)
Get the local IP address. Returns: str: the local IP address.
Source: Agent/utils/api.py, lines 134-153

get_storage_solution()
Get the storage solution from the API.
Source: Agent/utils/api.py, lines 155-170

get_task()
Get the task from the API.
Source: Agent/utils/api.py, lines 69-82

list_files(from_time=None)
List the files from the API.
Source: Agent/utils/api.py, lines 197-213

post_task_result(task)
Post the task result to the API. Args: task (Task): the task whose result to post.
Source: Agent/utils/api.py, lines 84-109

register_or_update_worker()
Register or update the worker, so we know whether the worker is alive.
Source: Agent/utils/api.py, lines 111-132

upload_file(source_file, dest_path)
Upload the file to the API.
Source: Agent/utils/api.py, lines 172-195
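To make the workflow above concrete, here is a minimal sketch of how an Agent worker could drive this class, using only the methods documented above. The import path and the assumption that get_task() returns a falsy value when nothing is pending are not confirmed by the reference; treat this as an illustration rather than the repository's own worker loop.

import time

from utils.api import API  # path assumed relative to the Agent package root

api = API(domain="http://localhost:8000", token="<your-token>", task_name="llm")
api.register_or_update_worker()  # heartbeat: lets the API know this worker is alive

while True:
    task = api.get_task()        # fetch the next pending task for this task_name
    if not task:                 # assumed to be falsy when there is nothing to do
        time.sleep(1)
        continue
    # ... run the relevant Agent handler on `task` here ...
    api.post_task_result(task)   # report the (now completed) task back to the API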
Agent/utils/constants.py (module attributes):

LLM_MODEL_DIR = BASE_DIR / "llm" / "models"
API_DOMAIN = "http://localhost:8000"  # default domain

# model types
HF_LLAMA = "HuggingFace"
MT_LLAMA = "llama.cpp"
MT_API = "api"
MT_CHATGLM = "chatglm.cpp"
MODEL_TYPES = [HF_LLAMA, MT_LLAMA, MT_API, MT_CHATGLM]

# model names
MN_LLAMA2 = "llama2"
MN_GEMMA = "gemma"

BERT = "bert"
NORMAL_MODELS = [BERT]

get_logger(logger_name=None, stream=True)
Initialise the logger with a proper format; logs go both to the terminal stream and to a file.
Source: Agent/utils/get_logger.py, lines 5-27
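A minimal usage sketch, assuming the module is imported relative to the Agent package root:

from utils.get_logger import get_logger

logger = get_logger("quantization_llm")  # logs to both the terminal stream and a file
logger.info("model loaded")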
APISyncHandler

Bases: FileSystemEventHandler
Sync the files through the API when they are created, modified, moved or deleted.
Source: Agent/utils/storage/api_sync_handler.py, lines 10-39

LocalSyncHandler

Bases: FileSystemEventHandler
Sync the files to disk when they are created, modified, moved or deleted.
Source: Agent/utils/storage/local_sync_handler.py, lines 10-57

__init__(src_path, dest_path, sshpass)
Args: src_path (str): the source path to sync; dest_path (str): the destination path to sync; sshpass (str): the password for ssh. All required.
Source: Agent/utils/storage/local_sync_handler.py, lines 15-26

on_any_event(event)
Sync the files to disk when they are created, modified, moved or deleted.
Source: Agent/utils/storage/local_sync_handler.py, lines 28-57

S3SyncHandler

Bases: FileSystemEventHandler
Sync the files to S3 when they are created, modified, moved or deleted.
Source: Agent/utils/storage/s3_sync_handler.py, lines 9-40
logger = get_logger(__name__) (module attribute)

Naming convention for latency entries: if a name starts with model_xx it is a duration spent in a model; if it starts with transfer_xx it is a transfer duration; if it starts with ts_xx it is a plain timestamp.

TimeLogger

Source: Agent/utils/time_logger.py, lines 17-47

log(profile, name) (staticmethod)
Log the time taken to execute a block of code. Args: profile (dict): the profile to store the time in; name (str): the name of the block.
Source: Agent/utils/time_logger.py, lines 34-47

log_task(task, name) (staticmethod)
Log the time taken to execute a block of code. Args: task (Task): the task to store the time on; name (str): the name of the block.
Source: Agent/utils/time_logger.py, lines 19-32

time_tracker(label, profile, track_type=TrackType.MODEL.value)
Track the time taken to execute a block of code. Args: label (str): the name of the block; profile (dict): the profile to store the time in; track_type (str): the type of tracking.
Source: Agent/utils/time_tracker.py, lines 12-29
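To illustrate the naming convention above, here is a hand-rolled profile dict built directly with the standard library; TimeLogger.log and time_tracker persist entries of this shape, but the exact keys below are only examples.

import time

# model_*    -> a duration spent inside a model
# transfer_* -> a duration spent moving data around
# ts_*       -> a raw timestamp
profile = {}

start = time.time()
# ... run speech-to-text here ...
profile["model_speech2text"] = time.time() - start  # duration, in seconds
profile["ts_speech2text_end"] = time.time()         # timestamp marker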
timer

Utility used to log the time taken by a part of the program.
Source: Agent/utils/timer.py, lines 5-40

__init__(logger, message)
Initialise the timer. Args: logger (Logger): the logger to write the logs to; message (str): the message to log, e.g. "start xxx".
Source: Agent/utils/timer.py, lines 10-25

__enter__()
Enter the context and start timing.
Source: Agent/utils/timer.py, lines 27-33

__exit__(context, value, traceback)
Exit the context and log the elapsed time.
Source: Agent/utils/timer.py, lines 35-40
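Because timer implements __enter__ and __exit__, it is used as a context manager. A minimal sketch (import paths assumed relative to the Agent package root; the exact log wording depends on the implementation):

from utils.get_logger import get_logger
from utils.timer import timer

logger = get_logger("speech2text")

with timer(logger, "transcribe audio"):  # logs when the block starts and how long it took
    pass  # ... do the work you want to time ...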
API

Used to communicate with the API.
Source: Client/Listener/api.py, lines 14-252

__init__(domain=API_DOMAIN, token='', home_id=None, track_cluster=None)
Requires the token and the endpoint to communicate with the API. If you deploy the API to a cloud server, do not forget to change the domain to the server's domain.
Args: domain (str): the domain of the API (default API_DOMAIN); token (str): the token for the API; home_id (int): the home ID; track_cluster (str): the track cluster.
Source: Client/Listener/api.py, lines 24-49

get_storage_solution()
Get the storage solution from the API.
Source: Client/Listener/api.py, lines 212-227

post_audio(uid, sequence_index, audio_file, start_time, end_time, track_id=None)
Post the metadata of the audio to the API.
Args: uid (str): uuid of the audio; sequence_index (int): the sequence index of the audio within this loop (together with the uuid it identifies the audio); audio_file (str): path to the audio file, which is synced to the API disk storage separately; start_time (datetime): the start time of the audio; end_time (datetime): the end time of the audio; track_id (str): the track id of the task.
Source: Client/Listener/api.py, lines 94-136

post_video(uid, video_file, start_time, end_time)
Post the metadata of the video to the API.
Args: uid (str): uuid of this video section; video_file (str): path to the video file, which is synced to the API disk storage separately and also carries the start/end time in its file name; start_time (datetime): the start time of the video; end_time (datetime): the end time of the video.
Source: Client/Listener/api.py, lines 138-168

queue_speech_to_text(uid, audio_index, start_time, end_time)
Optional, used to queue the speech-to-text task.
Args: uid (str): uuid of the audio; audio_index (str): the audio index, which can be used to identify the audio; start_time (datetime): the start time of the audio; end_time (datetime): the end time of the audio.
Returns: str: the track id of the task.
Source: Client/Listener/api.py, lines 170-210

register_device(device_name=None, device_type=None, description=None)
Register the device with the API.
Args: device_name (Optional[str]): a name for the device, so you can distinguish it more easily later; device_type (Optional[str]): the device type; description (Optional[str]): the description of the device.
Source: Client/Listener/api.py, lines 60-92

upload_file(source_file, dest_path)
Upload the file to the API.
Source: Client/Listener/api.py, lines 229-252
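A usage sketch based only on the signatures above. The device names, file path and home_id are made-up illustrations, and the import assumes you run from the Client/Listener directory; the track cluster name is one of the built-in clusters discussed later.

from datetime import datetime

from api import API  # import path assumed: running from Client/Listener

api = API(
    domain="http://localhost:8000",
    token="<your-token>",
    home_id=1,
    track_cluster="CLUSTER_GPT_35_RAG_ETE_CONVERSATION",
)
api.register_device(device_name="kitchen-pi", device_type="audio", description="demo mic")

# After an audio chunk has been written to disk, post its metadata:
api.post_audio(
    uid="f6bf3b78e4f5484abf949790c8451856",
    sequence_index=0,
    audio_file="data/audio/demo/0.wav",
    start_time=datetime.now(),
    end_time=datetime.now(),
)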
AudioAcquire

Source: Client/Listener/audios_acquire.py, lines 18-180

__init__(api_domain='', token='', home_id='', energy_threshold=5000, default_microphone='pulse', record_timeout=30000, sampling_time=0.25, track_cluster=None)
The audio acquisition class.
Args: api_domain (str): the API domain; token (str): the API token; home_id (str): the home id; energy_threshold (int): the energy threshold for the audio (default 5000); default_microphone (str): the default microphone (default 'pulse'); record_timeout (int): the record timeout (default 30000); sampling_time (float): the sampling time in seconds (default 0.25); track_cluster (str): the track cluster.
Source: Client/Listener/audios_acquire.py, lines 19-67

get_source()
Get the source of the audio.
Source: Client/Listener/audios_acquire.py, lines 69-98

main()
The main function.
Source: Client/Listener/audios_acquire.py, lines 183-233
Client/Listener/mock/data_extraction.py (module attributes):

ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = Path(ROOT_PATH) / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)
API_DOMAIN = "http://localhost:8000"
S3_BUCKET = "openomni"  # change this to your bucket name

DataMock

First extract the audio and video segments from the video file, then treat them as happening at the current time plus some offset in the future, and save them into the data folder the same way the other clients do. For the mock US-Election debate, the time points are: 02:53, 3:20, 20:20, 20:39, 33:38, 34:18, 55:15, 55:40, 80:05, 80:18.
Source: Client/Listener/mock/data_extraction.py, lines 29-222

extract_audio_and_video(input_video_path, start_second, end_second, start_time, end_time, output_audio_path)
Extracts the audio and video from a specified segment of a video file.
Args: input_video_path (str): path to the input video file; start_second (int): start time in seconds; end_second (int): end time in seconds; output_audio_path (str): path to save the extracted audio file.
Source: Client/Listener/mock/data_extraction.py, lines 111-196

replay(time_ranges, input_video_path)
Replays the audio and video from the specified time ranges.
Args: time_ranges (List[int, int]): list of time ranges in seconds; input_video_path (str): path to the input video file.
Source: Client/Listener/mock/data_extraction.py, lines 71-109

split_video_in_minutes(video_path, output_folder, fps=1) (staticmethod)
Splits a video into images.
Args: video_path (str): path to the video file; output_folder (str): folder to save the extracted images; fps (int): frames per second to extract (default 1).
Source: Client/Listener/mock/data_extraction.py, lines 198-222
APISyncHandler

Bases: FileSystemEventHandler
Sync the files through the API when they are created, modified, moved or deleted.
Source: Client/Listener/storage.py, lines 113-147

LocalSyncHandler

Bases: FileSystemEventHandler
Sync the files to disk when they are created, modified, moved or deleted.
Source: Client/Listener/storage.py, lines 24-71

__init__(src_path, dest_path, sshpass)
Args: src_path (str): the source path to sync; dest_path (str): the destination path to sync; sshpass (str): the password for ssh. All required.
Source: Client/Listener/storage.py, lines 29-40

on_any_event(event)
Sync the files to disk when they are created, modified, moved or deleted.
Source: Client/Listener/storage.py, lines 42-71

S3SyncHandler

Bases: FileSystemEventHandler
Sync the files to S3 when they are created, modified, moved or deleted.
Source: Client/Listener/storage.py, lines 74-110

StorageHandler

Source: Client/Listener/storage.py, lines 150-228

__init__(api_domain='', token='', home_id=None, dest_dir=None, dest_password=None)
Args: api_domain (str): the API domain; token (str): the API token; home_id (int): the home id; dest_dir (str): the destination directory to sync to; dest_password (str): the destination password for the sync.
Source: Client/Listener/storage.py, lines 151-171
timer

Utility used to log the time taken by a part of the program.
Source: Client/Listener/utils.py, lines 40-74

__init__(logger, message)
Initialise the timer. Args: logger (Logger): the logger to log the message with; message (str): the message to log.
Source: Client/Listener/utils.py, lines 45-59

__enter__()
Enter the context and start timing.
Source: Client/Listener/utils.py, lines 61-67

__exit__(context, value, traceback)
Exit the context and log the elapsed time.
Source: Client/Listener/utils.py, lines 69-74

get_logger(logger_name=None, stream=True)
Initialise the logger with a proper format; logs go both to the terminal stream and to a file.
Args: logger_name (Optional[str]): the name of the logger; stream (bool): whether to also log to the terminal stream (default True).
Source: Client/Listener/utils.py, lines 9-37
logger = get_logger('video_acquire') (module attribute)

Client/Listener/videos_acquire.py constants:
PER_LENGTH = 1800  # 30 minutes
WIDTH = 640
HEIGHT = 480
FPS = 24.0

VideoAcquire

Source: Client/Listener/videos_acquire.py, lines 32-141

__init__(width=WIDTH, height=HEIGHT, fps=FPS, per_video_length=PER_LENGTH, api_domain='', token='', home_id=None)
Initialise the video acquisition.
Args: width (int): the width of the video; height (int): the height of the video; fps (float): the frames per second; per_video_length (int): the length of each video; api_domain (str): the domain of the API; token (str): the token of the API; home_id (int): the home id.
Source: Client/Listener/videos_acquire.py, lines 33-62

record()
Start recording the video.
Source: Client/Listener/videos_acquire.py, lines 64-141
API

Source: Client/Responder/api.py, lines 11-78

__init__(domain=API_DOMAIN, token='', home_id=None)
Initialise the API client. Args: domain (str): the domain of the API; token (str): the token of the API; home_id (int): the home id.
Source: Client/Responder/api.py, lines 12-28

get_spoken_speech()
Call the API to get the speech to play.
Source: Client/Responder/api.py, lines 62-78

register_device(device_name=None, device_type=None, description=None)
Register the device. Args: device_name (Optional[str]): the device name; device_type (Optional[str]): the device type; description (Optional[str]): the description of the device.
Source: Client/Responder/api.py, lines 30-60

API_DOMAIN = 'http://localhost:8000' (module attribute)

Client/Responder/constants.py:
# get parent of current folder as root
ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = Path(ROOT_PATH) / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)
API_DOMAIN = "http://localhost:8000"
PlaySpeech

Source: Client/Responder/play_speech.py, lines 18-79

play_audio_file(file_path) (staticmethod)
Play an audio file from the given path. Args: file_path (Path): the path of the audio file.
Source: Client/Responder/play_speech.py, lines 65-79

play_audio_url(url) (staticmethod)
Play an audio file from the given URL. Args: url (str): the URL of the audio file.
Source: Client/Responder/play_speech.py, lines 43-63

text_to_speech_and_play(content) (staticmethod)
Convert text to speech and play it. Args: content (str): the content to convert to speech.
Source: Client/Responder/play_speech.py, lines 19-41
timer

Utility used to log the time taken by a part of the program.
Source: Client/Responder/utils.py, lines 39-81

__init__(logger, message)
Initialise the timer. Args: logger (Logger): the logger; message (str): the message to be logged. Both required.
Source: Client/Responder/utils.py, lines 44-56

__enter__()
Enter the context and start timing.
Source: Client/Responder/utils.py, lines 58-64

__exit__(context, value, traceback)
Exit the context and log the elapsed time.
Args: context (Optional[Type[BaseException]]): the exception type; value (Optional[BaseException]): the exception value; traceback (Optional[TracebackType]): the traceback.
Source: Client/Responder/utils.py, lines 66-81

get_logger(logger_name=None, stream=True)
Initialise the logger with a proper format; logs go both to the terminal stream and to a file.
Args: logger_name (Optional[str]): the logger name; stream (bool): whether to also log to the terminal stream (default True).
Source: Client/Responder/utils.py, lines 10-36
The annotation is built upon the Django Admin interface, which is a powerful tool for managing data and is easy to customise.
This is one of the reasons why we chose Django as the backend framework: there is plenty of documentation, and many tools and packages are available if you need something more here.
It is built upon the Django change_list template; the code that makes this happen is in API/hardware/admin.py and API/hardware/forms.py.
The class DataMultiModalConversationAdmin is where we implement it.
First, let's look at the conversation model to see where we store the annotation data:
+# API/hardware/models.py
+
+class DataMultiModalConversation(models.Model):
+ # ...
+ annotations = models.JSONField(
+ help_text="The annotations of the emotion detection",
+ null=True,
+ blank=True,
+ default=dict,
+ )
+
+ multi_turns_annotations = models.JSONField(
+ help_text="The annotations of the multi-turns",
+ null=True,
+ blank=True,
+ default=dict,
+ )
+ tags = TaggableManager(blank=True)
+
This means the annotations will be saved to these two JSON fields: annotations and multi_turns_annotations.
The saved JSON follows this schema:
+{
+ "1": {
+ // 1 is the user id
+ "annotation_speech2text": "your annotation",
+ "annotation_speech2text_score": 3
+ // from 0-5
+ // ...
+ }
+}
+
The annotation fields are defined in the form:
+# in API/hardware/forms.py
+class MultiModalAnnotationForm(forms.ModelForm):
+ annotation_speech2text = forms.CharField(
+ required=False,
+ widget=forms.Textarea(attrs={"rows": 1}),
+ help_text="Please provide your annotation for the speech-to-text task.",
+ )
+ annotation_speech2text_score = forms.IntegerField(
+ initial=0,
+ widget=forms.NumberInput(attrs={"min": 0, "max": 5}),
+ required=False,
+ help_text="Score for the speech-to-text results, score from 0 to 5.",
+ )
+ annotation_text_generation = forms.CharField(
+ required=False,
+ widget=forms.Textarea(attrs={"rows": 1}),
+ help_text="Please provide your annotation for the text generation task.",
+ )
+
+ annotation_text_generation_score = forms.IntegerField(
+ initial=0,
+ widget=forms.NumberInput(attrs={"min": 0, "max": 5}),
+ required=False,
+ help_text="Score for the text generation results, score from 0 to 5.",
+ )
+
+ annotation_text2speech_score = forms.IntegerField(
+ initial=0,
+ widget=forms.NumberInput(attrs={"min": 0, "max": 5}),
+ required=False,
+ help_text="Score for the text-to-speech results, score from 0 to 5.",
+ )
+
+ annotation_overall_score = forms.IntegerField(
+ initial=0,
+ widget=forms.NumberInput(attrs={"min": 0, "max": 5}),
+ required=False,
+ help_text="Overall score for this multi-modal task, score from 0 to 5.",
+ )
+
+ annotation_overall_comment = forms.CharField(
+ required=False,
+ widget=forms.Textarea(attrs={"rows": 1}),
+ help_text="Please provide your overall annotation for this multi-modal task.",
+ )
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ if self.instance.annotations:
+ current_user_annotation = self.instance.annotations.get(
+ str(self.current_user.id), {}
+ )
+ for key, value in current_user_annotation.items():
+ if key in self.fields:
+ self.fields[key].initial = value
+ if self.instance.multi_turns_annotations:
+ current_user_annotation = self.instance.multi_turns_annotations.get(
+ str(self.current_user.id), {}
+ )
+ for key, value in current_user_annotation.items():
+ if key in self.fields:
+ self.fields[key].initial = value
+
These are the fields that show up in the change_list page, as shown below:
The code inside the __init__ function is responsible for presenting the data you have already annotated back to you.
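For orientation, here is a minimal sketch of the other half of the round trip: how an admin class could inject the current user (which the form's __init__ reads as self.current_user) and write the submitted scores back into the annotations JSON field, keyed by user id. The hook names are standard Django admin methods, but the exact implementation in API/hardware/admin.py may differ.

# Sketch only: the real logic lives in API/hardware/admin.py and may differ.
from django.contrib import admin


class DataMultiModalConversationAdmin(admin.ModelAdmin):
    form = MultiModalAnnotationForm

    def get_form(self, request, obj=None, **kwargs):
        form_class = super().get_form(request, obj, **kwargs)
        form_class.current_user = request.user  # what the form's __init__ reads
        return form_class

    def save_model(self, request, obj, form, change):
        annotations = obj.annotations or {}
        annotations[str(request.user.id)] = {
            key: value
            for key, value in form.cleaned_data.items()
            if key.startswith("annotation_")
        }
        obj.annotations = annotations  # one entry per annotator
        super().save_model(request, obj, form, change)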
The annotation benchmark and detail pages analyse the data stored in the annotations and multi_turns_annotations fields.
If you want to add a customised annotation field, all you need to do is add a field to the MultiModalAnnotationForm.
For example, we have an emotion detection task that does not fit into the conversation model for annotation; it is more of an intermediate task and output within the whole pipeline, yet its quality is still very important.
So we also want to be able to annotate these types of tasks.
Especially during application development, we may introduce specialised tasks for different purposes.
In general, when annotating these types of tasks, the context is still the input and output of the conversation, so when we design the model (using emotion detection as an example) we use a foreign key pointing to the conversation model:
+class ContextEmotionDetection(models.Model):
+ multi_modal_conversation = models.ForeignKey(
+ DataMultiModalConversation,
+ on_delete=models.CASCADE,
+ related_name="emotion_detection",
+ null=True,
+ blank=True,
+ )
+ result = models.JSONField(
+ help_text="The emotion result of the text", null=True, blank=True, default=dict
+ )
+ logs = models.JSONField(
+ help_text="The logs of the emotion detection",
+ null=True,
+ blank=True,
+ default=dict,
+ )
+ created_at = models.DateTimeField(
+ auto_now_add=True, help_text="The created time of the emotion detection"
+ )
+ updated_at = models.DateTimeField(
+ auto_now=True, help_text="The updated time of the emotion detection"
+ )
+
+ annotations = models.JSONField(
+ help_text="The annotations of the emotion detection",
+ null=True,
+ blank=True,
+ default=dict,
+ )
+
+ class Meta:
+ verbose_name = "Context Emotion"
+ verbose_name_plural = "Context Emotions"
+
As with the conversation model, we also have an annotations field to store the annotation data, and its schema is the same as for the conversation model.
The annotation form is in API/hardware/forms.py:
+
+class MultiModalFKEmotionDetectionAnnotationForm(forms.ModelForm):
+ annotation_overall = forms.IntegerField(
+ initial=0,
+ help_text="Overall score for this emotion detection task, score from 0 to 5.",
+ )
+ annotation_overall.widget.attrs.update({"min": 0, "max": 5})
+
+ annotation_text_modality = forms.IntegerField(
+ initial=0, help_text="Score for text modality."
+ )
+ annotation_text_modality.widget.attrs.update({"min": 0, "max": 5})
+
+ annotation_audio_modality = forms.IntegerField(
+ initial=0, help_text="Score for audio modality."
+ )
+ annotation_audio_modality.widget.attrs.update({"min": 0, "max": 5})
+
+ annotation_video_modality = forms.IntegerField(
+ initial=0, help_text="Score for video modality."
+ )
+ annotation_video_modality.widget.attrs.update({"min": 0, "max": 5})
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ if self.instance.annotations:
+ current_user_annotation = self.instance.annotations.get(
+ str(self.current_user.id), {}
+ )
+ for key, value in current_user_annotation.items():
+ if key in self.fields:
+ self.fields[key].initial = value
+
The form is similar to the conversation annotation form, but the fields are different.
So if you want to implement one yourself, all you need to do is copy this pattern for your model, as sketched below.
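As a sketch of what "copying this pattern" can look like, the form can be attached to an admin class for the FK model. The admin class name and list_display choices below are illustrative, not taken from the repository.

# Sketch only: wire the FK annotation form into the admin for the new model.
from django.contrib import admin


class ContextEmotionDetectionAdmin(admin.ModelAdmin):
    form = MultiModalFKEmotionDetectionAnnotationForm
    list_display = ("multi_modal_conversation", "created_at", "updated_at")

    def get_form(self, request, obj=None, **kwargs):
        form_class = super().get_form(request, obj, **kwargs)
        form_class.current_user = request.user  # the form's __init__ reads this
        return form_class


admin.site.register(ContextEmotionDetection, ContextEmotionDetectionAdmin)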
As we mentioned before, we have two perspectives from which to evaluate the performance of the pipeline.
For the latency part, if you log the time points and duration of each task within the Agent module, you can automatically obtain the latency of each round of conversation. The results are presented in two ways:
The figure above is the Detailed Latency Benchmark, which shows the latency of each round of conversation.
The figure below is the Summary Latency Benchmark, which shows summary statistics of the latency.
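To make the "automatically obtain the latency" point concrete, here is a small sketch of how a per-round latency could be derived from a logged profile that follows the model_/transfer_/ts_ naming convention used by the Agent's TimeLogger. The profile dict is a stand-in for whatever is actually stored on the task.

def round_trip_latency(profile: dict) -> float:
    """Sum every logged duration (model_* and transfer_*) for one conversation round."""
    return sum(
        value
        for name, value in profile.items()
        if name.startswith(("model_", "transfer_"))
    )

# e.g. {"model_speech2text": 2.1, "model_llm": 9.4, "transfer_api": 0.3} -> 11.8 seconds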
For the accuracy part, some metrics can be calculated automatically, such as WER for Speech2Text. However, most metrics currently require human annotation, and research on how to automate this is still ongoing and worth investigating.
To address this, we built the annotation functionality for the accuracy part.
+Conversation Annotation
We have a table (a model, in Django terms) called conversation to record each round of conversation.
The conversation is associated with the input and the output, and the annotation is based on both, as shown in this figure:
Initially, the built-in evaluation metric is a score in the range [0, 5], where 5 means the response is perfect and 0 means the response is totally wrong.
In this way, we can calculate a quantitative score for the performance of each component within the pipeline.
So for each conversation, you can annotate each of these aspects.
One conversation can be annotated by multiple people, and the final score is the average of all the scores (a small sketch of this averaging follows).
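A minimal sketch of that averaging, assuming the per-user schema shown earlier (scores keyed by user id inside the annotations JSON field); the field name is one of the form fields defined above.

from typing import Optional


def average_score(annotations: dict, field: str = "annotation_overall_score") -> Optional[float]:
    """Average one score field across every annotator of a conversation."""
    scores = [
        user_annotation[field]
        for user_annotation in annotations.values()
        if field in user_annotation
    ]
    return sum(scores) / len(scores) if scores else None


# average_score({"1": {"annotation_overall_score": 4}, "2": {"annotation_overall_score": 5}}) -> 4.5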
+The below figure shows the default annotation score:
+ +And the overview of the annotation for a conversation:
The annotated details are shown at the bottom.
+Customised Component Annotation
For a specific component within the pipeline that does not fit in the conversation table above, we have a separate table to record the annotation.
For example, emotion detection is a customised task we defined and developed, so it gets its own annotation table.
Compared to the setup above, the context part (input and output) is the same, but the annotation metrics are different:
+ +Multi-turn Conversation Annotation
Conversations are actually multi-turn, which means we also want to be able to annotate multi-turn conversations.
+This is also supported.
+You can assign a "Tag" to a group of conversations, then the last conversation within the group will have an extra +annotated field called "Multi turn annotation overall score and comment".
During and after the annotation process, you can track the progress on the Accuracy | Detail page. For example:
After all annotation is done, you can view the accuracy summary on the Accuracy | Benchmark page.
Your multi-turn conversation results can be checked on the Accuracy | Multi-Turn Conversation page.
In summary, for the evaluation benchmark, latency can be calculated automatically, while accuracy needs human annotation.
By collecting this kind of data, our tool can help advance the development of automatic or semi-automatic accuracy metrics.
After the system is set up, we tested the pipeline with two cases:
One of the most intensive conversational scenarios is a debate.
We extracted segments from the US Presidential Debate 2024 between Biden and Trump, focusing on Biden addressing the public and handling questions.
These segments were fed into our pipeline to evaluate its performance under different configurations.
We ran the Agent modules on an NVIDIA 3080 GPU with 12 GB of memory.
To replicate what we are doing, you can download the video from the provided link, put it at Client/Listener/data/mock/US-Election-2024.mp4, and then run the following command:
+cd ./Client/Listener
+source venv/bin/activate
+ python3 -m mock.data_extraction --input_video_path your/repo/path/US-Election-2024.mp4 --api_domain https://openomni.ai4wa.com --token your_token --time_points 02:53,3:20,20:20,20:39,33:38,34:18,55:15,55:40,80:05,80:18
+
You also need to have the rest of the modules up and running properly.
Then all the pipelines we have set up will be fired; all you need to do is wait for the results.
After annotation, the accuracy performance is shown here:
+ +The fastest configuration is GPT35_ETE, averaging around 15 seconds, with most of the time consumed by the +text-to-speech part, as the generated content is quite long and comprehensive. The slowest configuration is HF_ETE, +taking around 189 seconds, with the LLM model inference step taking the longest time. QuantizationLLM_ETE takes around +60 seconds, with LLM model inference taking around 28 seconds and our emotion detection model taking around 10 seconds.
After annotation with our provided interface, the accuracy statistics are automatically generated. As shown in the GPT-4o accuracy figure, speech-to-text accuracy is good, while text-to-speech could be improved with more natural emotion or personality. The text generation, however, is often too general and sometimes inappropriate. Biden's responses are more in-context and supported by evidence. The only question where our pipeline performed well was the subjective question about Biden's age, where the GPT-4o pipeline excelled.
+The GPT35_ETE pipeline had the best overall accuracy, but its responses were often in-context yet pompous. Thus, Biden +still outperforms AI.
+In conclusion, AI cannot be the President of the US for now, from both latency and accuracy perspectives.
+While latency and the need for external information currently prevent AI from being the President of the US, the current +state of conversational Agent can be production-ready and useful for areas that are not latency-critical and do not require +extensive external knowledge. Assisting indoor activities for the visually impaired can be one such potential +application area.
+We set up a camera, microphone, and speaker, and then prepared several types of questions useful for the visually +impaired population. These included questions about the location of specific objects and how to grab them, navigating to +another position indoors, and inquiries about the surrounding environment. We sampled six questions and fed them to the +GPT4O_ETE pipeline. The latency statistics show that each +conversational request from the user is responded to within approximately 30 seconds, which is expected under the +hardware setup.
+After annotation with context, the accuracy performance is impressive, with an overall score of 4.7/5. Most responses +are accurate; however, the LLM lacks specific skills for assisting the visually impaired. For example, when asked where +the keyboard is, the response could include more instructive steps on how to grab it rather than a general description. +This indicates that while the conversational Agent is nearly production-ready for assisting the visually impaired with +indoor activities, improvements in latency and response content are still needed.
We will present how to set up and run the end-to-end pipeline.
It mainly includes these sections:
We have provided a list of built-in pipelines for demonstration and evaluation purposes.
For example, we have:
After the evaluation, we found that, for all the pipelines on the Nvidia 3080 GPU, none of the latencies is acceptable.
The best performer is the GPT-3.5 pipeline with text-only input, which has a latency of around 8-10 seconds.
For GPT-4o, the API latency is around 3-8 seconds, and when you feed in more image data, the latency increases significantly.
So if you have an idea and a solution that you want to test for acceptability, how should you do that?
First, go to API/orchestrator/chain/clusters.py.
This is the place where we put all the pipeline configurations, as shown above.
+Here is an example of the pipeline configuration:
+
+"""
+Cluster for gpt3.5 model and gpt3.5 with RAG
+"""
+CLUSTER_GPT_35_RAG_ETE_CONVERSATION_NAME = "CLUSTER_GPT_35_RAG_ETE_CONVERSATION"
+CLUSTER_GPT_35_RAG_ETE_CONVERSATION = {
+ "openai_speech2text": {
+ "order": 0,
+ "extra_params": {},
+ "component_type": "task",
+ "task_name": "openai_speech2text",
+ },
+ "completed_openai_speech2text": {
+ "order": 1,
+ "extra_params": {},
+ "component_type": "signal",
+ "task_name": None,
+ },
+ "created_data_text": {
+ "order": 2,
+ "extra_params": {},
+ "component_type": "signal",
+ "task_name": None,
+ },
+ "completed_rag": {
+ "order": 3,
+ "extra_params": {},
+ "component_type": "task",
+ "task_name": "rag",
+ },
+ "completed_openai_gpt_35": {
+ "order": 4,
+ "extra_params": {
+ "prompt_template": """{text}""",
+ },
+ "component_type": "task",
+ "task_name": "openai_gpt_35",
+ },
+ "completed_openai_text2speech": {
+ "order": 5,
+ "extra_params": {},
+ "component_type": "task",
+ "task_name": "openai_text2speech",
+ },
+}
+
First, we need to define a cluster name, which identifies the pipeline.
This cluster name is the one you specify when you start the audio acquisition, via the --track_cluster parameter.
When you stop talking, the audio acquisition sends the audio you spoke to the API with a track_id in the format T-{cluster_name}-{uid_for_this_conversation}, for example: T-CLUSTER_GPT_35_RAG_ETE_CONVERSATION-f6bf3b78e4f5484abf949790c8451856.
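Because everything downstream keys off this identifier, a small helper like the following (a sketch, not code from the repository) shows how the cluster name and conversation uid can be recovered from a track_id:

from typing import Tuple


def parse_track_id(track_id: str) -> Tuple[str, str]:
    """Split 'T-{cluster_name}-{uid}' back into (cluster_name, uid)."""
    _, _, rest = track_id.partition("-")       # strip the leading 'T'
    cluster_name, _, uid = rest.rpartition("-")  # the uid is hex with no '-', so split on the last one
    return cluster_name, uid


# parse_track_id("T-CLUSTER_GPT_35_RAG_ETE_CONVERSATION-f6bf3b78e4f5484abf949790c8451856")
# -> ("CLUSTER_GPT_35_RAG_ETE_CONVERSATION", "f6bf3b78e4f5484abf949790c8451856")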
The API side uses the cluster_name to trigger the relevant pipeline and tasks, and all the downstream tasks for this conversation within the pipeline are grouped by this track_id to ensure pipeline observability.
We have a table called Task to manage all the different types of tasks; this could be decomposed into a queue system if we wanted to bring this into production with a more complex design.
Currently, to maintain a simple and flexible design, every Agent task is recorded in the Task table, and we use this table to analyse the progress of the pipeline and the health of the system.
For example, with the track_id above, the example pipeline will be triggered.
First, it will create a task named openai_speech2text with status pending and the proper parameters.
The Agent consumer will consume this task, and after the task is done it will update this task record with the status completed.
The metadata generated while the Agent module runs is saved in the result_json field, with two primary keys.
It will look like this:
When the Agent module calls the API endpoint to update the task status, it triggers a completed_task signal (check Django Signals for further details), which acts as the router that dispatches the following tasks.
The specific code implementing this is in API/orchestrator/models.py, lines 114-119:
def save(self, *args, **kwargs):
+ # if it is updated, then we need to call the chain
+ if self.result_status == "completed":
+ completed_task.send(sender=self, data=self.__dict__)
+ super().save(*args, **kwargs)
+
This overrides the Django model save method for Task: when the result_status changes to completed, it triggers the completed_task signal.
The completed_task signal is defined in API/orchestrator/signals.py:
from django.dispatch import Signal
+
+completed_task = Signal() # task itself
+
The receiver of this signal is defined in API/orchestrator/chain/completed_task.py:
from django.dispatch import receiver

from authenticate.utils.get_logger import get_logger
from orchestrator.chain.models import TaskData
from orchestrator.chain.signals import (
    completed_emotion_detection,
    completed_hf_llm,
    completed_openai_gpt_4o_text_and_image,
    completed_openai_gpt_4o_text_only,
    completed_openai_gpt_35,
    completed_openai_speech2text,
    completed_openai_text2speech,
    completed_quantization_llm,
    completed_rag,
    completed_speech2text,
    completed_task,
    completed_text2speech,
)
from orchestrator.models import Task

logger = get_logger(__name__)


@receiver(completed_task)
def trigger_completed_task(sender, **kwargs):
    """
    Dispatch the downstream signal based on the completed task's name.
    """
    data = kwargs.get("data", {})
    task_data = TaskData(**data)

    if task_data.task_name == "speech2text":
        return completed_speech2text.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    if task_data.task_name == "emotion_detection":
        return completed_emotion_detection.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    if task_data.task_name == "quantization_llm":
        return completed_quantization_llm.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    if task_data.task_name == "text2speech":
        logger.info("Text2Speech task completed")
        return completed_text2speech.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    if task_data.task_name == "hf_llm":
        logger.info("HF LLM task completed")
        return completed_hf_llm.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    if task_data.task_name == "openai_speech2text":
        logger.info("OpenAI Speech2Text task completed")
        return completed_openai_speech2text.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    if task_data.task_name == "openai_gpt_4o_text_and_image":
        logger.info("OpenAI GPT4O task completed")
        return completed_openai_gpt_4o_text_and_image.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    if task_data.task_name == "openai_gpt_35":
        logger.info("OpenAI GPT3.5 task completed")
        return completed_openai_gpt_35.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    if task_data.task_name == "openai_gpt_4o_text_only":
        logger.info("OpenAI GPT4O Text Only task completed")
        return completed_openai_gpt_4o_text_only.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    if task_data.task_name == "rag":
        logger.info("RAG task completed")
        return completed_rag.send(sender=sender, data=data, track_id=task_data.track_id)

    if task_data.task_name == "openai_text2speech":
        logger.info("OpenAI Text2Speech task completed")
        return completed_openai_text2speech.send(
            sender=sender, data=data, track_id=task_data.track_id
        )

    task_name_choices = Task.get_task_name_choices()
    task_name_choices_list = [task[0] for task in task_name_choices]
    if task_data.task_name not in task_name_choices_list:
        logger.error("Task name is not in the choices list")
        return
    logger.critical(f"{task_data.task_name} task completed, however, no action taken.")
+
We can see from the code that it uses the track_id to match the cluster name, and then,
based on the configuration of that cluster, identifies the next component within the cluster (pipeline).
For example, the steps will be like:

- the openai_speech2text task is finished
- completed_task is then triggered, which dispatches completed_openai_speech2text
- the completed_openai_speech2text receiver is in API/orchestrator/chain/completed_openai_speech2text.py; it will
  process the results into a DataText object and save it to the database
- it then emits created_data_text; the class ClusterManager will be in charge of this
- the next component (here the rag task) is created with its extra_params added to the parameters and saved
  to the database; the Agent module will listen to this and consume it

The ClusterManager code is in API/orchestrator/chain/manager.py:
"""
+
+Here will define a list of clusters
+
+Each cluster will have a list of chain components
+
+For example, end-to-end conversation chain will have the following components:
+
+- completed_speech2text
+- created_data_text
+- completed_emotion_detection
+- completed_quantization_llm
+- completed_text2speech
+"""
+
+from typing import Optional, Tuple
+
+from authenticate.utils.get_logger import get_logger
+from orchestrator.chain.clusters import CLUSTERS
+from orchestrator.chain.signals import created_data_text
+from orchestrator.models import Task
+
+logger = get_logger(__name__)
+
+
+class ClusterManager:
+
+ @staticmethod
+ def get_cluster(cluster_name: str):
+ """
+ Get the cluster
+
+ Args:
+ cluster_name (str): The cluster name
+ """
+ if cluster_name in CLUSTERS:
+ return CLUSTERS[cluster_name]
+ return None
+
+ @staticmethod
+ def get_next_chain_component(
+ cluster: dict, current_component: str
+ ) -> Tuple[Optional[str], Optional[dict]]:
+ """
+ Get the next chain
+
+ Args:
+ cluster (dict): The cluster
+ current_component (str): The current component
+
+ Return:
+ Tuple[Optional[str], Optional[dict]]: The next component and its parameters if exists, otherwise None
+ """
+ chain = []
+ for key, value in cluster.items():
+ chain.append(key)
+ chain.sort(key=lambda x: cluster[x]["order"])
+ if current_component == "init":
+ """
+ If this is the start of the chain, then return the first component
+ """
+ return chain[0], cluster[chain[0]]
+ # index of the current component
+ current_component_index = chain.index(current_component)
+ next_index = current_component_index + 1
+ if next_index >= len(chain):
+ return None, None
+ return chain[next_index], cluster[chain[next_index]]
+
+ @classmethod
+ def get_next(cls, cluster_name: str, current_component: str):
+ """
+ Get the next component
+
+ Args:
+ cluster_name (str): The cluster name
+ current_component (str): The current component
+ """
+ cluster = cls.get_cluster(cluster_name)
+ if cluster is None:
+ return None
+ return ClusterManager.get_next_chain_component(cluster, current_component)
+
+ @classmethod
+ def chain_next(
+ cls,
+ track_id: Optional[str],
+ current_component: str,
+ next_component_params: dict,
+ name: str = None,
+ user=None,
+ ):
+ """
+ Chain to the next component
+
+ Args:
+ current_component (str): The current component
+ track_id (str): The track ID
+ next_component_params (dict): The next component parameters
+ name (str): The task name, it will be used to aggregate the task
+ user (None): The user
+ """
+ logger.info(f"Current component: {current_component}")
+ logger.info(f"Next component params: {next_component_params}")
+ cluster_name = track_id.split("-")[1]
+ next_component_name, next_component = cls.get_next(
+ cluster_name, current_component
+ )
+ logger.info(f"Next component: {next_component_name}")
+
+ if next_component_name is None:
+ return
+ # do something with the next component
+ # It can be a task or a signal
+ next_parameters = {
+ **next_component_params,
+ **next_component.get("extra_params", {}),
+ }
+ logger.info(next_parameters)
+ logger.info(next_component_name)
+
+ if next_component["component_type"] == "task":
+ task = Task.create_task(
+ user=user,
+ name=name or next_component["task_name"],
+ task_name=next_component["task_name"],
+ parameters=next_parameters,
+ track_id=track_id,
+ )
+ logger.info(f"Task {task.id} created for {next_component['task_name']}")
+ return task.id
+ elif next_component["component_type"] == "signal":
+ if next_component_name == "created_data_text":
+ created_data_text.send(
+ sender=next_component_params.get("sender"),
+ data=next_component_params.get("data"),
+ track_id=track_id,
+ user=user,
+ )
+ return None
+
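To see how the chain walking works in practice, here is a small illustrative usage of get_next_chain_component against the example cluster from the top of this page. It is only a sketch: the import path for the cluster constant is assumed from the clusters.py file mentioned below, and it needs to run inside the API's Django environment.

from orchestrator.chain.clusters import CLUSTER_GPT_35_RAG_ETE_CONVERSATION
from orchestrator.chain.manager import ClusterManager

# "init" returns the first component of the chain.
name, component = ClusterManager.get_next_chain_component(
    CLUSTER_GPT_35_RAG_ETE_CONVERSATION, "init"
)
# name == "openai_speech2text", component["component_type"] == "task"

# After created_data_text, the next component is the RAG task.
name, component = ClusterManager.get_next_chain_component(
    CLUSTER_GPT_35_RAG_ETE_CONVERSATION, "created_data_text"
)
# name == "completed_rag", component["task_name"] == "rag"

# The last component has no successor.
name, component = ClusterManager.get_next_chain_component(
    CLUSTER_GPT_35_RAG_ETE_CONVERSATION, "completed_openai_text2speech"
)
# name is None and component is None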
So if you want to customise the pipeline, you can add your own cluster configuration
in API/orchestrator/chain/clusters.py.
For example, say we want to add a cluster called CLUSTER_VOICE_ETE_CONVERSATION, which will first turn the image into
a description text, and then feed that text, together with the audio, to an end-to-end voice model that generates the output audio.
The configuration will be like this:
CLUSTER_VOICE_ETE_CONVERSATION_NAME = "CLUSTER_VOICE_ETE_CONVERSATION"

CLUSTER_VOICE_ETE_CONVERSATION = {
    "image2text": {
        "order": 0,
        "extra_params": {},
        "component_type": "task",
        "task_name": "image2text",
    },
    "completed_image2text": {
        "order": 1,
        "extra_params": {},
        "component_type": "signal",
        "task_name": None,
    },
    "completed_voice2voice": {
        "order": 2,
        "extra_params": {},
        "component_type": "task",
        "task_name": "voice2voice",
    },
}

CLUSTERS = {
    # ...
    CLUSTER_VOICE_ETE_CONVERSATION_NAME: CLUSTER_VOICE_ETE_CONVERSATION,
}
+
Then add the newly added task_name values to the task_name_choices in API/orchestrator/models.py:
@staticmethod
def get_task_name_choices():
    """
    Get dynamic task name choices
    Returns:
        list: List of tuples containing task name choices
    """
    # Here you can fetch the choices from an external source or database
    return [
        # ...
        ("rag", "RAG"),
        ("image2text", "Image2Text"),
        ("voice2voice", "Voice2Voice"),
    ]


@staticmethod
def task_ml_task_mapping() -> dict:
    return {
        # ...
        "rag": "rag",
        "image2text": "image2text",
        "voice2voice": "voice2voice",
    }
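With these two entries registered, the guard at the end of the trigger_completed_task receiver shown earlier will recognise the new task names instead of logging an error. A quick illustrative check:

from orchestrator.models import Task

task_name_choices = Task.get_task_name_choices()
task_name_choices_list = [task[0] for task in task_name_choices]
assert "image2text" in task_name_choices_list
assert "voice2voice" in task_name_choices_list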
This will make sure you can choose the two newly added tasks when creating a new Task.
Next, you will need to create two new Signals in API/orchestrator/chain/signals.py:

from django.dispatch import Signal

completed_task = Signal()  # task itself
# ....
completed_image2text = Signal()
completed_voice2voice = Signal()

Then create two receivers to handle these signals, in API/orchestrator/chain/completed_image2text.py
and API/orchestrator/chain/completed_voice2voice.py.
Use the other existing receivers as a reference to implement the new ones.
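As an illustration, such a receiver could look roughly like the sketch below. It mirrors the pattern of the existing completed_* receivers and the ClusterManager.chain_next call shown above, but the result_json layout it reads (and the "text" parameter it forwards) are assumptions, not the real file.

# API/orchestrator/chain/completed_image2text.py (sketch, not the actual file)
from django.dispatch import receiver

from authenticate.utils.get_logger import get_logger
from orchestrator.chain.manager import ClusterManager
from orchestrator.chain.models import TaskData
from orchestrator.chain.signals import completed_image2text

logger = get_logger(__name__)


@receiver(completed_image2text)
def trigger_completed_image2text(sender, **kwargs):
    """
    After image2text finishes, hand the produced description text to the
    next component in the cluster via the ClusterManager.
    """
    data = kwargs.get("data", {})
    track_id = kwargs.get("track_id")
    task_data = TaskData(**data)

    # The result_json layout is assumed here; adapt it to what your Agent module returns.
    text = task_data.result_json.get("result_profile", {}).get("text", "")

    ClusterManager.chain_next(
        track_id=track_id,
        current_component="completed_image2text",
        next_component_params={"text": text},
        user=kwargs.get("user"),
    )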
Then you need to register the two signal receivers in orchestrator/chain/apps.py:

def ready(self):  # noqa
    # Import signals
    # ....
    import orchestrator.chain.completed_image2text  # noqa
    import orchestrator.chain.completed_voice2voice  # noqa
Until now, the API end is done for this newly added pipeline.
Next, you will need to implement the Agent module to consume the newly added pipeline, mainly the added types of tasks.
We have added two types of tasks, which means we need to add two modules to handle them.
So create an image2text module in Agent/modules/image2text/__init__.py and a voice2voice module
in Agent/modules/voice2voice/__init__.py.
You can then implement the code as you want within each respective folder; use the other modules as a reference.
After that, you will need to register both tasks in main.py; see the sketch below.
It will not be hard to add them in if you follow what we have done for the others.
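The Agent's internal handler interface is not reproduced in this section, so the following is only a minimal sketch of what an image2text module and its registration in main.py might look like. The class name, the handle_task method, and the task/result field names are all hypothetical.

# Agent/modules/image2text/__init__.py (hypothetical sketch)
import logging

logger = logging.getLogger(__name__)


class Image2TextHandler:
    """Turn an image referenced by the task parameters into a description text."""

    def handle_task(self, task) -> dict:
        # "image_path" is a hypothetical parameter name; use whatever your
        # cluster configuration actually passes in the task parameters.
        image_path = task.parameters.get("image_path")
        logger.info("Running image2text on %s", image_path)
        # ... call your image captioning model of choice here ...
        description = "a cup of coffee on the table"  # placeholder output
        return {"text": description}


# In Agent/main.py, dispatch on task_name the same way the existing modules are
# registered (shown here only as a schematic branch, not the real code):
#
#   elif task.task_name == "image2text":
#       result = Image2TextHandler().handle_task(task)
#   elif task.task_name == "voice2voice":
#       result = Voice2VoiceHandler().handle_task(task)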
Then go to the client end, start the audio acquisition, and specify the cluster name as the newly added cluster name.
Start talking, and you will see the pipeline triggered and the Agent module consuming the tasks.
If anything goes wrong, use the explanation above to debug the problem.
Deployment mode will be All in One Local Machine for demonstration purposes.
This means all of your components will be running on your local machine or your PC.
To get started, you will need a decent machine (as we will run some local LLMs) with a camera, microphone and speaker,
which most laptops have.
You will also need to have Python and Docker installed on your machine.
+Step 1: Clone the repository
+# switch to a proper directory
+git clone git@github.com:AI4WA/OpenOmniFramework.git
+
+Step 2: Get API running
+cd ./OpenOmniFramework
+cd ./API
+# Run it inside docker, this is the easiest way to get started
+docker compose up
+
After this, you should be able to access the API at http://localhost:8000.
The default username/password will be admin/password.
Step 3: Grab the Token for Authentication
Log in to the API admin, go to http://localhost:8000/authtoken/tokenproxy/ and click Add Token.
Step 4: Collect Audio and Video Data
+cd ./OpenOmniFramework
+cd ./Client/Listener
+
# create the virtual environment if this is your first time running this
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+# run video acquire
+python3 videos_acquire.py --token your_token_from_step_3
+
You should be able to see something like this:
+Then open a new terminal
+cd ./OpenOmniFramework
+cd ./Client/Listener
+
# create the virtual environment if this is your first time running this
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+# run audio acquire
+python3 audios_acquire.py --token your_token_from_step_3 --track_cluster CLUSTER_GPT_4O_ETE_CONVERSATION
+# you can change the cluster to the one your need
+
You will see something like this:
If everything works, you should be able to check the newly created Data Audios, Data Videos and Speech2Text Tasks
in the API Admin page.
Something like below:
+
+
+
Step 5: Run Agent models
Now we need to start the Agent module to consume the Tasks.
cd ./OpenOmniFramework
+cd ./Agent
+
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+Before we start the Agent module, there are some pre-configurations we need to do.
The provided functionalities within the Agent modules include OpenAI calls, HuggingFace calls, and our own
emotion detection module.
We need to get them set up first.
Set up OpenAI and HuggingFace Environment Variables
Create a .env file in the ./Agent folder, and add the following content:
HF_TOKEN=Your_HuggingFace_Token
+OPENAI_API_KEY=Your_OpenAI_API_KEY
+
Alternatively, you can export them directly:
+export HF_TOKEN=Your_HuggingFace_Token
+export OPENAI_API_KEY=Your_OpenAI_API_KEY
+
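A quick way to confirm the Agent process will actually see these variables is a small sanity-check sketch like the following (illustrative only):

import os

# Fail fast if the required credentials are missing from the environment.
assert os.getenv("HF_TOKEN"), "HF_TOKEN is not set"
assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY is not set"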
For the model part, if you want to get our emotion detection model running, you will need to download the model
from the download link
and put it in the folder ./Agent/data/models/emotion_detection/model_data.
It should look like this:
Then you should be ready to run the Agent module.
+# run the Agent module
+python3 main.py --token your_token_from_step_3
+
You can also skip the requirements installation steps and run the Agent module directly with Docker:
+TOKEN=XXX docker compose up
+
+This will allow you to utilise the GPU resources on your machine if you have one.
Until now, you have the client side feeding the video/audio data to the API, and the Agent module consuming the data.
Step 6: Play speech audio on the client side
+cd ./OpenOmniFramework
+cd ./Client/Responder
+
# create the virtual environment if this is your first time running this
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r requirements.dev.txt # if you are doing further development
+
+# run the audio player
+
+python3 play_speech.py --token your_token_from_step_3
+
+You will see something like this:
Until now, you should have the whole pipeline running on your local machine.
You should see new tasks created as expected on the Tasks page in the API admin.
As shown below:
And in the Detailed Latency Benchmark page, you should be able to see the latency of each round of conversation.
Notes: Here we will use the architecture diagram to explain the scripts.
Hello everyone, we are excited to introduce our latest work, a multimodal open-source conversational AI framework: the OpenOmni
Framework.
Why we built this comes down to these points:
So, to make sure people do not reinvent the wheel, we built this framework; for details, you can check our
documentation.
What we will demo here is one of the useful scenarios for the framework: using conversational AI to help visually impaired
people navigate indoors.
Notes: Here the video will show the three devices: one is the AI module, one is the API module, and one is the client
module.
We will use the local network deployment option, deploying the AI/API/Client modules on three different devices within the
same network.
So the audio and video data will be collected from this Raspberry Pi and then synced to the API server, together with
the metadata.
Then the API end will, based on the parameters, allocate the tasks for the AI module, which will then process them: for
example, speech2text, LLM generation, text2speech.
All the computationally heavy work will happen here.
When the results are finished, the data or generated audio will be sent back to the API side.
And the client side will have another thread listening to the API side, which then plays the audio, completing the conversation.
Note: the demo comes next.
OK, let's start the audio collection process; all the other modules are already running.
+Hi, where is my cup of coffee, can you tell me how to grab it?
+Notes: Then wait for the response, and then play the audio.
After this is finished, as part of a research or benchmarking process:
+Note: show the API interface here.
+You will directly get the latency details and summary stats from our API interface.
We can see how long each module takes, how long the whole process takes in total, which part is
model inference time, and which part is data transfer time.
Also, we can annotate and benchmark the accuracy of the process: whether the response tone and content fit the
scenario.
After the annotation, you will be able to see the details and summaries on this page.
This can be powerful for conversational AI system research and application development; you can use it to evaluate
different pipeline combinations, gather datasets, and so on.
+Hopefully this can benefit the wider community, and we are looking forward to your feedback.
- docker compose up for the API module; make sure to export STORAGE_SOLUTION=local
- ./start_pi.sh
- ./start_pi.sh
- python3 storage.py xxx to sync the data
- python3 main.py to start the AI module