From d31f7a7bbc745e44c4ba6fd70b6656bf959eb6d7 Mon Sep 17 00:00:00 2001 From: Wenxuan Zhang <48216707+Winfredy@users.noreply.github.com> Date: Thu, 6 Apr 2023 12:28:04 +0800 Subject: [PATCH 01/13] Update preprocess.py change the bug in #52 --- src/utils/preprocess.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/preprocess.py b/src/utils/preprocess.py index d12cada7..9638fc8e 100644 --- a/src/utils/preprocess.py +++ b/src/utils/preprocess.py @@ -63,7 +63,7 @@ def generate(self, input_path, save_dir, crop_or_resize='crop'): #load input if not os.path.isfile(input_path): raise ValueError('input_path must be a valid path to video/image file') - elif input_path.split('.')[1] in ['jpg', 'png', 'jpeg']: + elif input_path.split('.')[-1] in ['jpg', 'png', 'jpeg']: # loader for first frame full_frames = [cv2.imread(input_path)] fps = 25 @@ -148,4 +148,4 @@ def generate(self, input_path, save_dir, crop_or_resize='crop'): savemat(coeff_path, {'coeff_3dmm': semantic_npy, 'full_3dmm': np.array(full_coeffs)[0]}) - return coeff_path, png_path, crop_info \ No newline at end of file + return coeff_path, png_path, crop_info From 261b22bc289253bf9d3e23eb880bd0369da2e339 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Thu, 6 Apr 2023 17:00:01 +0800 Subject: [PATCH 02/13] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c2dd1ecd..48bbf469 100644 --- a/README.md +++ b/README.md @@ -161,9 +161,12 @@ python inference.py --driven_audio --source_image Date: Fri, 7 Apr 2023 00:53:47 +0800 Subject: [PATCH 03/13] sd-webui --- scripts/download_models.sh | 23 ++++--- scripts/extension.py | 133 +++++++++++++++++++++++++++++++++++++ src/gradio_demo.py | 2 +- 3 files changed, 146 insertions(+), 12 deletions(-) create mode 100644 scripts/extension.py diff --git a/scripts/download_models.sh b/scripts/download_models.sh index 3ecfe96a..d2afd692 100644 --- a/scripts/download_models.sh +++ 
b/scripts/download_models.sh @@ -1,12 +1,13 @@ mkdir ./checkpoints -wget /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/auido2exp_00300-model.pth -O ./checkpoints/auido2exp_00300-model.pth -wget /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/auido2pose_00140-model.pth -O ./checkpoints/auido2pose_00140-model.pth -wget /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/epoch_20.pth -O ./checkpoints/epoch_20.pth -wget /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/facevid2vid_00189-model.pth.tar -O ./checkpoints/facevid2vid_00189-model.pth.tar -wget /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/shape_predictor_68_face_landmarks.dat -O ./checkpoints/shape_predictor_68_face_landmarks.dat -wget /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/wav2lip.pth -O ./checkpoints/wav2lip.pth -wget /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/mapping_00229-model.pth.tar -O ./checkpoints/mapping_00229-model.pth.tar -wget /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/BFM_Fitting.zip -O ./checkpoints/BFM_Fitting.zip -wget /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/hub.zip -O ./checkpoints/hub.zip -unzip ./checkpoints/hub.zip -d ./checkpoints/ -unzip ./checkpoints/BFM_Fitting.zip -d ./checkpoints/ \ No newline at end of file +wget -nc /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/auido2exp_00300-model.pth -O ./checkpoints/auido2exp_00300-model.pth +wget -nc /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/auido2pose_00140-model.pth -O ./checkpoints/auido2pose_00140-model.pth +wget -nc /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/epoch_20.pth -O ./checkpoints/epoch_20.pth +wget -nc /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/facevid2vid_00189-model.pth.tar -O ./checkpoints/facevid2vid_00189-model.pth.tar +wget -nc 
/~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/shape_predictor_68_face_landmarks.dat -O ./checkpoints/shape_predictor_68_face_landmarks.dat +wget -nc /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/wav2lip.pth -O ./checkpoints/wav2lip.pth +wget -nc /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/mapping_00229-model.pth.tar -O ./checkpoints/mapping_00229-model.pth.tar +wget -nc /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/BFM_Fitting.zip -O ./checkpoints/BFM_Fitting.zip +wget -nc /~https://github.com/Winfredy/SadTalker/releases/download/v0.0.1/hub.zip -O ./checkpoints/hub.zip + +unzip -n ./checkpoints/hub.zip -d ./checkpoints/ +unzip -n ./checkpoints/BFM_Fitting.zip -d ./checkpoints/ \ No newline at end of file diff --git a/scripts/extension.py b/scripts/extension.py new file mode 100644 index 00000000..ab88cb26 --- /dev/null +++ b/scripts/extension.py @@ -0,0 +1,133 @@ +import os, sys +from pathlib import Path +import tempfile +import gradio as gr +from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call +from modules.shared import opts, OptionInfo +from modules import shared, paths, script_callbacks +import launch +import glob + +def get_source_image(image): + return image + +def get_img_from_txt2img(x): + talker_path = Path(paths.script_path) / "outputs" + imgs_from_txt_dir = str(talker_path / "txt2img-images/") + imgs = glob.glob(imgs_from_txt_dir+'/*/*.png') + imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_txt_dir, x))) + img_from_txt_path = os.path.join(imgs_from_txt_dir, imgs[-1]) + return img_from_txt_path, img_from_txt_path + +def get_img_from_img2img(x): + talker_path = Path(paths.script_path) / "outputs" + imgs_from_img_dir = str(talker_path / "img2img-images/") + imgs = glob.glob(imgs_from_img_dir+'/*/*.png') + imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_img_dir, x))) + img_from_img_path = os.path.join(imgs_from_img_dir, imgs[-1]) + 
return img_from_img_path, img_from_img_path + +def install(): + + kv = { + "face-alignment": "face-alignment==1.3.5", + "imageio": "imageio==2.19.3", + "imageio-ffmpeg": "imageio-ffmpeg==0.4.7", + "librosa":"librosa==0.8.0", + "pydub":"pydub==0.25.1", + "scipy":"scipy==1.8.1", + "tqdm": "tqdm", + "yacs":"yacs==0.1.8", + "pyyaml": "pyyaml", + "dlib": "dlib-bin", + "gfpgan": "gfpgan", + "TTS": "tts==0.13.0", + } + + for k,v in kv.items(): + print(k, launch.is_installed(k)) + if not launch.is_installed(k): + launch.run_pip("install "+ v, "requirements for SadTalker") + + + ### run the scripts to downlod models to correct localtion. + print('download models for SadTalker') + launch.run("cd " + paths.script_path+"/extensions/SadTalker && bash ./scripts/download_models.sh", live=True) + print('SadTalker is successfully installed!') + + +def on_ui_tabs(): + install() + + sys.path.extend([paths.script_path+'/extensions/SadTalker']) + + repo_dir = paths.script_path+'/extensions/SadTalker/' + + result_dir = opts.sadtalker_result_dir + os.makedirs(result_dir, exist_ok=True) + + from src.gradio_demo import SadTalker + from src.utils.text2speech import TTSTalker + + sad_talker = SadTalker(checkpoint_path=repo_dir+'checkpoints/', config_path=repo_dir+'src/config') + tts_talker = TTSTalker() + + with gr.Blocks(analytics_enabled=False) as audio_to_video: + with gr.Row().style(equal_height=False): + with gr.Column(variant='panel'): + with gr.Tabs(elem_id="sadtalker_source_image"): + with gr.TabItem('Upload image'): + with gr.Row(): + input_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=512,width=512) + + with gr.Row(): + submit_image2 = gr.Button('load From txt2img', variant='primary') + submit_image2.click(fn=get_img_from_txt2img, inputs=input_image, outputs=[input_image, input_image]) + + submit_image3 = gr.Button('load from img2img', variant='primary') + submit_image3.click(fn=get_img_from_img2img, inputs=input_image, outputs=[input_image, 
input_image]) + + with gr.Tabs(elem_id="sadtalker_driven_audio"): + with gr.TabItem('Upload OR TTS'): + with gr.Column(variant='panel'): + + with gr.Row(): + driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath") + + with gr.Column(variant='panel'): + input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.") + tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary') + tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio]) + + + with gr.Column(variant='panel'): + with gr.Tabs(elem_id="sadtalker_checkbox"): + with gr.TabItem('Settings'): + with gr.Column(variant='panel'): + is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion)").style(container=True) + is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality )").style(container=True) + submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary') + + with gr.Tabs(elem_id="sadtalker_genearted"): + gen_video = gr.Video(label="Generated video", format="mp4").style(width=256) + + + ### gradio gpu call will always return the html, + submit.click( + fn=wrap_queued_call(sad_talker.test), + inputs=[input_image, + driven_audio, + is_still_mode, + is_enhance_mode], + outputs=[gen_video, ] + ) + + return [(audio_to_video, "SadTalker", "extension")] + +def on_ui_settings(): + talker_path = Path(paths.script_path) / "outputs" + section = ('extension', "SadTalker") + opts.add_option("sadtalker_result_dir", OptionInfo(str(talker_path / "SadTalker/"), "Path to save results of sadtalker", section=section)) + +script_callbacks.on_ui_settings(on_ui_settings) +script_callbacks.on_ui_tabs(on_ui_tabs) \ No newline at end of file diff --git a/src/gradio_demo.py b/src/gradio_demo.py index d2310d73..23777d60 100644 --- a/src/gradio_demo.py +++ b/src/gradio_demo.py @@ -101,6 +101,6 @@ def test(self, source_image, 
driven_audio, still_mode, use_enhancer, result_dir= torch.cuda.synchronize() import gc; gc.collect() - return return_path + return return_path \ No newline at end of file From cb000b872192d664765eb63a749115930cf67198 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Fri, 7 Apr 2023 01:02:34 +0800 Subject: [PATCH 04/13] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 48bbf469..7fb0c32e 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,9 @@ ## 🔥 Highlight + +- 🔥 The extension of the stable-webui is online. just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, which may takes several minutes to download the models automatically. + - 🔥 Beta version of the `full image mode` is online! checkout [here](/~https://github.com/Winfredy/SadTalker#beta-full-bodyimage-generation) for more details. | still | still + enhancer | [input image @bagbag1815](https://twitter.com/bagbag1815/status/1642754319094108161) | From 13225c22f6f0bab1656efd3eb50f59adb28559a6 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Fri, 7 Apr 2023 01:03:17 +0800 Subject: [PATCH 05/13] Update README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7fb0c32e..cfa12780 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,8 @@ - 🔥 The extension of the stable-webui is online. just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, which may takes several minutes to download the models automatically. +https://user-images.githubusercontent.com/4397546/222513483-89161f58-83d0-40e4-8e41-96c32b47bd4e.mp4 + - 🔥 Beta version of the `full image mode` is online! checkout [here](/~https://github.com/Winfredy/SadTalker#beta-full-bodyimage-generation) for more details. 
| still | still + enhancer | [input image @bagbag1815](https://twitter.com/bagbag1815/status/1642754319094108161) | @@ -85,9 +87,9 @@ the 3D-aware face render for final video generation. - [ ] training code of each componments. - [ ] Audio-driven Anime Avatar. - [ ] interpolate ChatGPT for a conversation demo 🤔 -- [ ] integrade with stable-diffusion-web-ui. (stay tunning!) +- [x] integrade with stable-diffusion-web-ui. (stay tunning!) + -https://user-images.githubusercontent.com/4397546/222513483-89161f58-83d0-40e4-8e41-96c32b47bd4e.mp4 ## ⚙️ Installation From 86b23587cc716b03e3ef3f941cf54b5205e46281 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Fri, 7 Apr 2023 01:14:38 +0800 Subject: [PATCH 06/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cfa12780..ac574541 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ ## 🔥 Highlight -- 🔥 The extension of the stable-webui is online. just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, which may takes several minutes to download the models automatically. +- 🔥 The extension of the [stable-diffusion-webui](/~https://github.com/AUTOMATIC1111/stable-diffusion-webui) is online. just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, which may takes several minutes to download the models automatically. 
https://user-images.githubusercontent.com/4397546/222513483-89161f58-83d0-40e4-8e41-96c32b47bd4e.mp4 From b42a105bc7cd0e14e084720cffd7d5f0936f7ca2 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Fri, 7 Apr 2023 01:32:18 +0800 Subject: [PATCH 07/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ac574541..ae3e94d6 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ ## 🔥 Highlight -- 🔥 The extension of the [stable-diffusion-webui](/~https://github.com/AUTOMATIC1111/stable-diffusion-webui) is online. just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, which may takes several minutes to download the models automatically. +- 🔥 The extension of the [stable-diffusion-webui](/~https://github.com/AUTOMATIC1111/stable-diffusion-webui) is online. Just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, which may take several minutes to download the models automatically 😭. https://user-images.githubusercontent.com/4397546/222513483-89161f58-83d0-40e4-8e41-96c32b47bd4e.mp4 From 2abb64f0364b3a65a4755d17deb8098ddd17d24f Mon Sep 17 00:00:00 2001 From: shadow cun Date: Sat, 8 Apr 2023 10:02:30 +0800 Subject: [PATCH 08/13] update sd-webui extension --- README.md | 21 ++-- docs/install.md | 22 ++++ scripts/extension.py | 264 +++++++++++++++++++++---------------------- src/gradio_demo.py | 231 ++++++++++++++++++++----------------- 4 files changed, 287 insertions(+), 251 deletions(-) create mode 100644 docs/install.md diff --git a/README.md b/README.md index ae3e94d6..e0e0f299 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ ## 🔥 Highlight -- 🔥 The extension of the [stable-diffusion-webui](/~https://github.com/AUTOMATIC1111/stable-diffusion-webui) is online. 
Just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, which may take several minutes to download the models automatically 😭. +- 🔥 The extension of the [stable-diffusion-webui](/~https://github.com/AUTOMATIC1111/stable-diffusion-webui) is online. Just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, checkout more details [here](docs/sdwebui_extension.md). https://user-images.githubusercontent.com/4397546/222513483-89161f58-83d0-40e4-8e41-96c32b47bd4e.mp4 @@ -96,7 +96,7 @@ the 3D-aware face render for final video generation. #### Dependence Installation -
CLICK ME For Mannual Installation +###### Installing Sadtalker on Linux: ```bash git clone /~https://github.com/Winfredy/SadTalker.git @@ -113,23 +113,16 @@ conda install ffmpeg pip install -r requirements.txt +### tts is optional for gradio demo. +### pip install TTS + ``` +More tips about installnation on Windows and the Docker file can be founded [here](docs/install.md) -
+###### Sd-Webui-Extension: -
CLICK For Docker Installation -A dockerfile are also provided by [@thegenerativegeneration](/~https://github.com/thegenerativegeneration) in [docker hub](https://hub.docker.com/repository/docker/wawa9000/sadtalker), which can be used directly as: -```bash -docker run --gpus "all" --rm -v $(pwd):/host_dir wawa9000/sadtalker \ - --driven_audio /host_dir/deyu.wav \ - --source_image /host_dir/image.jpg \ - --expression_scale 1.0 \ - --still \ - --result_dir /host_dir -``` -
#### Download Trained Models diff --git a/docs/install.md b/docs/install.md new file mode 100644 index 00000000..efee2c4a --- /dev/null +++ b/docs/install.md @@ -0,0 +1,22 @@ + + + +### Windows Native + +- Make sure you have `ffmpeg` in the `%PATH%` as suggested in [#54](/~https://github.com/Winfredy/SadTalker/issues/54), following [this](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/) installation to install `ffmpeg`. + + + +### Docker installnation + +A dockerfile are also provided by [@thegenerativegeneration](/~https://github.com/thegenerativegeneration) in [docker hub](https://hub.docker.com/repository/docker/wawa9000/sadtalker), which can be used directly as: + +```bash +docker run --gpus "all" --rm -v $(pwd):/host_dir wawa9000/sadtalker \ + --driven_audio /host_dir/deyu.wav \ + --source_image /host_dir/image.jpg \ + --expression_scale 1.0 \ + --still \ + --result_dir /host_dir +``` + diff --git a/scripts/extension.py b/scripts/extension.py index ab88cb26..ae18771a 100644 --- a/scripts/extension.py +++ b/scripts/extension.py @@ -1,133 +1,133 @@ -import os, sys -from pathlib import Path -import tempfile -import gradio as gr -from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call -from modules.shared import opts, OptionInfo -from modules import shared, paths, script_callbacks -import launch -import glob - -def get_source_image(image): - return image - -def get_img_from_txt2img(x): - talker_path = Path(paths.script_path) / "outputs" - imgs_from_txt_dir = str(talker_path / "txt2img-images/") - imgs = glob.glob(imgs_from_txt_dir+'/*/*.png') - imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_txt_dir, x))) - img_from_txt_path = os.path.join(imgs_from_txt_dir, imgs[-1]) - return img_from_txt_path, img_from_txt_path - -def get_img_from_img2img(x): - talker_path = Path(paths.script_path) / "outputs" - imgs_from_img_dir = str(talker_path / "img2img-images/") - imgs = glob.glob(imgs_from_img_dir+'/*/*.png') - 
imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_img_dir, x))) - img_from_img_path = os.path.join(imgs_from_img_dir, imgs[-1]) - return img_from_img_path, img_from_img_path - -def install(): - - kv = { - "face-alignment": "face-alignment==1.3.5", - "imageio": "imageio==2.19.3", - "imageio-ffmpeg": "imageio-ffmpeg==0.4.7", - "librosa":"librosa==0.8.0", - "pydub":"pydub==0.25.1", - "scipy":"scipy==1.8.1", - "tqdm": "tqdm", - "yacs":"yacs==0.1.8", - "pyyaml": "pyyaml", - "dlib": "dlib-bin", - "gfpgan": "gfpgan", - "TTS": "tts==0.13.0", - } - - for k,v in kv.items(): - print(k, launch.is_installed(k)) - if not launch.is_installed(k): - launch.run_pip("install "+ v, "requirements for SadTalker") - - - ### run the scripts to downlod models to correct localtion. - print('download models for SadTalker') - launch.run("cd " + paths.script_path+"/extensions/SadTalker && bash ./scripts/download_models.sh", live=True) - print('SadTalker is successfully installed!') - - -def on_ui_tabs(): - install() - - sys.path.extend([paths.script_path+'/extensions/SadTalker']) - - repo_dir = paths.script_path+'/extensions/SadTalker/' - - result_dir = opts.sadtalker_result_dir - os.makedirs(result_dir, exist_ok=True) - - from src.gradio_demo import SadTalker - from src.utils.text2speech import TTSTalker - - sad_talker = SadTalker(checkpoint_path=repo_dir+'checkpoints/', config_path=repo_dir+'src/config') - tts_talker = TTSTalker() - - with gr.Blocks(analytics_enabled=False) as audio_to_video: - with gr.Row().style(equal_height=False): - with gr.Column(variant='panel'): - with gr.Tabs(elem_id="sadtalker_source_image"): - with gr.TabItem('Upload image'): - with gr.Row(): - input_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=512,width=512) - - with gr.Row(): - submit_image2 = gr.Button('load From txt2img', variant='primary') - submit_image2.click(fn=get_img_from_txt2img, inputs=input_image, outputs=[input_image, input_image]) - - 
submit_image3 = gr.Button('load from img2img', variant='primary') - submit_image3.click(fn=get_img_from_img2img, inputs=input_image, outputs=[input_image, input_image]) - - with gr.Tabs(elem_id="sadtalker_driven_audio"): - with gr.TabItem('Upload OR TTS'): - with gr.Column(variant='panel'): - - with gr.Row(): - driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath") - - with gr.Column(variant='panel'): - input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.") - tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary') - tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio]) - - - with gr.Column(variant='panel'): - with gr.Tabs(elem_id="sadtalker_checkbox"): - with gr.TabItem('Settings'): - with gr.Column(variant='panel'): - is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion)").style(container=True) - is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality )").style(container=True) - submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary') - - with gr.Tabs(elem_id="sadtalker_genearted"): - gen_video = gr.Video(label="Generated video", format="mp4").style(width=256) - - - ### gradio gpu call will always return the html, - submit.click( - fn=wrap_queued_call(sad_talker.test), - inputs=[input_image, - driven_audio, - is_still_mode, - is_enhance_mode], - outputs=[gen_video, ] - ) - - return [(audio_to_video, "SadTalker", "extension")] - -def on_ui_settings(): - talker_path = Path(paths.script_path) / "outputs" - section = ('extension', "SadTalker") - opts.add_option("sadtalker_result_dir", OptionInfo(str(talker_path / "SadTalker/"), "Path to save results of sadtalker", section=section)) - -script_callbacks.on_ui_settings(on_ui_settings) +import os, sys +from pathlib import Path +import tempfile +import gradio as gr +from modules.call_queue 
import wrap_gradio_gpu_call, wrap_queued_call +from modules.shared import opts, OptionInfo +from modules import shared, paths, script_callbacks +import launch +import glob + +def get_source_image(image): + return image + +def get_img_from_txt2img(x): + talker_path = Path(paths.script_path) / "outputs" + imgs_from_txt_dir = str(talker_path / "txt2img-images/") + imgs = glob.glob(imgs_from_txt_dir+'/*/*.png') + imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_txt_dir, x))) + img_from_txt_path = os.path.join(imgs_from_txt_dir, imgs[-1]) + return img_from_txt_path, img_from_txt_path + +def get_img_from_img2img(x): + talker_path = Path(paths.script_path) / "outputs" + imgs_from_img_dir = str(talker_path / "img2img-images/") + imgs = glob.glob(imgs_from_img_dir+'/*/*.png') + imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_img_dir, x))) + img_from_img_path = os.path.join(imgs_from_img_dir, imgs[-1]) + return img_from_img_path, img_from_img_path + +def install(): + + kv = { + "face-alignment": "face-alignment==1.3.5", + "imageio": "imageio==2.19.3", + "imageio-ffmpeg": "imageio-ffmpeg==0.4.7", + "librosa":"librosa==0.8.0", + "pydub":"pydub==0.25.1", + "scipy":"scipy==1.8.1", + "tqdm": "tqdm", + "yacs":"yacs==0.1.8", + "pyyaml": "pyyaml", + "dlib": "dlib-bin", + "gfpgan": "gfpgan", + } + + for k,v in kv.items(): + print(k, launch.is_installed(k)) + if not launch.is_installed(k): + launch.run_pip("install "+ v, "requirements for SadTalker") + + + if os.getenv('SADTALKER_CHECKPOINTS'): + print('load Sadtalker Checkpoints from '+ os.getenv('SADTALKER_CHECKPOINTS')) + else: + ### run the scripts to downlod models to correct localtion. 
+ print('download models for SadTalker') + launch.run("cd " + paths.script_path+"/extensions/SadTalker && bash ./scripts/download_models.sh", live=True) + print('SadTalker is successfully installed!') + + +def on_ui_tabs(): + install() + + sys.path.extend([paths.script_path+'/extensions/SadTalker']) + + repo_dir = paths.script_path+'/extensions/SadTalker/' + + result_dir = opts.sadtalker_result_dir + os.makedirs(result_dir, exist_ok=True) + + from src.gradio_demo import SadTalker + + if os.getenv('SADTALKER_CHECKPOINTS'): + checkpoint_path = os.getenv('SADTALKER_CHECKPOINTS') + else: + checkpoint_path = repo_dir+'checkpoints/' + + sad_talker = SadTalker(checkpoint_path=checkpoint_path, config_path=repo_dir+'src/config', lazy_load=True) + + with gr.Blocks(analytics_enabled=False) as audio_to_video: + with gr.Row().style(equal_height=False): + with gr.Column(variant='panel'): + with gr.Tabs(elem_id="sadtalker_source_image"): + with gr.TabItem('Upload image'): + with gr.Row(): + input_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=512,width=512) + + with gr.Row(): + submit_image2 = gr.Button('load From txt2img', variant='primary') + submit_image2.click(fn=get_img_from_txt2img, inputs=input_image, outputs=[input_image, input_image]) + + submit_image3 = gr.Button('load from img2img', variant='primary') + submit_image3.click(fn=get_img_from_img2img, inputs=input_image, outputs=[input_image, input_image]) + + with gr.Tabs(elem_id="sadtalker_driven_audio"): + with gr.TabItem('Upload'): + with gr.Column(variant='panel'): + + with gr.Row(): + driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath") + + + with gr.Column(variant='panel'): + with gr.Tabs(elem_id="sadtalker_checkbox"): + with gr.TabItem('Settings'): + with gr.Column(variant='panel'): + is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion)").style(container=True) + is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality 
)").style(container=True) + submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary') + + with gr.Tabs(elem_id="sadtalker_genearted"): + gen_video = gr.Video(label="Generated video", format="mp4").style(width=256) + + + ### gradio gpu call will always return the html, + submit.click( + fn=wrap_queued_call(sad_talker.test), + inputs=[input_image, + driven_audio, + is_still_mode, + is_enhance_mode], + outputs=[gen_video, ] + ) + + return [(audio_to_video, "SadTalker", "extension")] + +def on_ui_settings(): + talker_path = Path(paths.script_path) / "outputs" + section = ('extension', "SadTalker") + opts.add_option("sadtalker_result_dir", OptionInfo(str(talker_path / "SadTalker/"), "Path to save results of sadtalker", section=section)) + +script_callbacks.on_ui_settings(on_ui_settings) script_callbacks.on_ui_tabs(on_ui_tabs) \ No newline at end of file diff --git a/src/gradio_demo.py b/src/gradio_demo.py index 23777d60..dc41ecc6 100644 --- a/src/gradio_demo.py +++ b/src/gradio_demo.py @@ -1,106 +1,127 @@ -import torch, uuid -import os, sys, shutil -from src.utils.preprocess import CropAndExtract -from src.test_audio2coeff import Audio2Coeff -from src.facerender.animate import AnimateFromCoeff -from src.generate_batch import get_data -from src.generate_facerender_batch import get_facerender_data - -from pydub import AudioSegment - -def mp3_to_wav(mp3_filename,wav_filename,frame_rate): - mp3_file = AudioSegment.from_file(file=mp3_filename) - mp3_file.set_frame_rate(frame_rate).export(wav_filename,format="wav") - - -class SadTalker(): - - def __init__(self, checkpoint_path='checkpoints', config_path='src/config'): - - if torch.cuda.is_available() : - device = "cuda" - else: - device = "cpu" - - os.environ['TORCH_HOME']= checkpoint_path - - path_of_lm_croper = os.path.join( checkpoint_path, 'shape_predictor_68_face_landmarks.dat') - path_of_net_recon_model = os.path.join( checkpoint_path, 'epoch_20.pth') - dir_of_BFM_fitting = os.path.join( 
checkpoint_path, 'BFM_Fitting') - wav2lip_checkpoint = os.path.join( checkpoint_path, 'wav2lip.pth') - - audio2pose_checkpoint = os.path.join( checkpoint_path, 'auido2pose_00140-model.pth') - audio2pose_yaml_path = os.path.join( config_path, 'auido2pose.yaml') - - audio2exp_checkpoint = os.path.join( checkpoint_path, 'auido2exp_00300-model.pth') - audio2exp_yaml_path = os.path.join( config_path, 'auido2exp.yaml') - - free_view_checkpoint = os.path.join( checkpoint_path, 'facevid2vid_00189-model.pth.tar') - mapping_checkpoint = os.path.join( checkpoint_path, 'mapping_00229-model.pth.tar') - facerender_yaml_path = os.path.join( config_path, 'facerender.yaml') - - #init model - print(path_of_lm_croper) - self.preprocess_model = CropAndExtract(path_of_lm_croper, path_of_net_recon_model, dir_of_BFM_fitting, device) - - print(audio2pose_checkpoint) - self.audio_to_coeff = Audio2Coeff(audio2pose_checkpoint, audio2pose_yaml_path, - audio2exp_checkpoint, audio2exp_yaml_path, wav2lip_checkpoint, device) - print(free_view_checkpoint) - self.animate_from_coeff = AnimateFromCoeff(free_view_checkpoint, mapping_checkpoint, - facerender_yaml_path, device) - self.device = device - - def test(self, source_image, driven_audio, still_mode, use_enhancer, result_dir='./results/'): - - time_tag = str(uuid.uuid4()) - save_dir = os.path.join(result_dir, time_tag) - os.makedirs(save_dir, exist_ok=True) - - input_dir = os.path.join(save_dir, 'input') - os.makedirs(input_dir, exist_ok=True) - - print(source_image) - pic_path = os.path.join(input_dir, os.path.basename(source_image)) - shutil.move(source_image, input_dir) - - if os.path.isfile(driven_audio): - audio_path = os.path.join(input_dir, os.path.basename(driven_audio)) - - #### mp3 to wav - if '.mp3' in audio_path: - mp3_to_wav(driven_audio, audio_path.replace('.mp3', '.wav'), 16000) - audio_path = audio_path.replace('.mp3', '.wav') - else: - shutil.move(driven_audio, input_dir) - else: - raise AttributeError("error audio") - - - 
os.makedirs(save_dir, exist_ok=True) - pose_style = 0 - #crop image and extract 3dmm from image - first_frame_dir = os.path.join(save_dir, 'first_frame_dir') - os.makedirs(first_frame_dir, exist_ok=True) - first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(pic_path, first_frame_dir) - - if first_coeff_path is None: - raise AttributeError("No face is detected") - - #audio2ceoff - batch = get_data(first_coeff_path, audio_path, self.device, None) # longer audio? - coeff_path = self.audio_to_coeff.generate(batch, save_dir, pose_style) - #coeff2video - batch_size = 2 - data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, still_mode=still_mode) - return_path = self.animate_from_coeff.generate(data, save_dir, pic_path, crop_info, enhancer='gfpgan' if use_enhancer else None) - video_name = data['video_name'] - print(f'The generated video is named {video_name} in {save_dir}') - - torch.cuda.empty_cache() - torch.cuda.synchronize() - import gc; gc.collect() - - return return_path - +import torch, uuid +import os, sys, shutil +from src.utils.preprocess import CropAndExtract +from src.test_audio2coeff import Audio2Coeff +from src.facerender.animate import AnimateFromCoeff +from src.generate_batch import get_data +from src.generate_facerender_batch import get_facerender_data + +from pydub import AudioSegment + +def mp3_to_wav(mp3_filename,wav_filename,frame_rate): + mp3_file = AudioSegment.from_file(file=mp3_filename) + mp3_file.set_frame_rate(frame_rate).export(wav_filename,format="wav") + + +class SadTalker(): + + def __init__(self, checkpoint_path='checkpoints', config_path='src/config', lazy_load=False): + + if torch.cuda.is_available() : + device = "cuda" + else: + device = "cpu" + + self.device = device + + os.environ['TORCH_HOME']= checkpoint_path + + self.path_of_lm_croper = os.path.join( checkpoint_path, 'shape_predictor_68_face_landmarks.dat') + self.path_of_net_recon_model = os.path.join( 
checkpoint_path, 'epoch_20.pth') + self.dir_of_BFM_fitting = os.path.join( checkpoint_path, 'BFM_Fitting') + self.wav2lip_checkpoint = os.path.join( checkpoint_path, 'wav2lip.pth') + + self.audio2pose_checkpoint = os.path.join( checkpoint_path, 'auido2pose_00140-model.pth') + self.audio2pose_yaml_path = os.path.join( config_path, 'auido2pose.yaml') + + self.audio2exp_checkpoint = os.path.join( checkpoint_path, 'auido2exp_00300-model.pth') + self.audio2exp_yaml_path = os.path.join( config_path, 'auido2exp.yaml') + + self.free_view_checkpoint = os.path.join( checkpoint_path, 'facevid2vid_00189-model.pth.tar') + self.mapping_checkpoint = os.path.join( checkpoint_path, 'mapping_00229-model.pth.tar') + self.facerender_yaml_path = os.path.join( config_path, 'facerender.yaml') + + self.lazy_load = lazy_load + + if not self.lazy_load: + #init model + print(self.path_of_lm_croper) + self.preprocess_model = CropAndExtract(self.path_of_lm_croper, self.path_of_net_recon_model, self.dir_of_BFM_fitting, self.device) + + print(self.audio2pose_checkpoint) + self.audio_to_coeff = Audio2Coeff(self.audio2pose_checkpoint, self.audio2pose_yaml_path, + self.audio2exp_checkpoint, self.audio2exp_yaml_path, self.wav2lip_checkpoint, self.device) + print(self.free_view_checkpoint) + self.animate_from_coeff = AnimateFromCoeff(self.free_view_checkpoint, self.mapping_checkpoint, + self.facerender_yaml_path, self.device) + + def test(self, source_image, driven_audio, still_mode, use_enhancer, result_dir='./results/'): + + if self.lazy_load: + #init model + print(self.path_of_lm_croper) + self.preprocess_model = CropAndExtract(self.path_of_lm_croper, self.path_of_net_recon_model, self.dir_of_BFM_fitting, self.device) + + print(self.audio2pose_checkpoint) + self.audio_to_coeff = Audio2Coeff(self.audio2pose_checkpoint, self.audio2pose_yaml_path, + self.audio2exp_checkpoint, self.audio2exp_yaml_path, self.wav2lip_checkpoint, self.device) + print(self.free_view_checkpoint) + self.animate_from_coeff = 
AnimateFromCoeff(self.free_view_checkpoint, self.mapping_checkpoint, + self.facerender_yaml_path, self.device) + + time_tag = str(uuid.uuid4()) + save_dir = os.path.join(result_dir, time_tag) + os.makedirs(save_dir, exist_ok=True) + + input_dir = os.path.join(save_dir, 'input') + os.makedirs(input_dir, exist_ok=True) + + print(source_image) + pic_path = os.path.join(input_dir, os.path.basename(source_image)) + shutil.move(source_image, input_dir) + + if os.path.isfile(driven_audio): + audio_path = os.path.join(input_dir, os.path.basename(driven_audio)) + + #### mp3 to wav + if '.mp3' in audio_path: + mp3_to_wav(driven_audio, audio_path.replace('.mp3', '.wav'), 16000) + audio_path = audio_path.replace('.mp3', '.wav') + else: + shutil.move(driven_audio, input_dir) + else: + raise AttributeError("error audio") + + + os.makedirs(save_dir, exist_ok=True) + pose_style = 0 + #crop image and extract 3dmm from image + first_frame_dir = os.path.join(save_dir, 'first_frame_dir') + os.makedirs(first_frame_dir, exist_ok=True) + first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(pic_path, first_frame_dir) + + if first_coeff_path is None: + raise AttributeError("No face is detected") + + #audio2ceoff + batch = get_data(first_coeff_path, audio_path, self.device, None) # longer audio? 
+ coeff_path = self.audio_to_coeff.generate(batch, save_dir, pose_style) + #coeff2video + batch_size = 2 + data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, still_mode=still_mode) + return_path = self.animate_from_coeff.generate(data, save_dir, pic_path, crop_info, enhancer='gfpgan' if use_enhancer else None) + video_name = data['video_name'] + print(f'The generated video is named {video_name} in {save_dir}') + + if self.lazy_load: + del self.preprocess_model + del self.audio_to_coeff + del self.animate_from_coeff + + torch.cuda.empty_cache() + torch.cuda.synchronize() + import gc; gc.collect() + + return return_path + \ No newline at end of file From a675129a48cf8b3dd8f1ddf65a0a775bec0ba562 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Sat, 8 Apr 2023 10:15:47 +0800 Subject: [PATCH 09/13] Update README.md --- README.md | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e0e0f299..cbee7cd3 100644 --- a/README.md +++ b/README.md @@ -94,9 +94,7 @@ the 3D-aware face render for final video generation. ## ⚙️ Installation -#### Dependence Installation - -###### Installing Sadtalker on Linux: +#### Installing Sadtalker on Linux: ```bash git clone /~https://github.com/Winfredy/SadTalker.git @@ -117,11 +115,32 @@ pip install -r requirements.txt ### pip install TTS ``` + More tips about installnation on Windows and the Docker file can be founded [here](docs/install.md) -###### Sd-Webui-Extension: +#### Sd-Webui-Extension: +
CLICK ME + +Installing the lastest version of [stable-diffusion-webui](/~https://github.com/AUTOMATIC1111/stable-diffusion-webui) and install the sadtalker via `extension`. +image + +Then, retarting the stable-diffusion-webui, set some commandline args for . the models will be downloaded automatically in the right place. Alternatively, you can add the path of pre-downloaded sadtalker checkpoints to `SADTALKTER_CHECKPOINTS` in `webui_user.sh`(linux) or `webui_user.bat`(windows) by: +```bash +# windows (webui_user.bat) +set COMMANDLINE_ARGS=--no-gradio-queue --disable-safe-unpickle +set SADTALKER_CHECKPOINTS=D:\SadTalker\checkpoints +# linux (webui_user.sh) +export COMMANDLINE_ARGS=--no-gradio-queue --disable-safe-unpickle +export SADTALKER_CHECKPOINTS=/path/to/SadTalker/checkpoints +``` + +After installation, the SadTalker can be used in stable-diffusion-webui directly. + +image + +
From ef49709c40fd1c13063d8dd085977dbfb4ad7297 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Sat, 8 Apr 2023 10:17:53 +0800 Subject: [PATCH 10/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cbee7cd3..02609c0c 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ ## 🔥 Highlight -- 🔥 The extension of the [stable-diffusion-webui](/~https://github.com/AUTOMATIC1111/stable-diffusion-webui) is online. Just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, checkout more details [here](docs/sdwebui_extension.md). +- 🔥 The extension of the [stable-diffusion-webui](/~https://github.com/AUTOMATIC1111/stable-diffusion-webui) is online. Just install it in `extensions -> install from URL -> /~https://github.com/Winfredy/SadTalker`, checkout more details [here](#sd-webui-extension). https://user-images.githubusercontent.com/4397546/222513483-89161f58-83d0-40e4-8e41-96c32b47bd4e.mp4 From 47f016e4f0d3c6a15ea230749d12b93180306e32 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Sat, 8 Apr 2023 10:19:13 +0800 Subject: [PATCH 11/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 02609c0c..3b3e2df4 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ More tips about installnation on Windows and the Docker file can be founded [her Installing the lastest version of [stable-diffusion-webui](/~https://github.com/AUTOMATIC1111/stable-diffusion-webui) and install the sadtalker via `extension`. image -Then, retarting the stable-diffusion-webui, set some commandline args for . the models will be downloaded automatically in the right place. Alternatively, you can add the path of pre-downloaded sadtalker checkpoints to `SADTALKTER_CHECKPOINTS` in `webui_user.sh`(linux) or `webui_user.bat`(windows) by: +Then, restarting the stable-diffusion-webui, set some commandline args.
The models will be downloaded automatically in the right place. Alternatively, you can add the path of pre-downloaded sadtalker checkpoints to `SADTALKER_CHECKPOINTS` in `webui_user.sh`(linux) or `webui_user.bat`(windows) by: ```bash # windows (webui_user.bat) From 7951293fcd178206f6d4152ee8db3cf0eef779a0 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Sat, 8 Apr 2023 10:32:07 +0800 Subject: [PATCH 12/13] Update install.md --- docs/install.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/install.md b/docs/install.md index efee2c4a..537f8b55 100644 --- a/docs/install.md +++ b/docs/install.md @@ -6,6 +6,9 @@ - Make sure you have `ffmpeg` in the `%PATH%` as suggested in [#54](/~https://github.com/Winfredy/SadTalker/issues/54), following [this](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/) installation to install `ffmpeg`. +### Windows WSL +- Make sure the environment variable is set: `export LD_LIBRARY_PATH=/usr/lib/wsl/lib:$LD_LIBRARY_PATH` + ### Docker installnation From 8a99c8bbe792e5671e7eace9588e3104a23414a3 Mon Sep 17 00:00:00 2001 From: Shadow Cun Date: Sat, 8 Apr 2023 11:10:37 +0800 Subject: [PATCH 13/13] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 3b3e2df4..f1b3d1a0 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,10 @@ https://user-images.githubusercontent.com/4397546/222513483-89161f58-83d0-40e4-8 ## 📋 Changelog (Previous changelog can be founded [here](docs/changlelog.md)) +- __[2023.04.06]__: stable-diffusion webui extension is released. + +- __[2023.04.03]__: Enable TTS in huggingface and gradio local demo. + - __[2023.03.30]__: Launch beta version of the full body mode. - __[2023.03.30]__: Launch new feature: through using reference videos, our algorithm can generate videos with more natural eye blinking and some eyebrow movement.