Respond to group voice messages under the Wechaty scheme #623

Merged 2 commits on Mar 26, 2023
6 changes: 4 additions & 2 deletions README.md
@@ -110,6 +110,7 @@ pip3 install --upgrade tiktoken
"image_create_prefix": ["画", "看", "找"], # 开启图片回复的前缀
"conversation_max_tokens": 1000, # 支持上下文记忆的最多字符数
"speech_recognition": false, # 是否开启语音识别
"group_speech_recognition": false, # 是否开启群组语音识别 (目前仅支持wechaty)
"use_azure_chatgpt": false, # 是否使用Azure ChatGPT service代替openai ChatGPT service. 当设置为true时需要设置 open_ai_api_base,如 https://xxx.openai.azure.com/
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。", # 人格描述,
}
@@ -130,8 +131,9 @@ pip3 install --upgrade tiktoken

**3. Speech recognition**

+ 添加 `"speech_recognition": true` 将开启语音识别,默认使用openai的whisper模型识别为文字,同时以文字回复,目前只支持私聊 (注意由于语音消息无法匹配前缀,一旦开启将对所有语音自动回复);
+ 添加 `"voice_reply_voice": true` 将开启语音回复语音,但是需要配置对应语音合成平台的key,由于itchat协议的限制,只能发送语音mp3文件,若使用wechaty则回复的是微信语音。
+ 添加 `"speech_recognition": true` 将开启语音识别,默认使用openai的whisper模型识别为文字,同时以文字回复,该参数仅支持私聊 (注意由于语音消息无法匹配前缀,一旦开启将对所有语音自动回复);
+ 添加 `"group_speech_recognition": true` 将开启群组语音识别,默认使用openai的whisper模型识别为文字,同时以文字回复,参数仅支持群聊 (可以匹配group_chat_prefix和group_chat_keyword,目前仅支持wechaty方案);
+ 添加 `"voice_reply_voice": true` 将开启语音回复语音(同时作用于私聊和群聊),但是需要配置对应语音合成平台的key,由于itchat协议的限制,只能发送语音mp3文件,若使用wechaty则回复的是微信语音。

**4. Other settings**

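For reference, here is a minimal sketch of the voice-related settings with group speech recognition turned on, written in the config.py style used later in this PR. The key names all appear in this PR's config files; the specific values (prefixes, whitelist entries, engines) are only illustrative assumptions:

```python
# Illustrative voice settings; key names match config-template.json / config.py in this PR.
voice_settings_sketch = {
    "speech_recognition": True,              # transcribe voice messages in private chats
    "group_speech_recognition": True,        # transcribe voice messages in group chats (wechaty only)
    "voice_reply_voice": True,               # reply with synthesized voice (needs a TTS key)
    "voice_to_text": "openai",               # speech-recognition engine
    "text_to_voice": "baidu",                # speech-synthesis engine
    "group_chat_prefix": ["@bot", "bot"],    # transcribed text must start with one of these...
    "group_chat_keyword": [],                # ...or contain one of these keywords
    "group_name_white_list": ["ALL_GROUP"],  # groups the bot is allowed to respond in
}
```

With settings along these lines, a group voice message is transcribed by whisper, checked against group_chat_prefix and group_chat_keyword, and answered with a voice reply instead of text.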
98 changes: 97 additions & 1 deletion channel/wechat/wechaty_channel.py
@@ -164,6 +164,67 @@ async def on_message(self, msg: Message):
await self._do_send_group_img(content, room_id)
else:
await self._do_send_group(content, room_id, room_name, from_user_id, from_user_name)
elif room and msg.type() == MessageType.MESSAGE_TYPE_AUDIO:
# group voice message
room_id = room.room_id
room_name = await room.topic()
from_user_id = from_contact.contact_id
from_user_name = from_contact.name
is_at = await msg.mention_self()
config = conf()
# check that group speech recognition is enabled and that the group passes the whitelist (name listed or matching a keyword)
if config.get('group_speech_recognition') and (
'ALL_GROUP' in config.get('group_name_white_list') or room_name in config.get(
'group_name_white_list') or self.check_contain(room_name, config.get(
'group_name_keyword_white_list'))):
# download the voice file
voice_file = await msg.to_file_box()
silk_file = TmpDir().path() + voice_file.name
await voice_file.to_file(silk_file)
logger.info("[WX]receive voice file: " + silk_file)
# convert the silk file to wav audio
wav_file = silk_file.replace(".slk", ".wav")
with open(silk_file, 'rb') as f:
silk_data = f.read()
pcm_data = pysilk.decode(silk_data)

with wave.open(wav_file, 'wb') as wav_data:
wav_data.setnchannels(1)
wav_data.setsampwidth(2)
wav_data.setframerate(24000)
wav_data.writeframes(pcm_data)
if os.path.exists(wav_file):
converter_state = "true" # 转换wav成功
else:
converter_state = "false" # 转换wav失败
logger.info("[WX]receive voice converter: " + converter_state)
# transcribe the voice to text
query = super().build_voice_to_text(wav_file).content
# check for prefixes and keywords
match_prefix = self.check_prefix(query, config.get('group_chat_prefix')) \
or self.check_contain(query, config.get('group_chat_keyword'))
# When Wechaty reports is_at as True, the returned content already has the @mention stripped; when is_at is False the full content is returned
if match_prefix is not None:
# If a custom prefix matched, strip the prefix (and the space after it) from the content, e.g. so a custom prefix can trigger AI image generation
prefixes = config.get('group_chat_prefix')
for prefix in prefixes:
if query.startswith(prefix):
query = query.replace(prefix, '', 1).strip()
break
# send the reply
img_match_prefix = self.check_prefix(query, conf().get('image_create_prefix'))
if img_match_prefix:
query = query.split(img_match_prefix, 1)[1].strip()
await self._do_send_group_img(query, room_id)
elif config.get('voice_reply_voice'):
await self._do_send_group_voice(query, room_id, room_name, from_user_id, from_user_name)
else:
await self._do_send_group(query, room_id, room_name, from_user_id, from_user_name)
else:
logger.info("[WX]receive voice check prefix: " + 'False')
# remove the temporary files
os.remove(wav_file)
os.remove(silk_file)
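
Incoming WeChat voice messages arrive as silk files, which whisper cannot read directly, so the branch above first decodes them to raw PCM with pysilk and wraps the result in a 24 kHz mono wav container. Here is a standalone sketch of that conversion, reusing the same pysilk and wave calls as the handler; the file names are hypothetical:

```python
import wave

import pysilk


def silk_to_wav(silk_file: str, wav_file: str) -> None:
    # Decode the silk payload into raw PCM samples.
    with open(silk_file, 'rb') as f:
        pcm_data = pysilk.decode(f.read())
    # Wrap the PCM data in a wav container: mono, 16-bit samples, 24 kHz.
    with wave.open(wav_file, 'wb') as wav_data:
        wav_data.setnchannels(1)
        wav_data.setsampwidth(2)
        wav_data.setframerate(24000)
        wav_data.writeframes(pcm_data)


# silk_to_wav('voice.slk', 'voice.wav')  # hypothetical file names
```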

async def send(self, message: Union[str, Message, FileBox, Contact, UrlLink, MiniProgram], receiver):
logger.info('[WX] sendMsg={}, receiver={}'.format(message, receiver))
@@ -189,7 +250,6 @@ async def _do_send(self, query, reply_user_id):
except Exception as e:
logger.exception(e)


async def _do_send_voice(self, query, reply_user_id):
try:
if not query:
@@ -261,6 +321,42 @@ async def _do_send_group(self, query, group_id, group_name, group_user_id, group_user_name):
reply_text = '@' + group_user_name + ' ' + reply_text.strip()
await self.send_group(conf().get("group_chat_reply_prefix", "") + reply_text, group_id)

async def _do_send_group_voice(self, query, group_id, group_name, group_user_id, group_user_name):
if not query:
return
context = Context(ContextType.TEXT, query)
group_chat_in_one_session = conf().get('group_chat_in_one_session', [])
if ('ALL_GROUP' in group_chat_in_one_session or \
group_name in group_chat_in_one_session or \
self.check_contain(group_name, group_chat_in_one_session)):
context['session_id'] = str(group_id)
else:
context['session_id'] = str(group_id) + '-' + str(group_user_id)
reply_text = super().build_reply_content(query, context).content
if reply_text:
reply_text = '@' + group_user_name + ' ' + reply_text.strip()
# convert the mp3 file to silk format
mp3_file = super().build_text_to_voice(reply_text).content
silk_file = mp3_file.replace(".mp3", ".silk")
# Load the MP3 file
audio = AudioSegment.from_file(mp3_file, format="mp3")
# Convert to WAV format
audio = audio.set_frame_rate(24000).set_channels(1)
wav_data = audio.raw_data
sample_width = audio.sample_width
# Encode to SILK format
silk_data = pysilk.encode(wav_data, 24000)
# Save the silk file
with open(silk_file, "wb") as f:
f.write(silk_data)
# send the voice message
t = int(time.time())
file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
await self.send_group(file_box, group_id)
# remove the temporary files
os.remove(mp3_file)
os.remove(silk_file)
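
The reply path runs the conversion the other way: the synthesized mp3 is resampled to 24 kHz mono with pydub and re-encoded to silk with pysilk before being sent as a FileBox. A standalone sketch of that step, mirroring the calls used in _do_send_group_voice above; the file names are hypothetical:

```python
import pysilk
from pydub import AudioSegment


def mp3_to_silk(mp3_file: str, silk_file: str) -> None:
    # Load the mp3 and resample to 24 kHz mono so it matches the silk encoder settings.
    audio = AudioSegment.from_file(mp3_file, format="mp3").set_frame_rate(24000).set_channels(1)
    # Encode the raw PCM data to silk at the same sample rate.
    silk_data = pysilk.encode(audio.raw_data, 24000)
    with open(silk_file, "wb") as f:
        f.write(silk_data)


# mp3_to_silk('reply.mp3', 'reply.silk')  # hypothetical file names
```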

async def _do_send_group_img(self, query, reply_room_id):
try:
if not query:
1 change: 1 addition & 0 deletions config-template.json
@@ -10,6 +10,7 @@
"group_chat_in_one_session": ["ChatGPT测试群"],
"image_create_prefix": ["画", "看", "找"],
"speech_recognition": false,
"group_speech_recognition": false,
"voice_reply_voice": false,
"conversation_max_tokens": 1000,
"expires_in_seconds": 3600,
1 change: 1 addition & 0 deletions config.py
@@ -43,6 +43,7 @@

# voice settings
"speech_recognition": False, # whether to enable speech recognition
"group_speech_recognition": False, # whether to enable group speech recognition
"voice_reply_voice": False, # whether to reply to voice with voice; requires the api key of the corresponding speech-synthesis engine
"voice_to_text": "openai", # speech-recognition engine; openai and google are supported
"text_to_voice": "baidu", # speech-synthesis engine; baidu and google are supported