-
Notifications
You must be signed in to change notification settings - Fork 64
/
Copy pathsaying_phrase.py
73 lines (56 loc) · 3.07 KB
/
saying_phrase.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import re
from ruchatbot.utils.chunk_tools import normalize_chunk
class SayingPhraseEntry:
    """A substitution placeholder of an output "say" phrase, split into parts.

    The constructor stores its arguments unchanged:
      raw_text -- the placeholder text as written in the phrase
      name     -- the placeholder identifier
      tags     -- the placeholder arguments (may be None)
    """

    def __init__(self, name, raw_text, tags):
        # Attribute creation order (raw_text, name, tags) is kept as-is.
        self.raw_text, self.name, self.tags = raw_text, name, tags
class SayingPhrase:
    """Output phrase template with optional ``$``-placeholders.

    On construction the phrase is scanned for placeholders named
    ``NP<digits>``, ``VI<digits>``, ``AP<digits>`` or ``VP<digits>`` and
    prefixed with ``$``. A placeholder may be followed immediately by a
    parenthesised, comma-separated argument list, e.g. ``$NP1(ИМ, ЕД)``.

    Attributes:
        raw_text: the phrase string exactly as passed in.
        name2entry: maps a placeholder name (``'NP1'``) to its
            SayingPhraseEntry. If a name occurs several times in the
            phrase, the last occurrence overwrites the earlier ones.
    """

    def __init__(self, phrase_str):
        """Parse *phrase_str* and index the placeholders found in it.

        Raises:
            ValueError: propagated from ``str.index`` when a placeholder is
                followed by ``(`` with no matching ``)`` later in the phrase.
        """
        self.raw_text = phrase_str
        self.name2entry = dict()

        for slot_prefix in ('NP', 'VI', 'AP', 'VP'):
            pattern = r'\$(' + slot_prefix + r'\d+)'
            for found in re.finditer(pattern, phrase_str):
                start, end = found.span()
                slot_name = found.group(1)

                # An argument list exists only if '(' directly follows the name.
                if end < len(phrase_str) and phrase_str[end] == '(':
                    closing = phrase_str.index(')', end + 1)
                    # The stored text covers the placeholder plus "(...)".
                    slot_text = phrase_str[start:closing + 1]
                    inner = phrase_str[end + 1:closing]
                    slot_tags = [piece.strip() for piece in inner.strip().split(',')]
                else:
                    slot_text = found.group(0)
                    slot_tags = None

                self.name2entry[slot_name] = SayingPhraseEntry(slot_name, slot_text, slot_tags)

    def has_entries(self):
        """Return True if at least one placeholder was found in the phrase."""
        return bool(self.name2entry)
def substitute_bound_variables(phrase, condition_matching_results, text_utils):
    """Fill the placeholders of *phrase* with chunks matched by a condition.

    For every group in ``condition_matching_results.groups`` whose upper-cased
    name equals a placeholder name in ``phrase.name2entry``, the placeholder
    text in the phrase is replaced with the group's words joined by spaces.
    If the placeholder carries tags, the chunk is first passed through
    ``normalize_chunk`` with the requested case/number.

    Args:
        phrase: a SayingPhrase instance.
        condition_matching_results: object exposing ``has_groups()`` and a
            ``groups`` mapping of name -> group; each group is read through
            its ``words`` and ``phrase_tokens`` attributes. May be falsy, in
            which case nothing is substituted.
        text_utils: object whose ``flexer`` and ``word2tags`` attributes are
            forwarded to ``normalize_chunk``.

    Returns:
        The phrase text with all matched placeholders substituted; the
        unchanged ``phrase.raw_text`` when there is nothing to substitute.

    Raises:
        NotImplementedError: if a placeholder tag is neither a supported
            case tag nor a supported number tag.
    """
    assert isinstance(phrase, SayingPhrase)
    utterance = phrase.raw_text

    # Substitution only makes sense when the condition produced matched
    # groups and the phrase actually contains placeholders.
    if not (condition_matching_results
            and condition_matching_results.has_groups()
            and phrase.has_entries()):
        return utterance

    for name, group in condition_matching_results.groups.items():
        entry = phrase.name2entry.get(name.upper())
        if entry is None:
            continue

        words = group.words

        # Does the chunk have to be inflected?
        if entry.tags:
            tokens = group.phrase_tokens
            target_tags = dict()
            for tag in entry.tags:
                if tag in ('ИМ', 'ВИН', 'РОД', 'ТВОР', 'ДАТ', 'ПРЕДЛ'):
                    target_tags['ПАДЕЖ'] = tag
                elif tag in ('ЕД', 'МН'):
                    target_tags['ЧИСЛО'] = tag
                else:
                    raise NotImplementedError(
                        'Unsupported tag {!r} in placeholder {}'.format(tag, entry.raw_text))

            words = normalize_chunk(tokens, edges=None, flexer=text_utils.flexer,
                                    word2tags=text_utils.word2tags, target_tags=target_tags)

        # Put the chunk words in place of the placeholder, e.g. "$NP1(...)".
        entry_value = ' '.join(words)
        pattern = re.escape(entry.raw_text)
        if not entry.raw_text.endswith(')'):
            # A bare placeholder such as "$NP1" must not match the prefix of
            # a longer one such as "$NP10", which plain str.replace would
            # corrupt.
            pattern += r'(?!\d)'
        # A callable replacement keeps backslashes in the chunk text literal.
        utterance = re.sub(pattern, lambda _m, value=entry_value: value, utterance)

    return utterance