-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathrefiner_param.py
70 lines (61 loc) · 2.69 KB
/
refiner_param.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import sys, platform
# Executable file suffix for the host OS: '.exe' when platform.system()
# reports 'Windows', otherwise the empty string.
if platform.system() == 'Windows':
    extension = '.exe'
else:
    extension = ""
# General settings shared by the refiners.
# 'transformer_model' holds a model identifier string
# (presumably a Hugging Face model id -- confirm where it is loaded).
settings = dict(
    transformer_model='johngiorgi/declutr-small',
)
# Table of query refiners, split into a 'global' and a 'local' family.
# Each entry maps a refiner name to an integer flag.
# NOTE(review): presumably 1 selects a refiner and 0 skips it -- confirm
# against the code that reads this table.
refiners = {
    'global': dict(
        SenseDisambiguation=0,
        Thesaurus=0,
        Wordnet=0,
        Conceptnet=0,
        Tagmee=0,
        Word2Vec=0,
        Glove=0,
        Anchor=0,
        Wiki=0,
        KrovetzStemmer=0,
        LovinsStemmer=0,
        PaiceHuskStemmer=0,
        PorterStemmer=0,
        Porter2Stemmer=0,
        SRemovalStemmer=0,
        Trunc4Stemmer=0,
        Trunc5Stemmer=0,
        BackTranslation=0,
        Bing=1,
    ),
    'local': dict(
        RelevanceFeedback=0,
        Docluster=0,
        Termluster=0,
        Conceptluster=0,
        OnFields=0,      # requires the index for 'extcorpus' to be available
        AdapOnFields=0,  # requires the index for 'extcorpus' to be available
        BertQE=0,
        RM3=0,
        T5Transformer=0,
    ),
}
# Back-translation settings.
#
# Translator names listed by the original author next to 'translator'
# (presumably the accepted values -- confirm against the refiner that reads them):
#   'nllb', 'alibaba', 'apertium', 'argos', 'baidu', 'bing', 'caiyun',
#   'cloudTranslation', 'deepl', 'elia', 'google', 'hujiang', 'iciba',
#   'iflytek', 'iflyrec', 'itranslate', 'judic', 'languageWire', 'lingvanex',
#   'niutrans', 'mglip', 'mirai', 'modernMt', 'myMemory', 'papago', 'qqFanyi',
#   'qqTranSmart', 'reverso', 'sogou', 'sysTran', 'tilde', 'translateCom',
#   'translateMe', 'utibet', 'volcEngine', 'yandex', 'yeekit', 'youdao'
#
# Target languages listed by the original author next to 'tgt_lng':
#   'persian', 'french', 'german', 'russian', 'malay', 'tamil', 'swahili',
#   'chinese_simplified', 'korean', 'arabic'
backtranslation = dict(
    translator=['bing'],
    src_lng='english',
    tgt_lng=[
        'persian',
        'french',
        'german',
        'russian',
        'malay',
        'tamil',
        'swahili',
        'chinese_simplified',
        'korean',
        'arabic',
    ],
    max_length=512,
    device='cpu',
    model_card='facebook/nllb-200-distilled-600M',
)
# Settings for the T5 transformer refiner.
# NOTE(review): 'pair', 'finetune' and 'predict' look like 0/1 switches for
# separate pipeline stages -- confirm against the code that reads them.
t5transformer = {
    'pair': 0,
    'finetune': 0,
    'predict': 1,
    't5model': 'base.local',  # e.g. 'small.gc' on google cloud tpu, 'small.local' on local machine
    'iter': 5,                # number of finetuning iterations for t5
    'nchanges': 10,           # number of changes to a query
    # Plain string: the original used an f-string with no placeholders.
    'pretrained_dir': './../output/t5-data/pretrained_models/base',
    # presumably sequence lengths for model inputs and targets -- TODO confirm
    'lseq': {"inputs": 32, "targets": 256},
}
# Settings for the Bing refiner.
# Fill in 'subscription_key' with your own key; it is empty by default.
bing = {
    'subscription_key': '',
}
# The original key was spelled 'subscription_key ' (with a trailing space),
# so a lookup of 'subscription_key' raised KeyError. The misspelled key is
# kept as an alias of the same value so any existing reader keeps working.
bing['subscription_key '] = bing['subscription_key']