-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhater.py
44 lines (35 loc) · 1.73 KB
/
hater.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from roberta import Roberta
from ldse import LDSE
import numpy as np
class Hater:
    """Binary text classifier that majority-votes over three linear SVMs.

    Each SVM is trained on a different representation of the same texts:
    the two outputs of ``LDSE.transform`` (``C1=False`` and ``C1=True``)
    and the output of ``Roberta.transform``.
    """

    def __init__(self, ngram_range, lang='en', summary_ratio=0.1):
        """Build the feature extractors and the (unfitted) classifiers.

        Args:
            ngram_range: Passed to ``TfidfVectorizer`` as the character
                n-gram range.
            lang: Forwarded to ``Roberta``.
            summary_ratio: Forwarded to ``Roberta``.
        """
        # Character-level TF-IDF with sublinear term frequency and no IDF
        # smoothing; it is handed to LDSE as its vectorizer.
        vectorizer = TfidfVectorizer(ngram_range=ngram_range,
                                     analyzer='char',
                                     sublinear_tf=True,
                                     smooth_idf=False)
        self.ldse_model = LDSE(vectorizer=vectorizer)
        self.label0_model = SVC(C=0.1, kernel='linear')
        self.label1_model = SVC(C=0.1, kernel='linear')
        self.bert_transformer = Roberta(lang=lang, summary_ratio=summary_ratio)
        self.bert_model = SVC(C=0.1, kernel='linear')

    def fit(self, X, Y, C=None):
        """Fit the LDSE model and the three SVMs on ``X`` / ``Y``.

        Args:
            X: Training texts.
            Y: Training labels, one per text.
            C: Class list forwarded to ``LDSE.fit``. Defaults to
                ``['1', '0']``.
        """
        # A fresh list per call: a shared mutable default would let any
        # mutation made downstream leak into later calls.
        if C is None:
            C = ['1', '0']

        self.ldse_model.fit(X, Y, C=C)

        label0_representation = self.ldse_model.transform(X, C1=False)
        label1_representation = self.ldse_model.transform(X, C1=True)
        bert_representation = self.bert_transformer.transform(X)

        self.label0_model.fit(label0_representation, Y)
        self.label1_model.fit(label1_representation, Y)
        self.bert_model.fit(bert_representation, Y)

    def predict(self, X):
        """Return a list with one ``predict_single`` result per item of ``X``."""
        return [self.predict_single(x) for x in X]

    def predict_single(self, X):
        """Classify a single text by majority vote of the three SVMs.

        Args:
            X: One text sample (not a batch).

        Returns:
            ``0`` if label ``'0'`` received more votes than ``'1'``,
            otherwise ``1``.

        Raises:
            KeyError: If a classifier emits a label whose string form is
                neither ``'0'`` nor ``'1'``.
        """
        votes = {'0': 0, '1': 0}

        # The transformers are called with a one-element batch.
        ldse_label0_features = self.ldse_model.transform([X], C1=False)
        ldse_label1_features = self.ldse_model.transform([X], C1=True)
        bert_features = self.bert_transformer.transform([X])

        model_inputs = (
            (self.label0_model, ldse_label0_features),
            (self.label1_model, ldse_label1_features),
            (self.bert_model, bert_features),
        )
        for model, features in model_inputs:
            # str() lets models trained on integer labels (0/1) vote as
            # well as models trained on the string labels '0'/'1'.
            votes[str(model.predict(features)[0])] += 1

        # Three voters, two classes: a tie is impossible.
        return 0 if votes['0'] > votes['1'] else 1