-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathquery_classfier.py
72 lines (51 loc) · 1.52 KB
/
query_classfier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
from rake_nltk import Rake
import json
import re
# Length threshold used by is_query4: a query longer than this is reported as type 4.
min_thresh_len = 50
################## Methods ###################
# query 1 -> minimal stopword ratio
# query 2 -> ACT or abbreviations match
# query 3 -> v, vs, versus comes in the middle
# query 4 -> long paragraph, higher stopword ratio
##############################################
def hasNumbers(string):
    """Return True if *string* contains at least one digit, else False."""
    # re.search returns a match object or None; compare explicitly so the
    # result is always a real bool.
    return re.search(r'\d', string) is not None
def is_query3(query):
    """Score whether *query* looks like a "party v. party" (type 3) query.

    The query is lower-cased and searched for any of the separators
    " v ", " vs ", " v.s. ", " v.s " or "versus".

    Returns 100 when a separator is present, otherwise -1.
    """
    lowered = query.lower()
    separators = (" v ", " vs ", " v.s. ", " v.s ", "versus")
    # Use substring membership rather than str.find(): find() returns -1
    # (truthy) on a miss and 0 (falsy) on a hit at the start of the string,
    # so it must not be used directly as a condition.
    if any(separator in lowered for separator in separators):
        return 100
    return -1
def is_query2(query):
    """Score whether *query* mentions an act or a bill (type 2 query).

    The query is lower-cased and checked for the substrings "act" and
    "bill". This is a plain substring match, so words that merely contain
    those letters (e.g. "contract") also count.

    Returns 100 when either substring is present, otherwise -1.
    """
    lowered = query.lower()
    # Substring membership instead of str.find(): find() yields -1 (truthy)
    # when nothing is found, which made the original condition always pass.
    if "act" in lowered or "bill" in lowered:
        return 100
    return -1
def is_query4(query):
    """Report whether *query* is a long, paragraph-style (type 4) query.

    Returns 4 when the query is longer than ``min_thresh_len`` characters,
    otherwise -1 (the same "no match" value the other is_query* helpers use).
    """
    # TODO(review): the file header says type 4 should also consider the
    # stopword ratio, but no such check was ever implemented; the stopword
    # list used to be loaded here and then ignored, so that unused file read
    # has been dropped until the ratio logic exists.
    if len(query) > min_thresh_len:
        return 4
    return -1
def identify_query_type(query):
    """Classify *query* and return its query-type number.

    The checks run in a fixed order: type 3 (is_query3), then type 2
    (is_query2), and finally type 4 (is_query4).

    Returns 3 or 2 when the corresponding helper reports a match; otherwise
    returns whatever is_query4 returns for the query.
    """
    # The is_query3 / is_query2 helpers signal "no match" with -1 and a match
    # with a positive score, so test against -1 rather than comparing the
    # score to the type number (and never with `is`, which tests identity).
    # query 3
    if is_query3(query) != -1:
        return 3
    # query 2
    if is_query2(query) != -1:
        return 2
    # query 4
    return is_query4(query)
if __name__ == "__main__":
    # Only prompt when run as a script, so importing this module has no
    # side effects.
    # raw_input exists only on Python 2; fall back to input() on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    print("Enter query")
    query = read_line()
    # r = Rake(stopwords=stopwords_list)
    # r.extract_keywords_from_text(query)
    # D = dict(r.get_word_frequency_distribution())
    # print D
    # Show the classification instead of silently discarding it.
    print(identify_query_type(query))