-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
267 lines (239 loc) · 11.3 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# -*- coding: utf-8 -*-
from pathlib import Path
from matplotlib.font_manager import FontProperties
from matplotlib import pyplot as plt
import math
from modules import imagenetClassify
from nltk.corpus import wordnet as wn
import os
# See the description of these constants in the show_images_and_synonyms docstring.
FONT_SIZE = 10
FONT_PATH = r'C:\WINDOWS\Fonts\yugothm.ttc'
PLOT_MAX_LENGTH = 6
PLOT_SPACING = 2
def get_jap_synonyms(synsets, min_lemmas=2):
    """
    Collect the Japanese lemma names for a list of WordNet synsets.

    If the given synsets provide fewer than ``min_lemmas`` Japanese lemmas, the search climbs to their hypernyms,
    one level at a time, adding the Japanese lemmas found there. The climb stops as soon as the minimum is reached
    or when no hypernyms remain (so fewer than ``min_lemmas`` names may be returned).

    :rtype: Set<String>.
    :param synsets: List<Synset>. List of synset objects
    :param min_lemmas: int. The minimum number of lemma objects to collect (counted before duplicates are removed)
    before the hypernym climb stops.
    :return: Set<String>. The distinct lemma names found in the synset objects in synsets. This may include lemma
    names from hypernyms of the synset objects if min_lemmas cannot be reached with the lemmas from just the synset
    objects in synsets.
    """

    def synset_info(synset):
        """
        Print debugging information about the given synset, including its hypernyms, hyponyms, member holonyms
        and root hypernyms.
        Please see "Relations" at wordnet.princeton.edu for more information about what is contained in a synset
        and its relations.
        :rtype: none
        :param synset: Synset. Synset object
        """
        print("\n")
        print("Debug help: {} ({:08})".format(synset, synset.offset()))
        print("-----------------")
        print("Base set: " + repr(synset))
        print("Hypernyms: " + repr(synset.hypernyms()))
        print("Hyponyms: " + repr(synset.hyponyms()))
        print("Holonyms: " + repr(synset.member_holonyms()))
        print("Root hypernyms: " + repr(synset.root_hypernyms()))
        print("----------------------\n")

    def add_japanese_lemmas_from_synsets(synsets, return_list=None):
        """
        Append the Japanese lemma objects of every synset in synsets to return_list.
        Synsets without a Japanese entry are reported on stdout and skipped.
        :param synsets: List<Synset>. List of synset objects to get the lemmas from
        :param return_list: List<Lemma>. List to add the lemmas to (a fresh list is created when omitted)
        :return: List<Lemma>. return_list, extended in place with the lemmas that were found
        """
        # A None sentinel avoids sharing one default list between calls.
        if return_list is None:
            return_list = []
        for syn in synsets:
            japanese_lemmas = syn.lemmas("jpn")
            if japanese_lemmas:
                return_list.extend(japanese_lemmas)
            else:
                print("No Japanese entry for {} -- did not add.".format(syn))
                synset_info(syn)
        return return_list

    lemmas = add_japanese_lemmas_from_synsets(synsets, [])
    while len(lemmas) < min_lemmas:
        hypernyms = []
        for synset in synsets:
            hypernyms.extend(synset.hypernyms())
        if not hypernyms:
            # Nothing left to climb to: without this check the loop would spin
            # forever, because neither synsets nor lemmas could change any more.
            break
        synsets = hypernyms
        print("adding from {}".format(synsets))
        lemmas = add_japanese_lemmas_from_synsets(synsets, lemmas)
    # A set removes names that were reached through more than one synset.
    return {lemma.name() for lemma in lemmas}
def get_probable_synsets(classification_list, thresh=.2):
    """
    Select the classifications whose score is strictly above thresh and look up their WordNet noun synsets.

    :param classification_list: List<Tuple(name,score,offset)>.
    Classification output from the image classify method, i.e. a list of tuples:
    [(name1, score1, offset1), (name2, score2, offset2)...]
    :param thresh: double. A classification is kept only when its score is greater than this value.
    :return: List<Synset>. Synset objects for the kept classifications, in input order.
    """
    return [
        wn.synset_from_pos_and_offset(wn.NOUN, entry[2])
        for entry in classification_list
        if entry[1] > thresh
    ]
def file_exists(path):
    """
    Report whether path points to an existing regular file.

    :param path: String. path (relative or absolute)
    :return: boolean. True when the path is an existing file, False otherwise (including directories)
    """
    return Path(path).is_file()
def classify_img(impath, tensorboard):
    """
    Classify an image by delegating to imagenetClassify.classify_image in the modules folder.

    Per the original author's notes, the classifier runs the image through a neural network and reports the top
    five WordNet nouns deemed to be in the image (WordNet is a database of words connected through an "is-a"
    hierarchy; see wordnet.princeton.edu).

    :param impath: String. path to an image file (the author believes common formats such as png and jpg work)
    :param tensorboard: passed through unchanged to imagenetClassify.classify_image
    :return: List<Tuple(name,score,offset)>. Whatever classify_image returns; described by the author as 5 tuples
    [(name1, score1, offset1), (name2, score2, offset2)...]
    Name is the name of the synset in WordNet
    Score is a measure of likelihood that the given synset is in the picture
    Offset is the offset of the noun synset (a form of identification for the
    synset).
    :raises FileNotFoundError: when impath is not an existing file
    """
    # Fail early with a clear message instead of letting the classifier hit a missing file.
    if not file_exists(impath):
        raise FileNotFoundError("Path did not exist %s" % impath)
    return imagenetClassify.classify_image(impath, tensorboard)
def get_synoyms_verbose(impath, thresh, tensorboard):
    """
    Find synonyms for an image while printing every intermediate result.

    Used when the verbose flag is set in show_images_and_synonyms. Runs the three main steps and prints their
    outputs as it goes:
    category_data = classify_img(impath, tensorboard)
    probable_synsets = get_probable_synsets(category_data, thresh=thresh)
    if probable_synsets:
    synonyms = get_jap_synonyms(probable_synsets)

    :param impath: string. path of an image
    :param thresh: double. threshold score for a probable synset
    :param tensorboard: passed through unchanged to classify_img
    :return: The words thought to be in the picture or categorically related to things in the picture, as returned
    by get_jap_synonyms; an empty list when no category passes the threshold.
    """
    print("impath: {}.".format(impath))
    # Each category entry has the format: name, score, offset
    classifications = classify_img(impath, tensorboard)
    print(classifications)
    category_names = [entry[0] for entry in classifications]
    print("Image Output Categories: {}.".format(category_names))

    candidates = get_probable_synsets(classifications, thresh=thresh)
    print("Probable synset IDs: {}".format(candidates))
    if not candidates:
        print("No probable categories.")
        return []

    found = get_jap_synonyms(candidates)
    print("Synonym list: {}".format(found))
    return found
def show_images_and_synonyms(impaths, thresh=.2, verbose=True, tensorboard=False):
    r"""
    Shows images in a square pyplot grid, each labelled with the synonyms found for it.

    Constants (initialized at the top of the file):
    FONT_SIZE = 10 (font size on the plot)
    FONT_PATH = r'C:\WINDOWS\Fonts\yugothm.ttc' (location of the sinograph-containing font to use)
    PLOT_MAX_LENGTH = 6 (the max size of rows and columns in the plot)
    ---note: only the first length^2 images from impaths will be shown. For more images, it may be best to write
    to a file or to run iterations of the program.
    PLOT_SPACING = 2 (interval at which to show images in the plot. I find 1 is too crowded.)

    :param impaths: List<String> list of filepath locations of the images to be read.
    Paths must avoid accidental escape sequences from \ (backslash)
    characters combined with escape characters. Otherwise r can be put
    before strings to indicate a raw string, free of escapes.
    Relative paths must be relative to the location of this program--an image folder is recommended to be put in the
    same folder as "modules". When the list is empty nothing is plotted.
    :param thresh: double. The threshold score for a meaningful categorization in the program. This parameter should
    be tested with different values to see if perhaps a higher or lower value gives better results.
    :param verbose: boolean. Indicates whether the program will output text during the program or not. Unless it is a
    nuisance, this should always be True so that users can see scores and categories not listed in the plot.
    :param tensorboard: passed through unchanged to the classification step.
    """
    num_images = len(impaths)
    if num_images == 0:
        # Nothing to plot; the grid side would be 0 and the figure size (0, 0).
        return

    # Smallest square grid that fits every image, capped at PLOT_MAX_LENGTH per side.
    plot_length = min(math.ceil(math.sqrt(num_images)), PLOT_MAX_LENGTH)
    # Images beyond the capped grid are not shown.
    num_images = min(num_images, plot_length * plot_length)

    fp = FontProperties(fname=FONT_PATH, size=FONT_SIZE)
    plt.figure(figsize=(plot_length * PLOT_SPACING, plot_length * PLOT_SPACING))
    plt.suptitle("Thresh = {}".format(thresh))

    for i, impath in enumerate(impaths[:num_images]):
        synonyms = []
        if verbose:
            synonyms = get_synoyms_verbose(impath, thresh, tensorboard)
        else:
            category_data = classify_img(impath, tensorboard)
            probable_synsets = get_probable_synsets(category_data, thresh=thresh)
            if probable_synsets:
                # get_jap_synonyms has no "lemmas" parameter; passing one raised TypeError.
                synonyms = get_jap_synonyms(probable_synsets)

        plt.subplot(plot_length, plot_length, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(plt.imread(impath))

        # Label shown when no synonyms were found for the image.
        label = repr(synonyms) if synonyms else "無し"
        plt.xlabel(label, fontproperties=fp, wrap=True)
        print("\n-------------------\n")
    plt.show()
if __name__ == '__main__':
    # Be careful with backslashes in Windows paths: \U can be read as "Character
    # with 32-bit hex value xxxxxxxx". Writing C:\\ or using a raw string
    # r"..\.." is always acceptable.
    # The image paths are read one per line from the list file and passed on as a list.
    working_dir = os.getcwd()
    print(working_dir)
    image_filepaths_init_file = os.path.join(working_dir, "exampleimg/path_list.txt")
    print(image_filepaths_init_file)
    with open(image_filepaths_init_file, 'r') as f:
        example_images = f.read().splitlines()
    print(example_images)
    show_images_and_synonyms(example_images, thresh=0.2)