-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathmain.py
100 lines (79 loc) · 2.97 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#
# main.py
# Francois Maillet, 2016-02-25
# Copyright (c) 2016 mldb.ai inc. All rights reserved.
#
from os.path import join
mldb.log("Staring plugin init...")
# load wrapper so that mldb object behaves like pymldb
mldb = mldb_wrapper.wrap(mldb)
# expose route to serve UI
mldb.plugin.serve_static_folder("/static", "static")
###
# Load embedding code for rt predictions
inceptionUrl = "https://s3.amazonaws.com/public-mldb-ai/models/inception_dec_2015.zip"
mldb.put('/v1/functions/fetch', {
"type": 'fetcher',
"params": {}
})
mldb.put('/v1/functions/inception', {
"type": 'tensorflow.graph',
"params": {
"modelFileUrl": 'archive+' + inceptionUrl + '#tensorflow_inception_graph.pb',
"inputs": 'fetch({url})[content] AS "DecodeJpeg/contents"',
"outputs": "pool_3"
}
})
filePrefix = "https://s3.amazonaws.com/public-mldb-ai/datasets/dataset-builder/cache"
###
# Reload cached embeddings
def loadCollection(collection, prefix, limit=-1):
mldb.log(" >> Loading collection '%s', prefix:%s limit:%d" % (collection, prefix, limit))
mldb.post("/v1/procedures", {
"type": "import.text",
"params": {
"dataFileUrl": join(prefix, "dataset_creator_images_%s.csv.gz" % collection),
"select": "* EXCLUDING(rowName)",
"named": "rowName",
"limit": limit,
"outputDataset": {"id": collection, "type": "tabular"}
}
})
rez = mldb.post("/v1/procedures", {
"type": "import.text",
"params": {
"dataFileUrl": join(prefix, "dataset_creator_embedding_%s.csv.gz" % collection),
"outputDataset": {
"id": "embedded_images_%s" % collection,
"type": "embedding"
},
"select": "* EXCLUDING(rowName)",
"named": "rowName",
"where": "rowName IN (select rowName() from %s)" % collection,
"structuredColumnNames": True
}
})
mldb.log(rez)
# create nearest neighbour function. this will allow us to quickly find similar images
mldb.put("/v1/functions/nearest_%s" % collection, {
"type": "embedding.neighbors",
"params": {
"dataset": "embedded_images_%s" % collection
}
})
# load built-in collections
limit = -1
if "limit" in mldb.plugin.args:
limit = mldb.plugin.args["limit"]
for collection in ["recipe", "transport", "pets", "realestate"]:
loadCollection(collection, filePrefix, limit)
# load any extra collections specified at plugin creation
if "extraCollections" in mldb.plugin.args:
mldb.log(" >> Loading extra collections")
for coll in mldb.plugin.args["extraCollections"]:
if "name" not in coll:
raise Exception("Key 'name' must be specified for extra collection")
loadCollection(coll["name"], \
coll["prefix"] if "prefix" in coll else filePrefix,
coll["limit"] if "limit" in coll else limit)
mldb.log("Ready")