-
Notifications
You must be signed in to change notification settings - Fork 41
/
Copy pathkaggle_test_multi.py
executable file
·47 lines (43 loc) · 1.51 KB
/
kaggle_test_multi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python
from pylearn2.datasets import DenseDesignMatrix
from pylearn2.utils import serial
from theano import tensor as T
from theano import function
from glob import glob
import pickle
import numpy as np
import csv
import gc
def process(mdl, ds, batch_size=100):
    """Run ``mdl`` over every row of ``ds.X`` and return its argmax outputs.

    A Theano function is compiled that forward-propagates a batch through
    ``mdl.fprop`` and takes the argmax along axis 1 of the result.  The
    dataset is then fed through that function ``batch_size`` rows at a time.

    Unlike the earlier version, the number of samples no longer has to be a
    multiple of ``batch_size``: a short final batch is padded up to
    ``batch_size`` by repeating its last row, and the predictions for the
    padding rows are thrown away.  When the sample count divides evenly the
    result is identical to before.

    Parameters
    ----------
    mdl : object
        Model exposing ``set_batch_size``, ``get_input_space`` and ``fprop``.
    ds : object
        Dataset whose ``X`` attribute is indexed as ``ds.X[rows, :]``.
    batch_size : int
        Number of rows passed to the compiled function per call.

    Returns
    -------
    numpy.ndarray
        1-D array with one argmax index per row of ``ds.X``, in row order.
    """
    mdl.set_batch_size(batch_size)
    X = mdl.get_input_space().make_batch_theano()
    Y = mdl.fprop(X)
    y = T.argmax(Y, axis=1)
    f = function([X], y)

    n_samples = ds.X.shape[0]
    yhat = []
    # range() with a step works on both Python 2 and 3 and, unlike
    # ``n_samples / batch_size``, also visits the trailing partial batch.
    for start in range(0, n_samples, batch_size):
        x_arg = ds.X[start:start + batch_size, :]
        n_valid = x_arg.shape[0]
        if n_valid < batch_size:
            # The model was told to expect exactly ``batch_size`` rows, so
            # pad instead of feeding a short batch.
            # NOTE(review): assumes the prediction for a row does not depend
            # on the other rows in its batch -- confirm for this model.
            padding = np.repeat(x_arg[-1:], batch_size - n_valid, axis=0)
            x_arg = np.concatenate([x_arg, padding], axis=0)
        # Keep only predictions for real rows; padded rows are discarded.
        yhat.append(f(x_arg.astype(X.dtype))[:n_valid])

    if not yhat:
        # np.concatenate rejects an empty list; mirror the old empty result.
        return np.array(yhat).ravel()
    return np.concatenate(yhat)
# --- Ensemble prediction script ---------------------------------------------
# Loads a pickled test matrix, runs every pickled classifier found under
# ``ensemble_clf/`` over it, combines the per-model predictions and writes
# ``results.csv`` with an ``id,label`` header and 1-based ids.

# Use a context manager so the pickle file handle is closed deterministically.
# NOTE: pickle can execute arbitrary code while loading -- only point this at
# files you produced yourself.
with open('saved_tst.pkl', 'rb') as tst_file:
    tst = pickle.load(tst_file)
ds = DenseDesignMatrix(X=tst)

clfs = glob('ensemble_clf/*.pkl')
if not clfs:
    # Zero files is "even", which previously produced the misleading
    # odd-number-of-voters message below.  Same exception type, clearer text.
    raise AttributeError('No classifiers found matching ensemble_clf/*.pkl')
if (len(clfs) % 2) == 0:
    raise AttributeError('Use an odd number of voters to avoid ties!')

# Generator: each model is deserialized only when the loop asks for it, so
# at most one model is alive at a time.
mdls = (serial.load(f) for f in clfs)
fname = 'results.csv'
test_size = ds.X.shape[0]

# One row of predictions per classifier, one column per test sample.
res = np.zeros((len(clfs), test_size), dtype='float32')
for n, mdl in enumerate(mdls):
    res[n, :] = process(mdl, ds, batch_size=500)
    # Single-argument form prints the same bytes as the old Python 2
    # statement ``print "Processed model ",n`` and also runs on Python 3.
    print("Processed model  %d" % n)
    # Drop the only reference and force a collection before the next model
    # is loaded; the original author added this to work around CUDA memory
    # issues.
    del mdl
    gc.collect()

# Rounding the mean vote is a majority vote only when labels are 0/1; the
# odd-voter check above guarantees the mean is never exactly 0.5.
# NOTE(review): for more than two classes this would average label values
# rather than vote -- confirm the task is binary.
yhat = np.round(np.mean(res, axis=0))

converted_results = [['id', 'label']] + [[n + 1, int(x)]
                                         for n, x in enumerate(yhat)]
with open(fname, 'w') as f:
    csv_f = csv.writer(f, delimiter=',', quoting=csv.QUOTE_NONE)
    csv_f.writerows(converted_results)