-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_ann.py
153 lines (97 loc) · 3.8 KB
/
run_ann.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import keras
from keras.models import Sequential
from keras import layers
import pandas
import numpy
# Load the labeled training data. Convention in dataset.csv: the LAST column is
# the target label (digit 0-9); all earlier columns are pixel intensities.
dataset = pandas.read_csv("dataset.csv")
# Shuffle rows so training batches are not biased by the file's row order.
dataset = dataset.sample(frac=1)
target = dataset.iloc[:, -1].values
data = dataset.iloc[:, :-1].values
# Scale 0-255 pixel values into [0, 1] for stable gradient descent.
data = data / 255.0

# Sequential (feed-forward) model: layers are stacked in order.
machine = Sequential()
# First hidden layer: 512 units chosen arbitrarily as a starting point — tune
# by trial and error. input_shape is the pixel count per image (data.shape[1],
# e.g. 784). Sigmoid squashes each activation into (0, 1); "relu" is a common
# unbounded alternative (accepts any input, not capped at f(x)=1) that often
# retains more signal and trains faster.
machine.add(layers.Dense(512,
                         activation="sigmoid",
                         input_shape=(data.shape[1],)))
# Second hidden layer, 128 units.
machine.add(layers.Dense(128,
                         activation="sigmoid"))
# Third hidden layer, 64 units.
machine.add(layers.Dense(64,
                         activation="sigmoid"))
# Output layer: 10 classes (digits 0-9); softmax yields one probability per class.
machine.add(layers.Dense(10,
                         activation="softmax"))

# "sgd" = stochastic gradient descent optimizer.
# "sparse_categorical_crossentropy" fits integer class labels (no one-hot needed).
# Accuracy is tracked each epoch; backward propagation compares predictions with
# the dataset labels and adjusts the weights ("beta") to improve accuracy.
machine.compile(optimizer="sgd",
                loss="sparse_categorical_crossentropy",
                metrics=['accuracy'])
# batch_size: consult 64 pictures per gradient update out of the full dataset.
# epochs: how many rounds of backward propagation before stopping.
# Rule of thumb: epochs * batch_size should exceed the dataset size.
# Observed training accuracy: ~0.9187 (best-fitting model on training data;
# validation accuracy may be lower).
machine.fit(data, target, epochs=90, batch_size=64)

# Simulate the real world with unseen data. Convention in new_data.csv: the
# LAST column is the image filename; earlier columns are pixel values, which
# must be scaled exactly like the training data.
new_data = pandas.read_csv("new_data.csv")
filename_list = new_data.iloc[:, -1].values
new_data = new_data.iloc[:, :-1].values
new_data = new_data / 255.0
# predict() returns a probability per class for each image; argmax along the
# last axis picks the most likely digit.
# BUG FIX: this prediction previously also ran BEFORE new_data was loaded
# (NameError); it must come after the preprocessing above.
prediction = numpy.argmax(machine.predict(new_data), axis=-1)
result = pandas.DataFrame()
result['filename'] = filename_list
result['prediction'] = prediction  # BUG FIX: was `predict` (undefined name)
print(result)  # BUG FIX: was `print(results)` (undefined name)
# import keras
# from keras.models import Sequential
# from keras import layers
# import pandas
# import numpy
# dataset = pandas.read_csv("dataset.csv")
# dataset = dataset.sample(frac=1)
# target = dataset.iloc[:,-1].values
# data = dataset.iloc[:,:-1].values
# data = data/255.0
# machine = Sequential()
# machine.add(layers.Dense(512,
# activation="relu",
# input_shape=(data.shape[1],)
# ))
# machine.add(layers.Dense(128,
# activation="relu"))
# machine.add(layers.Dense(64,
# activation="relu"))
# machine.add(layers.Dense(10, activation="softmax"))
# machine.compile(optimizer="sgd",
# loss="sparse_categorical_crossentropy",
# metrics=['accuracy'])
# machine.fit(data, target, epochs=30, batch_size=64)
# new_data = pandas.read_csv("new_data.csv")
# filename_list = new_data.iloc[:,-1].values
# new_data = new_data.iloc[:,:-1].values
# new_data = new_data/255.0
# prediction = numpy.argmax(machine.predict(new_data), axis=-1)
# result = pandas.DataFrame()
# result['filename'] = filename_list
# result['prediction'] = prediction
# print(result)