-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathkNNClassifier.py
63 lines (47 loc) · 1.33 KB
/
kNNClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import numpy as np
class kNNClassifier:
    """Lazy k-nearest-neighbours predictor based on Euclidean distance.

    ``train`` only stores the samples; all distance computation happens in
    ``predict``, which returns for every query row the mean label of its
    ``k`` nearest training samples.
    """

    def __init__(self):
        # Training samples / labels and their dimensions; unset until train().
        self.x = None
        self.y = None
        self.n_features = None
        self.n_samples = None
        # Neighbour count used by the most recent predict() call.
        self.k = None

    def train(self, x, y):
        """Store the training set (no model is fitted).

        Args:
            x: 2-D array-like; rows are samples, columns are features.
            y: Array-like of labels with one entry per row of ``x``.

        Raises:
            AssertionError: if ``x`` is not 2-D or ``len(y)`` differs from
                the number of rows in ``x``.
        """
        x = np.array(x)
        y = np.array(y)
        # Validate everything before touching instance state so a failed
        # call cannot leave the object half-updated.
        assert x.ndim == 2, "x must be 2-D: (n_samples, n_features)"
        n_samples, n_features = x.shape
        assert len(y) == n_samples, "x and y must contain the same number of samples"
        self.n_samples, self.n_features = n_samples, n_features
        self.x = x
        self.y = y

    def _predict_one(self, x):
        """Return the summed labels of the ``self.k`` training samples nearest to ``x``."""
        dist = np.array([np.linalg.norm(v - x) for v in self.x])
        nearest = dist.argsort()[:self.k]
        # Sum along axis 0 so vector-valued labels are accumulated per component.
        return np.sum(self.y[nearest], 0)

    def predict(self, x, k):
        """Return the mean label of the ``k`` nearest neighbours of each query.

        Args:
            x: 2-D array-like of query samples with the same number of
                columns as the training data.
            k: Number of neighbours to average over (1 <= k <= n_samples).

        Returns:
            numpy array with one entry per row of ``x``.

        Raises:
            RuntimeError: if ``train`` has not been called yet.
            ValueError: if ``x`` has the wrong shape or ``k`` is out of range.
        """
        if self.x is None:
            raise RuntimeError("train() must be called before predict()")
        # asarray turns a DataFrame into its row matrix; iterating a
        # DataFrame directly would yield column names instead of samples.
        queries = np.asarray(x)
        if queries.ndim != 2 or queries.shape[1] != self.n_features:
            raise ValueError(
                "x must be 2-D with %d feature column(s)" % self.n_features
            )
        if not 1 <= k <= self.n_samples:
            raise ValueError(
                "k must be between 1 and the number of training samples (%d)"
                % self.n_samples
            )
        self.k = k
        # Iterate over the supplied queries, not the stored training set.
        pred = np.array([self._predict_one(v) for v in queries])
        return pred / self.k
def test():
    """Smoke-test the classifier on 'Dataset/watermelon-tiny.csv'.

    Shuffles the rows, uses every column except the first and the last as
    features and the last column minus one as the label (presumably mapping
    1/2 labels to 0/1 -- confirm against the dataset), then trains and scores
    on the same rows with k=7. Prints the labels, the thresholded
    predictions and the resulting accuracy.
    """
    import pandas as pd

    frame = pd.read_csv('Dataset/watermelon-tiny.csv')

    # Randomise the row order in place via a shuffled positional index.
    order = np.arange(len(frame))
    np.random.shuffle(order)
    frame = frame.iloc[order]

    feature_cols = frame.columns[1:-1]
    label_col = frame.columns[-1]
    x = frame[feature_cols]
    y = frame[label_col] - 1

    clf = kNNClassifier()
    clf.train(x, y)

    # Mean neighbour label above 0.5 counts as a positive prediction.
    scores = clf.predict(x, 7)
    pred = scores > 0.5

    print(y)
    print(pred)

    hits = np.count_nonzero(pred == np.array(y))
    accu = hits / float(len(pred))
    print(accu)
# Run the smoke test only when executed as a script, not on import.
if __name__ == "__main__":
    test()