forked from Kappie/3d-scattering-transform
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassify_ct_scans.py
128 lines (102 loc) · 4.52 KB
/
classify_ct_scans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import numpy as np
import sklearn as sk
import re
from numpy.lib.format import open_memmap
from sklearn import svm
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import minmax_scale
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, make_scorer
import sklearn.metrics
from scattering_transform import number_of_transforms
def classify(dataset, labels):
    """
    Grid-search an SVM on a train/test split of `dataset` and print a report.

    The whole dataset is first rescaled with `normalize` (before splitting),
    then 20% of the samples are held out as a test set. A 5-fold
    cross-validated grid search over RBF and linear SVMs, scored by accuracy,
    is run on the remaining samples. The best parameters, the score of every
    candidate, the test-set accuracy and the test-set confusion matrix are
    printed along the way.

    Parameters:
        dataset: 2-D array of samples x features (its `.shape` is printed).
        labels: array of class labels, one per sample.

    Returns:
        The fitted GridSearchCV object.
    """
    # Alternative tried previously: linear SVC (C=1) on PCA(n_components=9)
    # features, scored with 5-fold cross_val_score.
    print(labels)
    print(dataset.shape)

    held_out_fraction = 0.2
    worker_count = 8
    score_function = sklearn.metrics.accuracy_score

    normalize(dataset)
    X_train, X_test, y_train, y_test = train_test_split(
        dataset, labels, test_size=held_out_fraction, shuffle=True
    )
    print(y_train)
    print(y_train.shape)

    # Do a grid search for best SVM.
    rbf_grid = {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4, 1e-5, 1e-6], 'C': [1, 10, 100, 1000]}
    linear_grid = {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}
    search = GridSearchCV(
        SVC(),
        [rbf_grid, linear_grid],
        cv=5,
        scoring=make_scorer(score_function),
        n_jobs=worker_count,
    )
    search.fit(X_train, y_train)

    print("Best parameters found on training set:")
    print(search.best_params_)

    print("Grid scores on training set:")
    cv_results = search.cv_results_
    per_candidate = zip(
        cv_results['mean_test_score'],
        cv_results['std_test_score'],
        cv_results['params'],
    )
    for mean_score, score_std, candidate in per_candidate:
        # The spread is reported as two standard deviations.
        print("%0.3f (+/-%0.03f) for %r" % (mean_score, score_std * 2, candidate))

    print("Detailed classification report:")
    print()
    print("The model is trained on the full training set.")
    print("The scores are computed on the full test set.")
    print()

    predictions = search.predict(X_test)
    print("accuracy on test set:", score_function(y_test, predictions))
    print(sklearn.metrics.confusion_matrix(y_test, predictions))
    # A full classification_report(y_test, predictions) is intentionally not printed.
    print()

    return search
def classify_single_svm(dataset, labels, *, C=10, gamma=1e-5, kernel='rbf', test_size=0.2):
    """
    Train a single SVM with fixed hyperparameters and report test accuracy.

    The whole dataset is rescaled with `normalize` (before splitting), a
    shuffled train/test split is made, one SVC is fitted on the training part
    and its accuracy and confusion matrix on the test part are printed.

    Parameters:
        dataset: 2-D array of samples x features.
        labels: array of class labels, one per sample.
        C: SVM regularisation parameter (keyword-only, default 10).
        gamma: kernel coefficient (keyword-only, default 1e-5).
        kernel: kernel name passed to SVC (keyword-only, default 'rbf').
        test_size: fraction of samples held out for testing
            (keyword-only, default 0.2).

    Returns:
        The fitted SVC.
    """
    score_function = sklearn.metrics.accuracy_score

    normalize(dataset)
    X_train, X_test, y_train, y_test = train_test_split(
        dataset, labels, test_size=test_size, shuffle=True
    )

    # Fix: the hyperparameters used to be assigned to locals but never handed
    # to SVC, so the model was silently trained with sklearn's defaults.
    classifier = SVC(C=C, gamma=gamma, kernel=kernel)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    print("accuracy on test set:", score_function(y_test, y_pred))
    print(sklearn.metrics.confusion_matrix(y_test, y_pred))

    return classifier
def normalize(dataset):
    """
    Min-max scale `dataset` to the range [-1, 1].

    The scaling is requested without copying (`copy=False`) and the return
    value of `minmax_scale` is discarded, so callers rely on `dataset` itself
    being modified. Returns None.
    """
    # Same as in Bruna's thesis.
    target_range = (-1, 1)
    minmax_scale(dataset, copy=False, feature_range=target_range)
# def load_coefficients_old():
# """
# Only works for results\js0-3_J3_L3_sigma5_2017-11-09_18-58-23.dat !!!
# """
# path = "results\js0-3_J3_L3_sigma5_2017-11-09_18-58-23.dat"
# shape = (100, 4483, 16, 32, 16)
# dtype = np.float32
# coefficients = np.memmap(path, shape=shape, dtype=dtype, mode="r")
# return coefficients
def visualize(classifier):
    """
    Placeholder for classifier visualisation.

    The `classifier` argument is currently ignored; the function always
    returns 1.
    """
    placeholder_result = 1
    return placeholder_result
if __name__ == '__main__':
    # Class labels for the two hemisphere classes.
    AFFECTED = 1
    UNAFFECTED = -1

    # This is how I now load coefficients:
    scattering_coefficients_path = r"F:\GEERT\results\js0-5_J5_npointsfourier20_sigma0.0129_2017-11-24_17-50-25.dat"
    # Open read-only. open_memmap defaults to mode 'r+', and normalize() scales
    # in place, so a writable mapping risks modifying the results file on disk
    # if the selection below is ever changed from a copy to a view.
    scattering_coefficients = open_memmap(scattering_coefficients_path, mode="r")

    # Upper bound on the number of samples taken from each class.
    max_n_samples_class = 150

    # Only n_samples is used; the full unpack is kept so a file that is not
    # 5-dimensional still fails loudly here.
    n_samples, _n_transforms, _width, _height, _depth = scattering_coefficients.shape

    # By convention, we always choose the same number of samples of each class,
    # where all the affected hemispheres come first.
    n_samples_class = min(n_samples // 2, max_n_samples_class)
    labels = np.concatenate([
        np.repeat(AFFECTED, n_samples_class),
        np.repeat(UNAFFECTED, n_samples_class),
    ])

    # Flatten all coefficients for each sample. Coefficients are highly
    # correlated. What to do about that? E.g. discrete cosine transform or
    # principal component analysis.
    scattering_coefficients = scattering_coefficients.reshape((n_samples, -1))

    # Take the first and the last n_samples_class rows. Indexing with an index
    # array yields an in-memory, writable copy, so later in-place scaling does
    # not touch the memory-mapped file.
    selected_rows = np.r_[:n_samples_class, -n_samples_class:0]
    scattering_coefficients = scattering_coefficients[selected_rows]

    print(scattering_coefficients.shape)
    print(labels.shape)

    classify_single_svm(scattering_coefficients, labels)