# cert_origin.py
from datetime import timedelta
from itertools import combinations
from typing import List
import matplotlib.pyplot as plt
import numpy as np
from cryptography import x509
from cryptography.x509.oid import NameOID
import fingerprint
from certificate_analysis import certificate_validity_overlap, are_certs_from_same_company, \
get_certs_from_list, gen_pem_files_list, create_key_to_cert_list
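"""
This program analyses a directory of PEM certificates: it counts RSA and DSA
keys, finds certificates that share a public key, and reports subject changes
and validity-period overlaps between certificates that share a key.
"""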
# Generate a list of all PEM files in the data directory, then gather
# statistics about the certificates in the data set.
# Recorded data:
#   - duplicate public keys
#   - issuer's name & organization
#   - number of RSA & DSA keys
def main(data_directory='C:/Users/drewr/Documents/Graduate_Files/Classes/ENEE657/leaf_cert/',
         fingerprint_filename=r"./classiftable_20160716.csv"):
    """Run the certificate analysis and plot validity-overlap histograms.

    Loads the fingerprint table and every PEM certificate found under
    ``data_directory``, prints key statistics, writes the duplicate-key and
    company-change reports, and shows two histograms of how long certificates
    sharing a public key overlap in validity (one in seconds on a log axis,
    one in whole days on a linear axis).

    :param data_directory: directory passed to ``gen_pem_files_list``.
        Defaults to the original author's local path (e.g. '../leaf_cert/'
        is a relative alternative).
    :param fingerprint_filename: classification table read by
        ``fingerprint.read_fingerprint_table``; the default file comes from
        https://crocs.fi.muni.cz/public/papers/usenix2016
    """
    mask_prob_dict, groups = fingerprint.read_fingerprint_table(fingerprint_filename)
    pem_files_list = gen_pem_files_list(data_directory)
    pem_certs = get_certs_from_list(pem_files_list)
    (key_to_certificate_dict, num_rsa_keys, num_dsa_keys, unique_keys, duplicate_keys,
     num_keys_in_each_group) = create_key_to_cert_list(pem_certs, mask_prob_dict, groups)

    print("Total number of certificates: {0}. ".format(len(pem_certs)))
    print("Number of DSA certs = {0}".format(num_dsa_keys))
    print("Number of RSA certs = {0}. Number of unique certs: {1}. Number of duplicates: {2} ".format(
        num_rsa_keys, len(unique_keys), len(duplicate_keys)))
    print("Number of keys per group, assuming taking the most likely group per key:")
    print(num_keys_in_each_group)

    num_certs_with_duplicate_keys = write_duplicate_keys(key_to_certificate_dict)
    print("Certs with dup keys: ", num_certs_with_duplicate_keys)

    _, validity_overlap_dict, num_changed_subjects, num_overlap_validity = find_certificate_company_changes(
        key_to_certificate_dict, exclude_same_day=False)
    print("Found {0} changed issuers and {1} overlapping validity instances".format(num_changed_subjects,
                                                                                   num_overlap_validity))

    overlap_times_days = overlap_time_dict_to_timedelta_list(validity_overlap_dict)
    overlap_times = overlap_time_dict_to_timedelta_list(validity_overlap_dict, False)

    # With no overlapping pairs there is nothing to plot, and min()/max()
    # below would raise ValueError on the empty lists.
    if not overlap_times:
        print("No overlapping validity periods found; skipping histograms")
        print("DONE")
        return

    # Histogram of overlap in seconds, on log-spaced bins.
    seconds_to_days = 1 / (24 * 60 * 60)
    # log10 is undefined for 0, and a zero-second overlap is possible because
    # same-day overlaps are not excluded above; build the bin edges from the
    # strictly positive values only. Values outside the edges are simply not
    # counted by the histogram.
    positive_overlaps = [seconds for seconds in overlap_times if seconds > 0]
    if positive_overlaps:
        overlap_std = np.std(overlap_times)
        overlap_mean = np.mean(overlap_times)
        bin_spacing = np.logspace(np.log10(min(positive_overlaps)), np.log10(max(positive_overlaps)), num=15)
        plt.hist(overlap_times, bins=bin_spacing)
        plt.xlabel('Time (seconds) of certificate overlap (log)')
        plt.ylabel('Number of overlapping elements')
        plt.title('Histogram of duplicate certificate validity overlap')
        plt.gca().set_xscale("log")
        # NOTE(review): the annotation position is in data coordinates tuned
        # for the original data set; it may fall outside the axes for others.
        plt.text(10 ** 3, 1300, r'$\mu={0:.2f}\ days,\ \sigma={1:.2f}\ days$'.format(overlap_mean * seconds_to_days,
                                                                                     overlap_std * seconds_to_days))
        plt.show()

    # Histogram of overlap in whole days, on linear bins.
    overlap_days_std = np.std(overlap_times_days)
    overlap_days_mean = np.mean(overlap_times_days)
    plt.hist(overlap_times_days, bins=20)
    plt.xlabel('Time (days) of certificate overlap')
    plt.ylabel('Number of overlapping elements')
    plt.title('Histogram of duplicate certificate validity overlap')
    plt.text(600, 1200, r'$\mu={0:.2f}\ days,\ \sigma={1:.2f}\ days$'.format(overlap_days_mean, overlap_days_std))
    plt.show()
    print("DONE")
def find_certificate_company_changes(key_to_certificate_dict, exclude_same_day=True):
    """Compare every pair of certificates that share a public key.

    For each key with more than one certificate, every unordered pair is
    checked with ``are_certs_from_same_company`` and
    ``certificate_validity_overlap``. The findings are also written to
    'validity_overlap.txt' and 'changed_subjects.txt' in the current working
    directory.

    :param key_to_certificate_dict: mapping of public key -> list of
        ``x509.Certificate`` objects using that key.
    :param exclude_same_day: forwarded unchanged to
        ``certificate_validity_overlap``.
    :return: tuple ``(changed_subject_dict, validity_overlap_dict,
        num_changed_subjects, num_overlap_validity)``. The first dict maps a
        key to a list of ``(cert_a, cert_b, difference_list)``; the second
        maps a key to a list of ``(cert_a, cert_b, overlap_time)``. Keys with
        no findings are omitted. The counts are the total number of pairs in
        each dict.
    """
    changed_subject_dict = {}
    validity_overlap_dict = {}
    num_changed_subjects = 0
    num_overlap_validity = 0

    for pub_mod, cert_list in key_to_certificate_dict.items():
        if len(cert_list) < 2:
            continue  # a key used by a single certificate has nothing to compare
        changes_list = []
        validity_list = []
        for cert_a, cert_b in combinations(cert_list, 2):
            assert isinstance(cert_a, x509.Certificate)
            assert isinstance(cert_b, x509.Certificate)
            # Did the company named in the certificate change?
            companies_are_same, difference_list = are_certs_from_same_company(cert_a, cert_b)
            if not companies_are_same:
                changes_list.append((cert_a, cert_b, difference_list))
            # Do the validity periods of the two certificates overlap?
            is_overlap, overlap_time = certificate_validity_overlap(cert_a, cert_b,
                                                                    exclude_same_day=exclude_same_day)
            if is_overlap:
                validity_list.append((cert_a, cert_b, overlap_time))
        if changes_list:
            changed_subject_dict[pub_mod] = changes_list
            num_changed_subjects += len(changes_list)
        if validity_list:
            validity_overlap_dict[pub_mod] = validity_list
            num_overlap_validity += len(validity_list)

    _write_validity_overlaps(validity_overlap_dict)
    _write_changed_subjects(changed_subject_dict)
    return changed_subject_dict, validity_overlap_dict, num_changed_subjects, num_overlap_validity


def _write_validity_overlaps(validity_overlap_dict, path='validity_overlap.txt'):
    """Write one 'serialA-serialB: overlap' line per overlapping pair."""
    with open(path, 'w') as out_file:
        for overlaps in validity_overlap_dict.values():
            for cert_a, cert_b, overlap in overlaps:
                out_file.write("{0}-{1}: {2}\n".format(cert_a.serial_number, cert_b.serial_number, overlap))


def _oid_display_name(oid):
    """Return the text between 'name=' and the final ')' in ``repr(oid)``."""
    oid_repr = repr(oid)
    return oid_repr[oid_repr.find("name=") + len("name="):oid_repr.rfind(")")]


def _write_changed_subjects(changed_subject_dict, path='changed_subjects.txt'):
    """Write one 'serialA-serialB: oid: old -> new, ...' line per changed pair."""
    with open(path, 'wb') as out_file:
        for changed_pairs in changed_subject_dict.values():
            for cert_a, cert_b, changes in changed_pairs:
                pair_label = "{0}-{1}: ".format(cert_a.serial_number, cert_b.serial_number)
                parts = []
                for name_a, name_b in changes:
                    assert isinstance(name_a, x509.NameAttribute)
                    assert isinstance(name_b, x509.NameAttribute)
                    assert isinstance(name_a.oid, x509.ObjectIdentifier)
                    parts.append("{0}: {1} -> {2}, ".format(_oid_display_name(name_a.oid),
                                                            name_a.value, name_b.value))
                # Encode the whole record before writing anything, so a value
                # that cannot be encoded never leaves a half-written line.
                try:
                    record = (pair_label + "".join(parts) + "\n").encode('utf-8')
                except UnicodeEncodeError:
                    record = (pair_label + "ERROR. Changes has non-valid character\n").encode('utf-8')
                out_file.write(record)
def write_duplicate_keys(key_to_certificate_dict):
    """Write the certificates that share a public key to 'dupes.txt'.

    For every key used by more than one certificate, one line is written
    listing, for each certificate, the issuer's common name immediately
    followed by the expiry date formatted as "%B %d %Y"; entries are
    separated by ", ".

    :param key_to_certificate_dict: mapping of public key -> list of
        certificates using that key.
    :return: number of surplus certificates, i.e. the sum over duplicated
        keys of (certificates using the key - 1).
    """
    certs_with_dup_keys = 0
    with open('dupes.txt', 'wb') as out_file:
        for cert_list in key_to_certificate_dict.values():
            if len(cert_list) < 2:
                continue
            certs_with_dup_keys += len(cert_list) - 1
            last_index = len(cert_list) - 1
            for index, cert in enumerate(cert_list):
                # An issuer is not required to carry a commonName; the lookup
                # then returns an empty list, so indexing [0] would raise.
                common_names = cert.issuer.get_attributes_for_oid(NameOID.COMMON_NAME)
                issuer_name = common_names[0].value if common_names else "<no common name>"
                try:
                    # NOTE(review): name and date are written back-to-back with
                    # no separator, as before - confirm this is intended.
                    out_file.write(issuer_name.encode('utf-8'))
                    out_file.write(cert.not_valid_after.strftime("%B %d %Y").encode('utf-8'))
                except UnicodeEncodeError:
                    print("Still encountered unicode error")
                if index < last_index:
                    out_file.write(", ".encode('utf-8'))
            out_file.write("\n".encode('utf-8'))
    return certs_with_dup_keys
def overlap_time_dict_to_timedelta_list(valid_overlap_dict, use_days=True) -> List[float]:
    """Flatten the validity-overlap dict into a list of plain numbers.

    Despite the function's name, the result holds numbers, not ``timedelta``
    objects, so it can be passed straight to numpy / matplotlib.

    :param valid_overlap_dict: mapping of key -> list of
        ``(cert_a, cert_b, overlap)`` tuples where ``overlap`` is a
        ``timedelta``.
    :param use_days: if True, use the whole-day component ``overlap.days``
        (an int); otherwise use ``overlap.total_seconds()`` (a float).
    :return: one number per tuple, in dict iteration order.
    """
    overlap_values = []
    for overlaps in valid_overlap_dict.values():
        for _, _, overlap in overlaps:
            assert isinstance(overlap, timedelta)
            overlap_values.append(overlap.days if use_days else overlap.total_seconds())
    return overlap_values
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()