import numpy as np
def three_n(data_set):
m = data_set.shape[0] # m 樣本個(gè)數(shù)(n維)
dist_mat = np.diag(np.ones(m) * np.inf) # 初始化 距離矩陣
for r in range(m):
for c in range(r + 1, m):
dist_mat[r, c] = np.linalg.norm(data_set[r] - data_set[c])
dist_mat = dist_mat + dist_mat.T # 距離矩陣 計(jì)算完成
adjacency = np.zeros((m, m)) # 初始化 有向圖的鄰接矩陣
nonzero_len_init = 0
r = 0
while 1:
row_min_index = np.argmin(dist_mat, axis=1) # 每一行的最小距離的索引,也就是每個(gè)數(shù)據(jù)的第k最近鄰居
for i in range(m): # 行和為每一個(gè)頂點(diǎn)的出度,代表著距離每一個(gè)頂點(diǎn)最近的(1的個(gè)數(shù))個(gè)
adjacency[i, row_min_index[i]] = 1 # 列和為每一個(gè)頂點(diǎn)的入度,代表著鄰居的個(gè)數(shù),也是該頂點(diǎn)出現(xiàn)在其他頂點(diǎn)的鄰居中的次數(shù),即“密度”
dist_mat[i, row_min_index[i]] = np.inf # 更新距離矩陣
density = np.sum(adjacency, axis=0, dtype=np.int32) # 計(jì)算每個(gè)頂點(diǎn)的“密度”
nonzero_len = len(np.nonzero(density)[0]) # 統(tǒng)計(jì)“密度”不為0的頂點(diǎn)個(gè)數(shù)
r += 1
if nonzero_len == m or nonzero_len == nonzero_len_init:
nn = {}
p = 0
for i in range(m):
nn[i + 1] = np.nonzero(adjacency.T)[1].tolist()[p:p + density[i]]
nn[i + 1] = [x + 1 for x in nn[i + 1]]
p = p + density[i]
return density.tolist(), nn, r
nonzero_len_init = nonzero_len
s = np.mat('1,2,3;2,3,5;4,6,7;1,2,1;4,4,3;2,6,9;1,2,5')
d = three_n(s)
print(d)
運(yùn)行結(jié)果
函數(shù)返回結(jié)果:
第一個(gè)元素:每一個(gè)數(shù)據(jù)點(diǎn)的鄰居數(shù)(被其他數(shù)據(jù)點(diǎn)的鄰域覆蓋的次數(shù),即“密度”)
第二個(gè)元素:每一個(gè)數(shù)據(jù)點(diǎn)的鄰域集(每一列)
第三個(gè)元素:supk