Skip to content

Commit

Permalink
refactor: rework options and write TS definitions
Browse files Browse the repository at this point in the history
BREAKING CHANGE:

the `disFunc` option was renamed to `distanceFunction`.
  • Loading branch information
targos committed May 2, 2019
1 parent f02ad0a commit f49c7af
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 175 deletions.
30 changes: 29 additions & 1 deletion hclust.d.ts
Original file line number Diff line number Diff line change
@@ -1 +1,29 @@
declare module 'hclust' {}
declare module 'ml-hclust' {
  /** Names of the linkage criteria that `agnes` accepts for its `kind` option. */
  export type LinkageKind =
    | 'single'
    | 'complete'
    | 'average'
    | 'centroid'
    | 'ward';

  /**
   * Options for `agnes`. Every property is optional because the
   * implementation supplies a default for each of them.
   */
  export interface AgnesOptions {
    /** Distance between two points. Defaults to the euclidean distance. */
    distanceFunction?: (a: number[], b: number[]) => number;
    /** Linkage criterion. Defaults to `'single'`. */
    kind?: LinkageKind;
    /**
     * Set to `true` when `data` already is a distance matrix, in which case
     * `distanceFunction` is not applied to it. Defaults to `false`.
     */
    isDistanceMatrix?: boolean;
  }

  /**
   * Options for `diana`. The property is optional because the implementation
   * supplies a default for it.
   */
  export interface DianaOptions {
    /** Distance between two points. Defaults to the euclidean distance. */
    distanceFunction?: (a: number[], b: number[]) => number;
  }

  /** A node of the tree returned by `agnes` and `diana`. */
  export interface Cluster {
    /** Direct sub-clusters of this node. */
    children: Cluster[];
    /** Distance value stored on this node by the clustering algorithm. */
    distance: number;
    // NOTE(review): diana.js fills this array with ClusterLeaf objects (each
    // carrying its own numeric `index`), not with raw numbers — confirm that
    // `number[]` is the intended element type.
    index: number[];
    cut: (threshold: number) => Cluster[];
    group: (minGroups: number) => Cluster;
    traverse: (cb: (cluster: Cluster) => void) => void;
  }

  /** Agglomerative clustering: repeatedly merges the least dissimilar nodes. */
  export function agnes(data: number[][], options?: AgnesOptions): Cluster;
  /** Divisive clustering: repeatedly splits the higher level clusters. */
  export function diana(data: number[][], options?: DianaOptions): Cluster;
}
49 changes: 26 additions & 23 deletions src/agnes.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,50 +108,53 @@ function median(values, alreadySorted) {
}
}

var defaultOptions = {
disFunc: euclidean,
kind: 'single',
isDistanceMatrix: false
};

/**
* Continuously merge nodes that have the least dissimilarity
* @param {Array <Array <number>>} distance - Array of points to be clustered
* @param {json} options
* @param {Array<Array<number>>} distance - Array of points to be clustered
* @param {object} [options]
* @param {Function} [options.distanceFunction]
* @param {string} [options.kind]
* @param {boolean} [options.isDistanceMatrix]
* @option isDistanceMatrix: Is the input a distance matrix?
* @constructor
*/
export function agnes(data, options) {
options = Object.assign({}, defaultOptions, options);
export function agnes(data, options = {}) {
const {
distanceFunction = euclidean,
kind = 'single',
isDistanceMatrix = false
} = options;
let kindFunc;

var len = data.length;
var distance = data; // If source
if (!options.isDistanceMatrix) {
distance = distanceMatrix(data, options.disFunc);
if (!isDistanceMatrix) {
distance = distanceMatrix(data, distanceFunction);
}

// allows to use a string or a given function
if (typeof options.kind === 'string') {
switch (options.kind) {
if (typeof kind === 'string') {
switch (kind) {
case 'single':
options.kind = simpleLink;
kindFunc = simpleLink;
break;
case 'complete':
options.kind = completeLink;
kindFunc = completeLink;
break;
case 'average':
options.kind = averageLink;
kindFunc = averageLink;
break;
case 'centroid':
options.kind = centroidLink;
kindFunc = centroidLink;
break;
case 'ward':
options.kind = wardLink;
kindFunc = wardLink;
break;
default:
throw new RangeError('Unknown kind of similarity');
throw new RangeError(`unknown kind of linkage: ${kind}`);
}
} else if (typeof options.kind !== 'function') {
throw new TypeError('Undefined kind of similarity');
} else if (typeof kind !== 'function') {
throw new TypeError('kind must be a string or function');
}

var list = new Array(len);
Expand Down Expand Up @@ -185,7 +188,7 @@ export function agnes(data, options) {
sdistance[f] = list[k].index[f].index;
}
}
dis = options.kind(fdistance, sdistance, distance).toFixed(4);
dis = kindFunc(fdistance, sdistance, distance).toFixed(4);
if (dis in d) {
d[dis].push([list[j], list[k]]);
} else {
Expand Down
166 changes: 15 additions & 151 deletions src/diana.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,116 +3,6 @@ import { euclidean } from 'ml-distance-euclidean';
import ClusterLeaf from './ClusterLeaf';
import Cluster from './Cluster';

/**
* @private
* @param {Array <Array <number>>} cluster1
* @param {Array <Array <number>>} cluster2
* @param {function} disFun
* @returns {number}
*/
function simpleLink(cluster1, cluster2, disFun) {
var m = 10e100;
for (var i = 0; i < cluster1.length; i++) {
for (var j = i; j < cluster2.length; j++) {
var d = disFun(cluster1[i], cluster2[j]);
m = Math.min(d, m);
}
}
return m;
}

/**
* @private
* @param {Array <Array <number>>} cluster1
* @param {Array <Array <number>>} cluster2
* @param {function} disFun
* @returns {number}
*/
function completeLink(cluster1, cluster2, disFun) {
var m = -1;
for (var i = 0; i < cluster1.length; i++) {
for (var j = i; j < cluster2.length; j++) {
var d = disFun(cluster1[i], cluster2[j]);
m = Math.max(d, m);
}
}
return m;
}

/**
* @private
* @param {Array <Array <number>>} cluster1
* @param {Array <Array <number>>} cluster2
* @param {function} disFun
* @returns {number}
*/
function averageLink(cluster1, cluster2, disFun) {
var m = 0;
for (var i = 0; i < cluster1.length; i++) {
for (var j = 0; j < cluster2.length; j++) {
m += disFun(cluster1[i], cluster2[j]);
}
}
return m / (cluster1.length * cluster2.length);
}

/**
* @private
* @param {Array <Array <number>>} cluster1
* @param {Array <Array <number>>} cluster2
* @param {function} disFun
* @returns {number}
*/
function centroidLink(cluster1, cluster2, disFun) {
var x1 = 0;
var y1 = 0;
var x2 = 0;
var y2 = 0;
for (var i = 0; i < cluster1.length; i++) {
x1 += cluster1[i][0];
y1 += cluster1[i][1];
}
for (var j = 0; j < cluster2.length; j++) {
x2 += cluster2[j][0];
y2 += cluster2[j][1];
}
x1 /= cluster1.length;
y1 /= cluster1.length;
x2 /= cluster2.length;
y2 /= cluster2.length;
return disFun([x1, y1], [x2, y2]);
}

/**
* @private
* @param {Array <Array <number>>} cluster1
* @param {Array <Array <number>>} cluster2
* @param {function} disFun
* @returns {number}
*/
function wardLink(cluster1, cluster2, disFun) {
var x1 = 0;
var y1 = 0;
var x2 = 0;
var y2 = 0;
for (var i = 0; i < cluster1.length; i++) {
x1 += cluster1[i][0];
y1 += cluster1[i][1];
}
for (var j = 0; j < cluster2.length; j++) {
x2 += cluster2[j][0];
y2 += cluster2[j][1];
}
x1 /= cluster1.length;
y1 /= cluster1.length;
x2 /= cluster2.length;
y2 /= cluster2.length;
return (
(disFun([x1, y1], [x2, y2]) * cluster1.length * cluster2.length) /
(cluster1.length + cluster2.length)
);
}

/**
* @private
* Returns the most distant point and its distance
Expand Down Expand Up @@ -158,11 +48,6 @@ function diff(splitting, data, disFun) {
return ans;
}

var defaultOptions = {
dist: euclidean,
kind: 'single'
};

/**
* @private
* Intra-cluster distance
Expand All @@ -186,34 +71,13 @@ function intrDist(index, data, disFun) {
/**
* Splits the higher level clusters
* @param {Array <Array <number>>} data - Array of points to be clustered
* @param {json} options
* @param {object} [options]
* @param {Function} [options.distanceFunction]
* @param {string} [options.kind]
* @constructor
*/
export function diana(data, options) {
options = Object.assign({}, defaultOptions, options);
if (typeof options.kind === 'string') {
switch (options.kind) {
case 'single':
options.kind = simpleLink;
break;
case 'complete':
options.kind = completeLink;
break;
case 'average':
options.kind = averageLink;
break;
case 'centroid':
options.kind = centroidLink;
break;
case 'ward':
options.kind = wardLink;
break;
default:
throw new RangeError('Unknown kind of similarity');
}
} else if (typeof options.kind !== 'function') {
throw new TypeError('Undefined kind of similarity');
}
export function diana(data, options = {}) {
const { distanceFunction = euclidean } = options;
var tree = new Cluster();
tree.children = new Array(data.length);
tree.index = new Array(data.length);
Expand All @@ -222,7 +86,7 @@ export function diana(data, options) {
tree.index[ind] = new ClusterLeaf(ind);
}

tree.distance = intrDist(tree.index, data, options.dist);
tree.distance = intrDist(tree.index, data, distanceFunction);
var m, M, clId, dist, rebel;
var list = [tree];
while (list.length > 0) {
Expand All @@ -233,7 +97,7 @@ export function diana(data, options) {
for (var j = 0; j < list[i].length; j++) {
for (var l = j + 1; l < list[i].length; l++) {
m = Math.max(
options.dist(
distanceFunction(
data[list[i].index[j].index],
data[list[i].index[l].index]
),
Expand All @@ -249,7 +113,7 @@ export function diana(data, options) {
M = 0;
if (list[clId].index.length === 2) {
list[clId].children = [list[clId].index[0], list[clId].index[1]];
list[clId].distance = options.dist(
list[clId].distance = distanceFunction(
data[list[clId].index[0].index],
data[list[clId].index[1].index]
);
Expand All @@ -260,11 +124,11 @@ export function diana(data, options) {
list[clId].index[2]
];
var d = [
options.dist(
distanceFunction(
data[list[clId].index[0].index],
data[list[clId].index[1].index]
),
options.dist(
distanceFunction(
data[list[clId].index[1].index],
data[list[clId].index[2].index]
)
Expand All @@ -281,7 +145,7 @@ export function diana(data, options) {
dist = 0;
for (var jj = 0; jj < splitting[0].length; jj++) {
if (ii !== jj) {
dist += options.dist(
dist += distanceFunction(
data[list[clId].index[splitting[0][jj]].index],
data[list[clId].index[splitting[0][ii]].index]
);
Expand All @@ -295,11 +159,11 @@ export function diana(data, options) {
}
splitting[1] = [rebel];
splitting[0].splice(rebel, 1);
dist = diff(splitting, data, options.dist);
dist = diff(splitting, data, distanceFunction);
while (dist.d > 0) {
splitting[1].push(splitting[0][dist.p]);
splitting[0].splice(dist.p, 1);
dist = diff(splitting, data, options.dist);
dist = diff(splitting, data, distanceFunction);
}
var fData = new Array(splitting[0].length);
C.index = new Array(splitting[0].length);
Expand All @@ -315,8 +179,8 @@ export function diana(data, options) {
sG.index[f] = list[clId].index[splitting[1][f]];
sG.children[f] = list[clId].index[splitting[1][f]];
}
C.distance = intrDist(C.index, data, options.dist);
sG.distance = intrDist(sG.index, data, options.dist);
C.distance = intrDist(C.index, data, distanceFunction);
sG.distance = intrDist(sG.index, data, distanceFunction);
list.push(C);
list.push(sG);
list[clId].children = [C, sG];
Expand Down

0 comments on commit f49c7af

Please sign in to comment.