Source code for skcmeans.initialization

from scipy.spatial.distance import cdist
import numpy as np

from sklearn.utils import check_random_state

from . import algorithms


def initialize_random(x, k, random_state=None, eps=1e-12):
    """Pick random rows of the data as starting centers.

    Rows are drawn without replacement by taking the first ``k`` entries
    of a random permutation of the row indices. Memberships are then
    derived from each sample's distance to the chosen centers.

    Parameters
    ----------
    x : :class:`np.ndarray`
        (n_samples, n_features)
        The original data.
    k : int
        The number of points to select. Because the permutation is
        sliced, at most ``n_samples`` points are returned.
    random_state : int or :class:`np.random.RandomState`, optional
        The generator used for initialization. Using an integer fixes
        the seed.
    eps : float, optional
        Constant added to every coordinate of the selected points.
        Presumably this keeps a sample from lying at exactly zero
        distance from the center drawn from it.

    Returns
    -------
    memberships : :class:`np.ndarray`
        (n_samples, k)
        One minus each sample-to-center distance divided by the sum of
        that sample's distances to all centers.
    selection : :class:`np.ndarray`
        (k, n_features)
        The randomly chosen rows of ``x``, each shifted by ``eps``.

    """
    rng = check_random_state(random_state)
    chosen_rows = rng.permutation(x.shape[0])[:k]
    centers = x[chosen_rows] + eps

    # Pairwise distances, one row per sample and one column per center.
    dist = cdist(x, centers)
    # keepdims keeps the row totals as a column so they broadcast
    # across the center axis.
    row_totals = dist.sum(axis=1, keepdims=True)
    memberships = 1 - dist / row_totals
    return memberships, centers
def initialize_probabilistic(x, k, random_state=None):
    """Derive starting memberships and centers from a probabilistic fit.

    An ``algorithms.Probabilistic`` clusterer with ``k`` clusters is
    converged on the data, and its resulting state is returned.

    Parameters
    ----------
    x : :class:`np.ndarray`
        (n_samples, n_features)
        The original data.
    k : int
        The number of clusters requested from the clusterer.
    random_state : int or :obj:`np.random.RandomState`, optional
        The generator used for initialization. Using an integer fixes
        the seed. Forwarded unchanged to the clusterer.

    Returns
    -------
    :class:`np.ndarray`
        (n_samples, k)
        Cluster memberships, read from the converged clusterer.
    :class:`np.ndarray`
        (k, n_features)
        Cluster centers, read from the converged clusterer.

    """
    model = algorithms.Probabilistic(
        n_clusters=k,
        random_state=random_state,
    )
    model.converge(x)
    return model.memberships, model.centers