package de.hsel.spm.baudas.analysis; import org.jetbrains.annotations.NotNull; import weka.clusterers.SimpleKMeans; import weka.core.Instances; import weka.filters.Filter; import weka.filters.unsupervised.attribute.Remove; import java.io.File; import java.util.HashMap; import java.util.Map; /** * Cluster Analysis. * * @author Johannes Theiner * @version 0.1 * @since 1.0 */ public class Cluster implements Analysis>> { private Instances instances; private Map> result; public Cluster(File file) { instances = load(file); } /** * get result of cluster analysis, cluster 0 is the average. * * @return Result */ @Override public Map> getResult() { if (result != null) { return result; } result = new HashMap<>(); int[] keepIndexes = new int[]{Attribute.SEX, Attribute.AGE, Attribute.MARITAL_STATUS, Attribute.SHOPPING_DAY, Attribute.SHOPPING_HOUR, Attribute.RESIDENCE}; Remove remove = new Remove(); try { remove.setAttributeIndicesArray(keepIndexes); remove.setInvertSelection(true); remove.setInputFormat(instances); instances = Filter.useFilter(instances, remove); } catch (Exception e) { e.printStackTrace(); } //creating a single cluster to get average, weka has no way to get that from a bigger one. SimpleKMeans averageMeans = new SimpleKMeans(); Instances averageCentroids = null; try { averageMeans.setNumClusters(1); averageMeans.setPreserveInstancesOrder(true); averageMeans.buildClusterer(instances); averageCentroids = averageMeans.getClusterCentroids(); } catch (Exception ex) { ex.printStackTrace(); } assert averageCentroids != null; //creating real cluster SimpleKMeans fullMeans = new SimpleKMeans(); try { fullMeans.setNumClusters(5); fullMeans.setPreserveInstancesOrder(true); fullMeans.buildClusterer(instances); int count = 0; count = putIntoMap(averageCentroids, count); Instances centroids = fullMeans.getClusterCentroids(); putIntoMap(centroids, count); } catch (Exception ex) { ex.printStackTrace(); } return result; } /** * puts data into map. * * @param centroids cluster analysis result * @param count current insert count * @return count increment */ private int putIntoMap(@NotNull Instances centroids, int count) { for (int i = 0; i < centroids.numInstances(); i++) { Map map = new HashMap<>(); for (int j = 0; j < centroids.numAttributes(); j++) { map.put(centroids.attribute(j).name(), centroids.instance(i).stringValue(j)); } result.put(count, map); count++; } return count; } }