// Softwareprojektmanagement/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java
// (102 lines, 3.0 KiB, Java)

package de.hsel.spm.baudas.analysis;
import org.jetbrains.annotations.NotNull;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
/**
 * Cluster Analysis.
 *
 * <p>Reduces the data set to a fixed selection of attributes and runs k-means on it.
 * The result maps an insert index to one centroid; every centroid is a map from
 * attribute name to the centroid's value for that attribute. Index 0 holds the centroid
 * of a single cluster built over all data (used as the average), the following indexes
 * hold the centroids of the real clusters.
 *
 * @author Johannes Theiner
 * @version 0.1
 */
public class Cluster implements Analysis<Map<Integer, Map<String, String>>> {

    /** Number of clusters requested for the real analysis. */
    private static final int CLUSTER_COUNT = 5;

    /** Data as returned by {@code load(File)}; never modified by this class. */
    private final Instances instances;

    /** Cached outcome of the analysis, {@code null} until {@link #getResult()} ran once. */
    private Map<Integer, Map<String, String>> result;

    /**
     * Creates the analysis for the given data file.
     *
     * @param file file that is handed to {@code load(File)}
     */
    public Cluster(File file) {
        instances = load(file);
    }

    /**
     * get result of cluster analysis.
     *
     * <p>The analysis is executed on the first call only, later calls return the cached
     * map. If any step fails (filtering, clustering or reading the centroids) the stack
     * trace is printed and an empty map is returned and cached, so callers never see a
     * partially filled result.
     *
     * @return Result, empty if the analysis failed
     */
    @Override
    public Map<Integer, Map<String, String>> getResult() {
        if (result != null) {
            return result;
        }

        // filled locally and published at the end, a failure half way through
        // must not leave partial data in the cached result.
        Map<Integer, Map<String, String>> computed = new HashMap<>();
        try {
            Instances filtered = keepRelevantAttributes(instances);

            //creating a single cluster to get average, weka has no way to get that from a bigger one.
            Instances averageCentroids = buildCentroids(filtered, 1);
            //creating real cluster
            Instances centroids = buildCentroids(filtered, CLUSTER_COUNT);

            int count = putIntoMap(computed, averageCentroids, 0);
            putIntoMap(computed, centroids, count);
        } catch (Exception ex) {
            ex.printStackTrace();
            computed.clear();
        }

        result = computed;
        return result;
    }

    /**
     * removes every attribute that is not part of the cluster analysis.
     *
     * @param data unfiltered data, is left untouched
     * @return copy of the data containing only the attributes used for clustering
     * @throws Exception if weka can not apply the filter to the data
     */
    private static Instances keepRelevantAttributes(Instances data) throws Exception {
        int[] keepIndexes = new int[]{
            Attribute.SEX,
            Attribute.AGE,
            Attribute.MARITAL_STATUS,
            Attribute.SHOPPING_DAY,
            Attribute.SHOPPING_HOUR,
            Attribute.RESIDENCE
        };

        Remove remove = new Remove();
        remove.setAttributeIndicesArray(keepIndexes);
        // inverted selection: the listed attributes are kept, all others are removed.
        remove.setInvertSelection(true);
        remove.setInputFormat(data);
        return Filter.useFilter(data, remove);
    }

    /**
     * runs k-means with the given number of clusters and returns the centroids.
     *
     * @param data        data to build the clusters from
     * @param numClusters number of clusters to request
     * @return centroids of the built clusters, never {@code null}
     * @throws Exception if weka fails to build the clusterer or yields no centroids
     */
    @NotNull
    private static Instances buildCentroids(Instances data, int numClusters) throws Exception {
        SimpleKMeans means = new SimpleKMeans();
        means.setNumClusters(numClusters);
        means.setPreserveInstancesOrder(true);
        means.buildClusterer(data);

        Instances centroids = means.getClusterCentroids();
        if (centroids == null) {
            // explicit check instead of an assert, which is skipped unless the JVM runs with -ea.
            throw new IllegalStateException("k-means with " + numClusters + " cluster(s) returned no centroids");
        }
        return centroids;
    }

    /**
     * puts data into map.
     *
     * <p>Every centroid is stored under the next free index as a map from attribute name
     * to the value of that attribute.
     * NOTE(review): values are read with {@code stringValue}, which presumably requires
     * nominal or string attributes - confirm that all kept attributes are of such a type.
     *
     * @param target    map the centroids are inserted into
     * @param centroids cluster analysis result
     * @param count     current insert count
     * @return count increment
     */
    private static int putIntoMap(@NotNull Map<Integer, Map<String, String>> target,
                                  @NotNull Instances centroids, int count) {
        for (int i = 0; i < centroids.numInstances(); i++) {
            Map<String, String> map = new HashMap<>();
            for (int j = 0; j < centroids.numAttributes(); j++) {
                map.put(centroids.attribute(j).name(), centroids.instance(i).stringValue(j));
            }
            target.put(count, map);
            count++;
        }
        return count;
    }
}