Signed-off-by: Johannes Theiner <j.theiner@live.de> #SPM-32: add work 2h development
103 lines
3.1 KiB
Java
103 lines
3.1 KiB
Java
package de.hsel.spm.baudas.analysis;
|
|
|
|
import org.jetbrains.annotations.NotNull;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
|
|
|
|
/**
|
|
* Cluster Analysis.
|
|
*
|
|
* @author Johannes Theiner
|
|
* @version 0.1
|
|
* @since 1.0
|
|
*/
|
|
public class Cluster implements Analysis<Map<Integer, Map<String, String>>> {
|
|
|
|
private Instances instances;
|
|
private Map<Integer, Map<String, String>> result;
|
|
|
|
public Cluster(File file) {
|
|
instances = load(file);
|
|
}
|
|
|
|
/**
|
|
* get result of cluster analysis, cluster 0 is the average.
|
|
*
|
|
* @return Result
|
|
*/
|
|
@Override
|
|
public Map<Integer, Map<String, String>> getResult() {
|
|
if (result != null) {
|
|
return result;
|
|
}
|
|
result = new HashMap<>();
|
|
int[] keepIndexes = new int[]{Attribute.SEX, Attribute.AGE, Attribute.MARITAL_STATUS, Attribute.SHOPPING_DAY, Attribute.SHOPPING_HOUR, Attribute.RESIDENCE};
|
|
Remove remove = new Remove();
|
|
|
|
try {
|
|
remove.setAttributeIndicesArray(keepIndexes);
|
|
remove.setInvertSelection(true);
|
|
remove.setInputFormat(instances);
|
|
instances = Filter.useFilter(instances, remove);
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
}
|
|
|
|
//creating a single cluster to get average, weka has no way to get that from a bigger one.
|
|
SimpleKMeans averageMeans = new SimpleKMeans();
|
|
Instances averageCentroids = null;
|
|
try {
|
|
averageMeans.setNumClusters(1);
|
|
averageMeans.setPreserveInstancesOrder(true);
|
|
averageMeans.buildClusterer(instances);
|
|
averageCentroids = averageMeans.getClusterCentroids();
|
|
|
|
} catch (Exception ex) {
|
|
ex.printStackTrace();
|
|
}
|
|
assert averageCentroids != null;
|
|
|
|
//creating real cluster
|
|
SimpleKMeans fullMeans = new SimpleKMeans();
|
|
try {
|
|
fullMeans.setNumClusters(5);
|
|
fullMeans.setPreserveInstancesOrder(true);
|
|
fullMeans.buildClusterer(instances);
|
|
|
|
int count = 0;
|
|
count = putIntoMap(averageCentroids, count);
|
|
|
|
Instances centroids = fullMeans.getClusterCentroids();
|
|
putIntoMap(centroids, count);
|
|
|
|
} catch (Exception ex) {
|
|
ex.printStackTrace();
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* puts data into map.
|
|
*
|
|
* @param centroids cluster analysis result
|
|
* @param count current insert count
|
|
* @return count increment
|
|
*/
|
|
private int putIntoMap(@NotNull Instances centroids, int count) {
|
|
for (int i = 0; i < centroids.numInstances(); i++) {
|
|
Map<String, String> map = new HashMap<>();
|
|
for (int j = 0; j < centroids.numAttributes(); j++) {
|
|
map.put(centroids.attribute(j).name(), centroids.instance(i).stringValue(j));
|
|
}
|
|
result.put(count, map);
|
|
count++;
|
|
}
|
|
return count;
|
|
}
|
|
} |