diff --git a/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java b/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java
new file mode 100644
index 0000000..10e25b1
--- /dev/null
+++ b/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java
@@ -0,0 +1,101 @@
+package de.hsel.spm.baudas.analysis;
+
+import weka.clusterers.SimpleKMeans;
+import weka.core.Instances;
+import weka.filters.Filter;
+import weka.filters.unsupervised.attribute.Remove;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Cluster analysis: filters the loaded instances and clusters them with Weka's SimpleKMeans.
+ *
+ * @author Johannes Theiner
+ * @version 0.1
+ */
+public class Cluster implements Analysis<Map<Integer, Map<String, String>>> {
+
+    private Instances instances;
+    private Map<Integer, Map<String, String>> result;
+
+    public Cluster(File file) {
+        instances = load(file);
+    }
+
+    /**
+     * Runs the cluster analysis on the first call and returns the cached result afterwards.
+     *
+     * @return centroid maps by insert index; may be empty or partial, since failures are only printed
+     */
+    @Override
+    public Map<Integer, Map<String, String>> getResult() {
+        if(result == null) {
+            result = new HashMap<>();
+            //TODO: adjust once #SPM-17 is merged.
+            int[] keepIndexes = new int[]{0, 1, 3, 5, 6, 7};
+            Remove remove = new Remove();
+
+            try {
+                remove.setAttributeIndicesArray(keepIndexes);
+                remove.setInvertSelection(true);
+                remove.setInputFormat(instances);
+                instances = Filter.useFilter(instances, remove);
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+
+            //build a single cluster to get the average, weka has no way to get that from a bigger one.
+            SimpleKMeans fullMeans = new SimpleKMeans();
+            Instances fullCentroids = null;
+            try {
+                fullMeans.setNumClusters(1);
+                fullMeans.setPreserveInstancesOrder(true);
+                fullMeans.buildClusterer(instances);
+                fullCentroids = fullMeans.getClusterCentroids();
+
+            } catch (Exception ex) {
+                ex.printStackTrace();
+            }
+            assert fullCentroids != null;
+
+            //build the real clustering (5 clusters); its centroids are stored after the average.
+            SimpleKMeans kMeans = new SimpleKMeans();
+            try {
+                kMeans.setNumClusters(5);
+                kMeans.setPreserveInstancesOrder(true);
+                kMeans.buildClusterer(instances);
+
+                int count = 0;
+                count = putIntoMap(fullCentroids, count);
+
+                Instances centroids = kMeans.getClusterCentroids();
+                putIntoMap(centroids, count);
+
+            } catch (Exception ex) {
+                ex.printStackTrace();
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Adds one attribute-name to string-value map per centroid to the result, keyed by a running index.
+     *
+     * @param centroids cluster analysis result
+     * @param count index to use for the first centroid inserted
+     * @return count advanced by the number of centroids inserted
+     */
+    private int putIntoMap(Instances centroids, int count) {
+        for (int i = 0; i < centroids.numInstances(); i++) {
+            Map<String, String> map = new HashMap<>();
+            for (int j = 0; j < centroids.numAttributes(); j++) {
+                map.put(centroids.attribute(j).name(), centroids.instance(i).stringValue(j));
+            }
+            result.put(count, map);
+            count++;
+        }
+        return count;
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java b/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java
new file mode 100644
index 0000000..8eddd85
--- /dev/null
+++ b/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java
@@ -0,0 +1,64 @@
+package de.hsel.spm.baudas.analysis;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.net.URL;
+import java.time.Duration;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTimeout;
+
+/**
+ * Tests the cluster analysis against the kd100, kd1000 and kd10000 CSV resources.
+ *
+ * @author Julian Hinxlage
+ * @version 0.1
+ */
+class ClusterTest {
+
+    @Test
+    void test100() {
+        URL url = getClass().getClassLoader().getResource("kd100.csv");
+        assert url != null;
+
+        Cluster cluster = new Cluster(new File(url.getFile()));
+
+        AtomicReference<Map<Integer, Map<String, String>>> results = new AtomicReference<>();
+
+
+        assertTimeout(Duration.ofMillis(2000), () -> results.set(cluster.getResult()));
+
+        assertEquals("m", results.get().get(0).get("Geschlecht"));
+    }
+
+    @Test
+    void test1000() {
+        URL url = getClass().getClassLoader().getResource("kd1000.csv");
+        assert url != null;
+
+        Cluster cluster = new Cluster(new File(url.getFile()));
+
+        AtomicReference<Map<Integer, Map<String, String>>> results = new AtomicReference<>();
+
+        assertTimeout(Duration.ofMillis(2000), () -> results.set(cluster.getResult()));
+
+        assertEquals("m", results.get().get(0).get("Geschlecht"));
+    }
+
+    @Test
+    void test10000() {
+        URL url = getClass().getClassLoader().getResource("kd10000.csv");
+        assert url != null;
+
+        Cluster cluster = new Cluster(new File(url.getFile()));
+
+        AtomicReference<Map<Integer, Map<String, String>>> results = new AtomicReference<>();
+
+        assertTimeout(Duration.ofMillis(2000), () -> results.set(cluster.getResult()));
+
+        assertEquals("m", results.get().get(0).get("Geschlecht"));
+    }
+}
\ No newline at end of file