From 368af15fd8368cf6694200f1715a0468083740d0 Mon Sep 17 00:00:00 2001
From: joethei
Date: Thu, 25 Apr 2019 13:02:59 +0200
Subject: [PATCH] =?UTF-8?q?Cluster=20fast=20fertig,=20ein=20bischen=20aufr?=
 =?UTF-8?q?=C3=A4umen=20noch=20n=C3=B6tig?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

#SPM-13: add work development 2h 20m Cluster fertig
---
 .../de/hsel/spm/baudas/analysis/Cluster.java  | 97 +++++++++++++++++++
 .../hsel/spm/baudas/analysis/ClusterTest.java | 30 ++++++
 2 files changed, 127 insertions(+)
 create mode 100644 src/main/java/de/hsel/spm/baudas/analysis/Cluster.java
 create mode 100644 src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java

diff --git a/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java b/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java
new file mode 100644
index 0000000..d340747
--- /dev/null
+++ b/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java
@@ -0,0 +1,97 @@
+package de.hsel.spm.baudas.analysis;
+
+import weka.clusterers.SimpleKMeans;
+import weka.core.Instances;
+import weka.filters.Filter;
+import weka.filters.unsupervised.attribute.Remove;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Cluster analysis: k-means centroids of the loaded instances.
+ *
+ * @author Johannes Theiner
+ * @version 0.1
+ */
+public class Cluster implements Analysis<Map<Integer, Map<String, String>>> {
+
+    private Instances instances;
+    private Map<Integer, Map<String, String>> result;
+
+    /** Loads the instances to cluster from {@code file}. */
+    public Cluster(File file) {
+        instances = load(file);
+    }
+
+    /**
+     * Computes the centroids on the first call and returns the cached map afterwards.
+     * Keys count up from 0: first the centroids of a 1-cluster run, then those of a
+     * 5-cluster run. Exceptions raised along the way are printed, not rethrown.
+     *
+     * @return running centroid index mapped to attribute name and centroid value
+     */
+    @Override
+    public Map<Integer, Map<String, String>> getResult() {
+        if(result == null) {
+            result = new HashMap<>();
+            // TODO: adjust once #SPM-17 is merged.
+            int[] keepIndexes = new int[]{0, 1, 3, 5, 6, 7};
+            Remove remove = new Remove();
+
+            try {
+                remove.setAttributeIndicesArray(keepIndexes);
+                remove.setInvertSelection(true); // keep the listed attributes, drop the others
+                remove.setInputFormat(instances);
+                instances = Filter.useFilter(instances, remove);
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+
+            SimpleKMeans fullMeans = new SimpleKMeans();
+            Instances fullCentroids = null;
+            try {
+                fullMeans.setNumClusters(1);
+                fullMeans.setPreserveInstancesOrder(true);
+                fullMeans.buildClusterer(instances);
+                fullCentroids = fullMeans.getClusterCentroids();
+
+            } catch (Exception ex) {
+                ex.printStackTrace();
+            }
+            assert fullCentroids != null;
+
+            SimpleKMeans kMeans = new SimpleKMeans();
+            try {
+                kMeans.setNumClusters(5);
+                kMeans.setPreserveInstancesOrder(true);
+                kMeans.buildClusterer(instances);
+
+                int count = 0;
+                count = putIntoMap(fullCentroids, count);
+
+                Instances centroids = kMeans.getClusterCentroids();
+                putIntoMap(centroids, count);
+
+            } catch (Exception ex) {
+                ex.printStackTrace();
+            }
+        }
+
+        return result;
+    }
+
+    /** Stores one name-to-value map per centroid from key {@code count}; returns the next key. */
+    private int putIntoMap(Instances centroids, int count) {
+        for (int i = 0; i < centroids.numInstances(); i++) {
+            Map<String, String> map = new HashMap<>();
+            for (int j = 0; j < centroids.numAttributes(); j++) {
+                map.put(centroids.attribute(j).name(), centroids.instance(i).stringValue(j));
+            }
+            result.put(count, map);
+            count++;
+        }
+        return count;
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java b/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java
new file mode 100644
index 0000000..2711fa6
--- /dev/null
+++ b/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java
@@ -0,0 +1,30 @@
+package de.hsel.spm.baudas.analysis;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.net.URL;
+import java.time.Duration;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTimeout;
+
+public class ClusterTest {
+
+    /** Clusters kd100.csv within a 2000 ms budget and expects "m" as Geschlecht of entry 0. */
+    @Test
+    public void test100() {
+        URL url = getClass().getClassLoader().getResource("kd100.csv");
+        assert url != null;
+
+        Cluster cluster = new Cluster(new File(url.getFile()));
+
+        AtomicReference<Map<Integer, Map<String, String>>> results = new AtomicReference<>();
+
+        assertTimeout(Duration.ofMillis(2000), () -> results.set(cluster.getResult()));
+
+        assertEquals("m", results.get().get(0).get("Geschlecht"));
+    }
+}
\ No newline at end of file